In [5]:
# !pip install numpy
# !pip install opencv-python
# !pip install matplotlib
# !pip install tensorflow
# !pip install scikit-learn
# !pip install keras-tuner

In [1]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.utils import to_categorical
from kerastuner.tuners import RandomSearch
from kerastuner.engine.hyperparameters import HyperParameters

  from kerastuner.tuners import RandomSearch


In [2]:
# Set the path to your handwriting dataset
dataset_dir = 'Resized-Datasets'

# Load the dataset
images = []
labels = []

# Assuming each subdirectory in the dataset directory corresponds to a different writer
for writer_dir in os.listdir(dataset_dir):
    writer_images = []
    writer_labels = []
    writer_path = os.path.join(dataset_dir, writer_dir)
#     print("writer_path: ", writer_path)
    
    # Assuming each image file in the writer directory corresponds to a handwriting sample
    for image_file in os.listdir(writer_path):
        image_path = os.path.join(writer_path, image_file)
        img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
#         img = cv2.resize(img, (28, 28))
        writer_images.append(img)
        writer_labels.append(int(writer_dir))  # Assuming the writer directory is named with a numerical label
#         writer_labels.append(writer_dir)
    
    # images += writer_images
    images.extend(writer_images)
    labels.extend(writer_labels)

# Load the handwriting data (replace with your own data loading code)
# Convert the lists to numpy arrays
images = np.array(images)
labels = np.array(labels)

In [7]:
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(images, labels, test_size=0.2, random_state=42)

# Preprocess the images
X_train = X_train.reshape(X_train.shape[0], 28, 28, 1).astype('float32') / 255.0
X_test = X_test.reshape(X_test.shape[0], 28, 28, 1).astype('float32') / 255.0

# Encode the labels
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)

# Determine the number of unique labels in your dataset
num_classes = len(label_encoder.classes_)

# Convert the labels to one-hot encoding
y_train = to_categorical(y_train_encoded, num_classes=num_classes)
y_test = to_categorical(y_test_encoded, num_classes=num_classes)

In [8]:
def build_model(hp):
    model = keras.Sequential()
    
    # Convolutional layer
    model.add(keras.layers.Conv2D(
        filters=hp.Int('conv_filters', min_value=16, max_value=64, step=16),
        kernel_size=(3, 3),
        activation='relu',
        input_shape=(28, 28, 1)
    ))
    model.add(keras.layers.BatchNormalization())
    model.add(keras.layers.MaxPooling2D((2, 2)))

    # Flatten the output from the convolutional layer
    model.add(keras.layers.Flatten())
    
    # Dense hidden layer
    model.add(keras.layers.Dense(
        units=hp.Int('dense_units', min_value=64, max_value=256, step=64),
        activation='relu'
    ))
    model.add(keras.layers.Dropout(0.5))

    # Dense output layer
    model.add(keras.layers.Dense(num_classes, activation='softmax'))
    
    # Compile the model
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    
    return model

# Perform hyperparameter search
tuner = RandomSearch(
    build_model,
    objective='val_accuracy',
    max_trials=5,
    directory='tuner_directory',
    project_name='writer_identification'
)

tuner.search(X_train, y_train, epochs=10, validation_data=(X_test, y_test))

# Get the best hyperparameters
best_hp = tuner.get_best_hyperparameters(num_trials=1)[0]
best_model = tuner.hypermodel.build(best_hp)

# Train the best model
best_model.fit(X_train, y_train, epochs=10, validation_data=(X_test, y_test))

# Save the trained model
best_model.save('writerIdentifier_model.h5')

# Evaluate the best model
loss, accuracy = best_model.evaluate(X_test, y_test)
print('Loss:', loss)
print('Test accuracy:', accuracy)


Trial 5 Complete [00h 00m 30s]
val_accuracy: 0.5333333611488342

Best val_accuracy So Far: 0.8777777552604675
Total elapsed time: 00h 03m 18s
INFO:tensorflow:Oracle triggered exit
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Loss: 0.6008375287055969
Test accuracy: 0.6111111044883728


In [26]:
print('Best Hyperparameters:')
print(best_hp.values)

Best Hyperparameters:
{'conv_filters': 64, 'dense_units': 128}


In [5]:
import os
from PIL import Image

def resizeImage(image_number): 

    input_dir = 'Prediction-Test-Datasets'

    output_dir = 'Resized-Prediction-Test-Datasets'
    target_size = (28, 28)

    # Create the output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    # Load the image
    img_path = f"{input_dir}/Predict-{image_number}.jpg"
    img = Image.open(img_path)
        
    # Resize the image while maintaining the aspect ratio using thumbnail method
    img.thumbnail(target_size, Image.ANTIALIAS)
        
    # Create a new image with the target size as canvas
    resized_image = Image.new('L', target_size, 255)
        
    # Paste the resized image onto the canvas
    offset = ((target_size[0] - img.size[0]) // 2, (target_size[1] - img.size[1]) // 2)
    resized_image.paste(img, offset)
        
    return resized_image


In [6]:
from tensorflow.keras.models import load_model

# Load the trained model
model = load_model('writerIdentifier_model.h5')

# Preprocess the input image
image_number = 4
resized_image = resizeImage(image_number)
input_image = np.array(resized_image)
# Normalise
input_image = input_image.reshape(1, 28, 28, 1).astype('float32') / 255.0

# Convert the input image to a TensorFlow tensor
input_tensor = tf.convert_to_tensor(input_image)

# Make predictions
predictions = model(input_tensor)

# Convert the predictions to a NumPy array
predictions = predictions.numpy()


# Get the writer with the highest probability
predicted_writer_index = np.argmax(predictions)
confidence = np.max(predictions)

# Map the predicted index to the actual writer label
# 1 - Janice; 2 - Jasmine
writers = [1, 2]  # List of writer labels used during training
predicted_writer = writers[predicted_writer_index]

# Print the predicted writer and confidence score
print('Predicted Writer:', "Janice" if predicted_writer == 1 else "Jasmine")
print('Confidence:', confidence)

Predicted Writer: Janice
Confidence: 0.53548664


  img.thumbnail(target_size, Image.ANTIALIAS)
