<a href="https://colab.research.google.com/github/Muneeb2721/Urdu-Handwritten/blob/main/Copy_of_Final_Evaluation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import files
import zipfile
import os

# Ask Colab for the dataset archive; `uploaded` is keyed by uploaded file name
uploaded = files.upload()

# Unpack every uploaded ZIP archive into /content/, skipping anything else
for filename in uploaded:
    if not filename.endswith(".zip"):
        continue
    with zipfile.ZipFile(filename, 'r') as zip_ref:
        zip_ref.extractall("/content/")
    print("Files extracted successfully.")


Saving json.zip to json.zip
Files extracted successfully.


In [2]:
import os
import json
import numpy as np
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import ReduceLROnPlateau
from sklearn.utils.class_weight import compute_class_weight
import joblib

In [3]:
# Function to load and process the data
def load_data_from_directory(directory_path, steps_per_sequence=5):
    """Load stroke recordings from JSON files and cut them into fixed-size windows.

    Every ``*.json`` file directly inside ``directory_path`` is parsed. The file
    content is iterated as a list of character variations, each of which is a
    list of point dicts carrying ``'dx'``, ``'dy'`` and ``'timestamp'`` keys.
    Each variation is split into consecutive, non-overlapping windows of
    ``steps_per_sequence`` points; a trailing window with fewer points is
    dropped. The label for every window is the file name without its
    ``.json`` suffix.

    :param directory_path: Directory containing the per-character JSON files.
    :param steps_per_sequence: Number of points per window (default 5).
    :return: ``(data, labels)`` where ``data`` is a list of windows, each a
        list of ``[dx, dy, timestamp]`` triples, and ``labels`` is a parallel
        list of label strings.
    :raises ValueError: If ``steps_per_sequence`` is smaller than 1.
    :raises KeyError: If a point lacks one of the expected keys.
    """
    if steps_per_sequence < 1:
        raise ValueError("steps_per_sequence must be a positive integer")

    suffix = ".json"
    data = []
    labels = []

    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order (and therefore any seeded train/test split) reproducible.
    for filename in sorted(os.listdir(directory_path)):
        if not filename.endswith(suffix):
            continue
        file_path = os.path.join(directory_path, filename)

        # JSON is UTF-8 by specification; do not depend on the platform default.
        with open(file_path, "r", encoding="utf-8") as f:
            file_data = json.load(f)

        # Strip only the trailing extension so a ".json" inside the name survives.
        label = filename[:-len(suffix)]

        sequences = []
        for character_variation in file_data:
            for start in range(0, len(character_variation), steps_per_sequence):
                window = character_variation[start:start + steps_per_sequence]
                if len(window) != steps_per_sequence:
                    # Incomplete tail window: skip rather than pad here.
                    continue
                sequences.append(
                    [[point['dx'], point['dy'], point['timestamp']] for point in window]
                )

        data.extend(sequences)
        labels.extend([label] * len(sequences))

    return data, labels

# Preprocess the data (pad sequences and encode labels)
def preprocess_data(data, labels, sequence_length=50):
    """Turn raw windows and string labels into arrays ready for training.

    :param data: List of sequences, each a list of per-step feature lists.
    :param labels: Label strings, parallel to ``data``.
    :param sequence_length: Number of time steps every sequence is padded or
        truncated to (both applied at the end of the sequence).
    :return: ``(sequences, one_hot_labels, label_encoder)``; the encoder is the
        fitted ``LabelEncoder`` needed to map class indices back to labels.

    NOTE(review): when ``data`` comes from 5-step windows, the default of 50
    leaves most of each padded sequence as zeros; confirm this is intended.
    """
    padded = pad_sequences(
        data,
        maxlen=sequence_length,
        dtype='float32',
        padding='post',
        truncating='post',
    )

    # Map label strings to integer class ids, then to one-hot rows
    label_encoder = LabelEncoder()
    class_ids = label_encoder.fit_transform(labels)
    one_hot = to_categorical(class_ids)

    return np.array(padded), np.array(one_hot), label_encoder

# Function to export the trained model
def save_model(model, model_path="trained_model.h5"):
    """Persist ``model`` by calling its ``save`` method, then report the path.

    :param model: Object exposing ``save(path)``.
    :param model_path: Destination file path.
    """
    destination = model_path
    model.save(destination)
    print(f"Model saved to {destination}")

# Function to load the model
def load_trained_model(model_path="trained_model1.h5"):
    """Load a previously saved model from ``model_path`` and return it.

    NOTE(review): the default here ("trained_model1.h5") differs from
    save_model's default ("trained_model.h5"); pass the path explicitly
    when loading a model written with save_model's default.
    """
    restored_model = load_model(model_path)
    return restored_model

In [4]:
# Main function to load, preprocess, and train the model
def main():
    """Train the stacked-LSTM classifier and persist the model and label encoder.

    Steps: load windows from ``/content/json``, pad and one-hot encode them,
    split 80/20 with a fixed seed, compute balanced class weights from the
    training labels, train for 10 epochs with ``ReduceLROnPlateau`` on
    ``val_loss``, then write ``trained_model.h5`` (via ``save_model``) and
    ``label_encoder1.pkl``.

    :raises ValueError: If no training sequences are found in the data directory.
    """
    # Local import: `Input` is not part of the notebook's import cell.
    from tensorflow.keras.layers import Input

    # Load the data from your directory
    directory_path = "/content/json"
    data, labels = load_data_from_directory(directory_path)
    if not data:
        # Fail early with a clear message instead of a downstream shape error.
        raise ValueError(f"No training sequences found in {directory_path!r}")

    # Preprocess the data
    sequences, labels_encoded, label_encoder = preprocess_data(data, labels)

    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(
        sequences, labels_encoded, test_size=0.2, random_state=42
    )

    # Compute class weights to handle class imbalance.
    # The integer class ids are derived once and reused for both arguments
    # and for the dict keys.
    train_class_ids = np.argmax(y_train, axis=1)
    present_classes = np.unique(train_class_ids)
    class_weights = compute_class_weight(
        'balanced', classes=present_classes, y=train_class_ids
    )
    class_weight_dict = dict(zip(present_classes, class_weights))

    # Example model (LSTM with increased complexity).
    # An explicit Input layer replaces `input_shape=` on the first LSTM.
    model = Sequential([
        Input(shape=(X_train.shape[1], X_train.shape[2])),
        LSTM(128, return_sequences=True),
        Dropout(0.3),
        LSTM(64),
        Dropout(0.3),
        Dense(len(label_encoder.classes_), activation="softmax")
    ])

    # Compile the model
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    model.summary()

    # Add learning rate scheduler
    lr_scheduler = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2, min_lr=1e-6)

    # Train the model with class weights, learning rate scheduler, and batch size adjustment
    model.fit(
        X_train,
        y_train,
        epochs=10,
        batch_size=64,
        validation_data=(X_test, y_test),
        class_weight=class_weight_dict,
        callbacks=[lr_scheduler],
    )

    # Save the trained model
    save_model(model)

    # Save label encoder
    joblib.dump(label_encoder, "label_encoder1.pkl")

# Function to predict a character from user input
def predict_character(user_input, model, label_encoder, sequence_length=50):
    """
    Predict the character based on the user input.
    :param user_input: Non-empty list of dictionaries containing 'dx', 'dy', 'timestamp'.
    :param model: Trained model to use for predictions.
    :param label_encoder: Label encoder to decode predictions into characters.
    :param sequence_length: The sequence length for padding/truncating.
    :return: Predicted character label.
    :raises ValueError: If ``user_input`` contains no points.
    :raises KeyError: If a point lacks 'dx', 'dy' or 'timestamp'.
    """
    # Reject empty input up front: with no points there is no feature row to
    # pad, and the failure would otherwise surface later as a model input
    # shape error.
    if len(user_input) == 0:
        raise ValueError("user_input must contain at least one point")

    # Preprocess the user input
    flattened_input = [[point['dx'], point['dy'], point['timestamp']] for point in user_input]
    padded_input = pad_sequences(
        [flattened_input],
        maxlen=sequence_length,
        dtype='float32',
        padding='post',
        truncating='post',
    )

    # Make the prediction (batch of exactly one sample)
    prediction = model.predict(padded_input)

    # Get the class index with the highest probability for that single sample
    predicted_class_index = np.argmax(prediction[0])

    # Decode the class index to the actual character label
    predicted_character = label_encoder.inverse_transform([predicted_class_index])[0]

    return predicted_character

# Notebook entry point: running this cell calls main(), which trains the
# model and saves it together with the label encoder. No prediction is
# performed here.
if __name__ == "__main__":
    # Train and save the model first
    main()


  super().__init__(**kwargs)


Epoch 1/10
[1m2312/2312[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 10ms/step - accuracy: 0.0083 - loss: 5.5359 - val_accuracy: 0.0085 - val_loss: 5.3862 - learning_rate: 0.0010
Epoch 2/10
[1m2312/2312[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 10ms/step - accuracy: 0.0077 - loss: 5.4109 - val_accuracy: 0.0077 - val_loss: 5.3765 - learning_rate: 0.0010
Epoch 3/10
[1m2312/2312[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 10ms/step - accuracy: 0.0084 - loss: 5.3939 - val_accuracy: 0.0082 - val_loss: 5.3788 - learning_rate: 0.0010
Epoch 4/10
[1m2312/2312[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 10ms/step - accuracy: 0.0079 - loss: 5.4063 - val_accuracy: 0.0111 - val_loss: 5.3569 - learning_rate: 0.0010
Epoch 5/10
[1m2312/2312[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 9ms/step - accuracy: 0.0085 - loss: 5.3319 - val_accuracy: 0.0130 - val_loss: 5.2894 - learning_rate: 0.0010
Epoch 6/10
[1m2312/2312[0m [32m━━━━━━━━━━━━━━━━━



Model saved to trained_model.h5


In [5]:
import joblib

# Restore the artifacts written by the training cell.
# Note: joblib.load unpickles its input, so only load files you created yourself.
model = load_trained_model("trained_model.h5")
label_encoder = joblib.load("label_encoder1.pkl")

# Sample user input: five points at the same (dx, dy) with increasing timestamps
user_input = [
    {"dx": 198.5, "dy": 97, "timestamp": stamp}
    for stamp in (0, 1, 26, 26, 50)
]

# Run the classifier on the sample stroke
predicted_character = predict_character(user_input, model, label_encoder)

# Show the decoded label
print(f"Predicted Character: {predicted_character}")




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 193ms/step
Predicted Character: دا
