In [48]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, TimeDistributed, Dense
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Load the dataset
data = np.load(r"D:\CODSOFT-TASK\TASK5-Handwritten-text-Generator\deepwriting_validation.npz", allow_pickle=True)
X = data['strokes']
y = data['char_labels']

# Flatten the y array and find unique elements
flat_y = np.concatenate(y)
num_classes = np.max(flat_y) + 1  

# Calculate mean and std for normalization
mean = np.mean([np.mean(seq, axis=0) for seq in X], axis=0)
std = np.std([np.std(seq, axis=0) for seq in X], axis=0)
epsilon = 1e-8

def preprocess(X_batch, y_batch):
    X_padded = pad_sequences(X_batch, padding='post', dtype='float32', maxlen=None)
    y_padded = pad_sequences(y_batch, padding='post')
    X_normalized = (X_padded - mean) / (std + epsilon)
    y_categorical = np.array([to_categorical(seq, num_classes=num_classes) for seq in y_padded])
    return X_normalized, y_categorical

def generator(X, y, batch_size=64):
    while True:
        for start in range(0, len(X), batch_size):
            end = min(start + batch_size, len(X))
            X_batch = X[start:end]
            y_batch = y[start:end]
            yield preprocess(X_batch, y_batch)

# Determine the shape of the input data after padding
X_padded, _ = preprocess(X[:1], y[:1])
input_shape = (X_padded.shape[1], X_padded.shape[2])

# Define the model
# model = Sequential([
#     LSTM(32, return_sequences=True, input_shape=input_shape),  # Increased LSTM units for better capacity
#     TimeDistributed(Dense(num_classes, activation='softmax'))
# ])
model = Sequential([
  LSTM(64, return_sequences=True, input_shape=input_shape),  # Increased LSTM units
  LSTM(32, return_sequences=True),  # Added another LSTM layer
  TimeDistributed(Dense(num_classes, activation='softmax'))
])


# Compile the model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Early stopping callback
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=2, restore_best_weights=True)

# Train the model
batch_size = 64
steps_per_epoch = len(X) // batch_size
history = model.fit(generator(X, y, batch_size=batch_size), steps_per_epoch=steps_per_epoch, epochs=5, callbacks=[early_stopping])

# Save the model for future use
from tensorflow.keras.saving import save_model
model.save('handwriting_model.keras')
# Debugging: Check training history
print("Training History:", history.history)

Epoch 1/5
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 759ms/step - accuracy: 0.1473 - loss: 4.1590
Epoch 2/5
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 769ms/step - accuracy: 0.3120 - loss: 3.6697
Epoch 3/5
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 706ms/step - accuracy: 0.3274 - loss: 3.2303
Epoch 4/5
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 714ms/step - accuracy: 0.3291 - loss: 2.9853
Epoch 5/5
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 728ms/step - accuracy: 0.3267 - loss: 2.8844
Training History: {'accuracy': [0.23788614571094513, 0.3361797332763672, 0.33408981561660767, 0.3351128399372101, 0.337586373090744], 'loss': [4.084088325500488, 3.542590618133545, 3.1490907669067383, 2.940845489501953, 2.8420093059539795]}
