<a href="https://colab.research.google.com/github/Lakshitalearning/CODSOFT/blob/main/Handwritten_text_generation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import tensorflow as tf
import numpy as np
!pip install tensorflow==2.13.0
!pip install numpy==1.23.4
print(tf.__version__)
print(np.__version__)

2.13.0
1.23.4


In [7]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, TimeDistributed, Dense
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Load the dataset
data = np.load('deepwriting_training.npz', allow_pickle=True)
X = data['strokes']
y = data['char_labels']

# Flatten the y array and find unique elements
flat_y = np.concatenate(y)
num_classes = len(np.unique(flat_y))

# Calculate mean and std for normalization
mean = np.mean([np.mean(seq, axis=0) for seq in X], axis=0)
std = np.std([np.std(seq, axis=0) for seq in X], axis=0)
epsilon = 1e-8

def preprocess(X_batch, y_batch):
    X_padded = pad_sequences(X_batch, padding='post', dtype='float32', maxlen=None)
    y_padded = pad_sequences(y_batch, padding='post')
    X_normalized = (X_padded - mean) / (std + epsilon)
    y_categorical = np.array([to_categorical(seq, num_classes=num_classes) for seq in y_padded])
    return X_normalized, y_categorical

def generator(X, y, batch_size=64):
    while True:
        for start in range(0, len(X), batch_size):
            end = min(start + batch_size, len(X))
            X_batch = X[start:end]
            y_batch = y[start:end]
            yield preprocess(X_batch, y_batch)

# Determine the shape of the input data after padding
X_padded, _ = preprocess(X[:1], y[:1])
input_shape = (X_padded.shape[1], X_padded.shape[2])

# Define the model
# model = Sequential([
#     LSTM(32, return_sequences=True, input_shape=input_shape),  # Increased LSTM units for better capacity
#     TimeDistributed(Dense(num_classes, activation='softmax'))
# ])
model = Sequential([
  LSTM(64, return_sequences=True, input_shape=input_shape),  # Increased LSTM units
  LSTM(32, return_sequences=True),  # Added another LSTM layer
  TimeDistributed(Dense(num_classes, activation='softmax'))
])


# Compile the model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Early stopping callback
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=2, restore_best_weights=True)

# Train the model
batch_size = 64
steps_per_epoch = len(X) // batch_size
history = model.fit(generator(X, y, batch_size=batch_size), steps_per_epoch=steps_per_epoch, epochs=5, callbacks=[early_stopping])

# Save the model for future use
model.save('handwriting_model.h5')

# Debugging: Check training history
print("Training History:", history.history)


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training History: {'loss': [2.6002981662750244, 1.816941499710083, 1.5630899667739868, 1.4181619882583618, 1.3273788690567017], 'accuracy': [0.3426077961921692, 0.5008082985877991, 0.5627434253692627, 0.5992001295089722, 0.6217368245124817]}
