In [1]:
# ✅ Step 1: Upload ZIP Files and Extract
import os
from zipfile import ZipFile, is_zipfile

# 📂 Upload ZIP files manually in Colab
from google.colab import files

# Every archive is unpacked into this single directory; the image-loading
# cells further down read from /content/train_v2.
# NOTE(review): the original computed a per-archive folder name
# (filename without '.zip') but never used it — confirm whether separate
# train/test/validate folders were actually intended.
EXTRACT_DIR = 'train_v2'

uploaded = files.upload()  # Upload train_v2.zip, test_v2.zip, validate_v2.zip

# 📦 Unzip all
for filename in uploaded.keys():
    # A stray non-archive upload (e.g. a CSV) would otherwise abort the whole
    # loop with BadZipFile, so report it and move on to the next file.
    if not is_zipfile(filename):
        print(f"Skipping {filename}: not a ZIP archive")
        continue
    with ZipFile(filename, 'r') as zip_ref:
        zip_ref.extractall(EXTRACT_DIR)


In [None]:
# Step 2: Load Metadata CSV (written_name_test_v2.csv or similar)
import pandas as pd

# Name used when the upload dialog returns nothing (e.g. the file is already
# on disk from an earlier run).
DEFAULT_METADATA_CSV = "written_name_test_v2.csv"

# Upload metadata CSV file
csv_uploads = files.upload() or {}  # Upload 'written_name_test_v2.csv' or respective CSV

# Read the CSV that was actually uploaded rather than a hard-coded name, so a
# differently named metadata file (or a re-upload that was saved under a new
# name) is the one that gets loaded.
metadata_csv = next(
    (name for name in csv_uploads if name.lower().endswith('.csv')),
    DEFAULT_METADATA_CSV,
)

# Load into DataFrame
df = pd.read_csv(metadata_csv)
# Header cells may carry stray whitespace; normalise before column lookups.
df.columns = df.columns.str.strip()
print(df.head())


In [None]:
#  Step 3: Display Sample Handwritten Images
import matplotlib.pyplot as plt
from PIL import Image

# Pick the column holding the image file names. 'train_v2' (the name the
# original code used) wins if it exists; otherwise fall back to 'FILENAME'.
# NOTE(review): 'train_v2' is the image folder name, so it was presumably a
# slip for the CSV's file-name column — assumed to be 'FILENAME'; verify
# against df.head().
if 'train_v2' in df.columns:
    filename_col = 'train_v2'
elif 'FILENAME' in df.columns:
    filename_col = 'FILENAME'
else:
    raise KeyError(
        f"No image filename column found; available columns: {list(df.columns)}"
    )

image_files = df[filename_col].tolist()
labels = df['IDENTITY'].tolist()
sample_path = '/content/train_v2'  # Adjust if you are using other folders

# Show up to 9 samples in a 3x3 grid; fewer rows must not raise IndexError.
n_samples = min(9, len(image_files))

plt.figure(figsize=(12, 12))
for i in range(n_samples):
    img_path = os.path.join(sample_path, image_files[i])
    plt.subplot(3, 3, i+1)
    # The context manager closes the underlying file once the pixels are drawn.
    with Image.open(img_path) as img:
        plt.imshow(img, cmap='gray')
    plt.title(f"Label: {labels[i]}")
    plt.axis('off')
plt.tight_layout()
plt.show()


In [None]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import to_categorical


In [None]:
# Step 4: Preprocess Images
import numpy as np
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder

# PIL's resize takes (width, height), so the resulting arrays are
# 32 rows x 128 columns x 1 channel.
IMG_WIDTH, IMG_HEIGHT = 128, 32

image_data = []
target_labels = []
skipped_missing_label = 0
skipped_unreadable = 0

for file_name, label in zip(image_files, labels):
    # Rows without a label (None / NaN) cannot be used as targets, and a NaN
    # mixed in with string labels makes LabelEncoder reject the whole list.
    if label is None or (isinstance(label, float) and np.isnan(label)):
        skipped_missing_label += 1
        continue
    try:
        img_path = os.path.join(sample_path, file_name)
        with Image.open(img_path) as raw_img:
            img = raw_img.convert('L')  # grayscale
            img = img.resize((IMG_WIDTH, IMG_HEIGHT))  # Resize uniformly
            img_array = img_to_array(img) / 255.0  # scale pixels to [0, 1]
    except (OSError, TypeError, ValueError):
        # Missing/corrupt image files or a non-string file name: skip the row,
        # but keep count so silent data loss is visible. Unlike a bare
        # `except:`, this lets KeyboardInterrupt and real bugs propagate.
        skipped_unreadable += 1
        continue
    # Append image and label together so X and y stay aligned.
    image_data.append(img_array)
    target_labels.append(label)

print(
    f"Loaded {len(image_data)} images "
    f"(skipped {skipped_unreadable} unreadable, {skipped_missing_label} without label)"
)
if not image_data:
    raise RuntimeError(
        f"No images could be loaded from {sample_path!r}; check the path and the filename column."
    )

X = np.array(image_data)
print("Image Data Shape:", X.shape)

# Convert labels to encoded classes: each distinct label string becomes one
# integer class id, which is then one-hot encoded.
label_enc = LabelEncoder()
y_encoded = label_enc.fit_transform(target_labels)
y = to_categorical(y_encoded)


In [None]:
#  Step 5: Hold out a validation set
from sklearn.model_selection import train_test_split

# 80% of the samples train the model, 20% are kept for validation; the fixed
# random_state makes the partition repeatable across runs.
split_parts = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
X_train, X_val, y_train, y_val = split_parts


In [None]:
#  Step 6: Build Model (CNN + LSTM for Handwritten Sequence Prediction)
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, LSTM, TimeDistributed, Reshape, Permute

model = Sequential()

# CNN Feature Extraction
# Input is (height=32, width=128, channels=1), matching the preprocessed arrays.
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(32, 128, 1)))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.3))

# Reshape for RNN
# The original flattened the whole feature map and reshaped it to (1, -1),
# which hands the LSTM a single timestep, so no sequence is actually modelled.
# Instead, treat the image width as the time axis:
#   (height, width, channels) -> (width, height, channels) -> (width, height*channels)
# so the LSTM reads the feature columns left to right. model.summary() below
# shows the resulting shapes.
model.add(Permute((2, 1, 3)))
model.add(TimeDistributed(Flatten()))

# LSTM Sequence Modeling
# return_sequences=False keeps only the final state, which feeds the classifier.
model.add(LSTM(128, return_sequences=False))
# One softmax unit per one-hot class column in y.
model.add(Dense(y.shape[1], activation='softmax'))

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()


In [None]:
#  Step 7: Fit the model
# 15 passes over the training split in mini-batches of 32; the validation
# split is scored after every epoch and recorded in `history`.
fit_options = dict(
    validation_data=(X_val, y_val),
    epochs=15,
    batch_size=32,
)
history = model.fit(X_train, y_train, **fit_options)


In [None]:
# Step 8: Plot training vs. validation accuracy per epoch
import matplotlib.pyplot as plt

# (history key, legend text) for each curve, drawn in this order.
accuracy_curves = (
    ('accuracy', 'Train Accuracy'),
    ('val_accuracy', 'Validation Accuracy'),
)
for history_key, legend_text in accuracy_curves:
    plt.plot(history.history[history_key], label=legend_text)

plt.title("Model Accuracy Over Epochs")
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.legend()
plt.show()


In [None]:
# Step 9: Run the model on one validation image and show the result
sample_idx = 10

# The model expects a batch, so add a leading batch axis of size 1.
sample_img = X_val[sample_idx][np.newaxis, ...]
pred_label = model.predict(sample_img)

# Highest-probability class index -> original label string.
best_class = np.argmax(pred_label)
decoded_label = label_enc.inverse_transform([best_class])

# Drop the channel axis for display as a 2-D grayscale image.
plt.imshow(X_val[sample_idx].reshape(32, 128), cmap='gray')
plt.title(f"Predicted: {decoded_label[0]}")
plt.axis('off')
plt.show()
