# 🧠 CNN Classifier for Handwritten Hebrew Letters
This notebook implements a deep CNN to classify handwritten Hebrew letters using the HHD dataset.

**Experiments:**
- Training without data augmentation
- Training with data augmentation


## 📦 1. Imports and Setup

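The first code cell below imports every library used in this notebook: NumPy, OpenCV, scikit-learn, pandas, seaborn, Matplotlib and TensorFlow/Keras.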

## 🧼 2. Preprocessing Function

In [None]:
import numpy as np
import os
import cv2
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
import pandas as pd
import seaborn as sns
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense
from tensorflow.keras.preprocessing.image import ImageDataGenerator


## 📥 3. Dataset Loader

In [None]:
def preprocess_image(image_path):
    """Read one image file and convert it to a normalised 32x32 array.

    The file is loaded as grayscale, padded with white (255) pixels along its
    shorter axis until it is square, resized to 32x32, inverted
    (``255 - pixel``) and finally divided by 255.

    Args:
        image_path: Path passed to ``cv2.imread``.

    Returns:
        A ``float32`` array of shape (32, 32), or ``None`` when
        ``cv2.imread`` could not read the file.
    """
    gray = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if gray is None:
        return None

    rows, cols = gray.shape
    gap = abs(rows - cols)
    # Split the missing pixels as evenly as possible; the extra one (odd gap)
    # goes to the trailing side.
    leading = gap // 2
    trailing = gap - leading

    if rows > cols:
        # Taller than wide: add columns on the left and right.
        borders = (0, 0, leading, trailing)
    elif cols > rows:
        # Wider than tall: add rows on the top and bottom.
        borders = (leading, trailing, 0, 0)
    else:
        borders = None

    if borders is not None:
        gray = cv2.copyMakeBorder(gray, *borders, cv2.BORDER_CONSTANT, value=255)

    # Invert so that a white (255) background becomes 0.
    inverted = 255 - cv2.resize(gray, (32, 32))
    return inverted.astype(np.float32) / 255.0

## 🔀 4. Load and Split Dataset

In [None]:
def load_dataset(base_dir, num_classes=27):
    """Load every readable image under ``base_dir/<label>/`` into arrays.

    Each class is expected in a sub-folder named after its integer label
    (``0`` .. ``num_classes - 1``). Missing folders are skipped, as are files
    for which ``preprocess_image`` returns ``None``.

    Args:
        base_dir: Directory containing one sub-folder per class label.
        num_classes: Number of label folders to look for (default 27).

    Returns:
        A tuple ``(X, y)`` where ``X`` is a ``float32`` array of shape
        (N, 32, 32, 1) and ``y`` is an ``int64`` array of shape (N,).
        When nothing could be loaded, N is 0 but the shapes stay the same.
    """
    images, labels = [], []
    for label in range(num_classes):
        folder = os.path.join(base_dir, str(label))
        if not os.path.isdir(folder):
            continue
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # sample order (and therefore any seeded split) reproducible.
        for fname in sorted(os.listdir(folder)):
            img = preprocess_image(os.path.join(folder, fname))
            if img is not None:
                images.append(img.reshape(32, 32, 1))
                labels.append(label)

    # reshape(-1, ...) keeps the 4-D layout even when no image was loaded.
    X = np.array(images, dtype=np.float32).reshape(-1, 32, 32, 1)
    y = np.array(labels, dtype=np.int64)
    return X, y

## 🧠 5. CNN Model Architecture

In [None]:
# Dataset locations and split settings, kept together so they are easy to tune.
TRAIN_DIR = 'processed_hhd/train'
TEST_DIR = 'processed_hhd/test'
VAL_FRACTION = 0.1
SEED = 42

X, y = load_dataset(TRAIN_DIR)
X_test, y_test = load_dataset(TEST_DIR)

# Fail early with a clear message instead of an obscure error further down
# when a data folder is missing or empty.
assert len(X) > 0, f'No training images found under {TRAIN_DIR!r}'
assert len(X_test) > 0, f'No test images found under {TEST_DIR!r}'

# Stratified hold-out: the validation set keeps the class proportions of y.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=VAL_FRACTION, stratify=y, random_state=SEED)

## 🚂 6. Train CNN (No Augmentation)

In [None]:
def build_cnn(input_shape=(32, 32, 1), num_classes=27):
    """Build and compile the CNN classifier.

    The network stacks three convolutional stages (32, 64 and 128 filters).
    Each stage is two 3x3 'same'-padded ReLU convolutions, a 2x2 max-pool and
    25% dropout. The stages are followed by a 512-unit ReLU dense layer,
    50% dropout and a softmax output.

    Args:
        input_shape: Shape of one input image (default (32, 32, 1)).
        num_classes: Number of softmax output units (default 27).

    Returns:
        A compiled ``Sequential`` model using the Adam optimizer,
        sparse categorical cross-entropy loss and the accuracy metric.
    """
    model = Sequential()
    # Declaring the input up front builds the model immediately, so
    # model.summary() and weight inspection work before the first fit().
    model.add(tf.keras.Input(shape=input_shape))
    for filters in (32, 64, 128):
        model.add(Conv2D(filters, (3, 3), activation='relu', padding='same'))
        model.add(Conv2D(filters, (3, 3), activation='relu', padding='same'))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Dropout(0.25))
    model.add(Flatten())
    model.add(Dense(512, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes, activation='softmax'))
    # Sparse loss: labels are integer class ids, not one-hot vectors.
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

## 🚀 7. Train CNN (With Augmentation)

In [None]:
# Baseline experiment: train a fresh network directly on the training images,
# without augmentation, and keep the per-epoch history for the loss plot.
model = build_cnn()
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=50,
    validation_data=(X_val, y_val),
)

## 📉 8. Plot Training vs Validation Loss

In [None]:
# Augmentation experiment: random shifts, rotation, shear and brightness changes.
#
# brightness_range makes Keras round-trip every image through PIL, which works
# on 0-255 pixel values. X_train is scaled to [0, 1], so feeding it directly
# would corrupt the augmented images or leave them on a different scale from
# the validation data. The generator is therefore fed 0-255 pixels, and
# `rescale` (applied after all other transformations) maps each augmented
# batch back to [0, 1].
datagen = ImageDataGenerator(
    width_shift_range=0.1,
    height_shift_range=0.1,
    rotation_range=10,
    shear_range=0.2,
    brightness_range=(0.2, 1.8),
    rescale=1.0 / 255.0)
# datagen.fit() is omitted on purpose: it only computes statistics for the
# featurewise_* / zca_whitening options, none of which are enabled here.
model_aug = build_cnn()
history_aug = model_aug.fit(datagen.flow(X_train * 255.0, y_train, batch_size=32),
                             validation_data=(X_val, y_val), epochs=50)

## 📊 9. Evaluate on Test Set and Save Results

In [None]:
# Overlay training and validation loss of both experiments on one figure,
# save it to disk and display it.
fig, ax = plt.subplots()
loss_curves = [
    (history.history['loss'], 'Train Loss (no aug)'),
    (history.history['val_loss'], 'Val Loss (no aug)'),
    (history_aug.history['loss'], 'Train Loss (aug)'),
    (history_aug.history['val_loss'], 'Val Loss (aug)'),
]
for losses, curve_label in loss_curves:
    ax.plot(losses, label=curve_label)
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.set_title('Training and Validation Loss')
ax.legend()
ax.grid(True)
fig.savefig('loss_curve.png')
plt.show()

In [None]:
# Evaluate the augmented model on the test set and save the results.
NUM_CLASSES = 27  # matches the label folders 0..26 and the softmax width
class_ids = np.arange(NUM_CLASSES)

y_pred = np.argmax(model_aug.predict(X_test), axis=1)

# Pass the labels explicitly so the matrix is always NUM_CLASSES x NUM_CLASSES.
# Without them, a class absent from both y_test and y_pred is dropped and row
# index i no longer corresponds to letter i.
cm = confusion_matrix(y_test, y_pred, labels=class_ids)

# Per-class accuracy = correct predictions / true samples of that class.
# Classes with no test samples get NaN instead of a divide-by-zero warning,
# and are excluded from the average.
support = cm.sum(axis=1)
acc_per_class = np.divide(cm.diagonal(), support,
                          out=np.full(NUM_CLASSES, np.nan), where=support > 0)
mean_acc = np.nanmean(acc_per_class)

for i, acc in enumerate(acc_per_class):
    print(f'Letter {i}: {acc:.2f}')
print('Average accuracy:', mean_acc)

pd.DataFrame(cm).to_csv('confusion_matrix.csv', index=False)
with open('results.txt', 'w') as f:
    f.write('Letter    Accuracy\n')
    for i, acc in enumerate(acc_per_class):
        f.write(f'{i:<10}{acc:.4f}\n')
    f.write(f'\nAverage accuracy: {mean_acc:.4f}\n')