#### Renaming Images in Dataset e.g.,1.ا.jpeg

In [2]:
import os

# Base Dataset folder ka path
base_folder = "../Dataset"

# Har Urdu letter folder ke liye loop
for folder_name in os.listdir(base_folder):
    folder_path = os.path.join(base_folder, folder_name)
    
    # Agar folder hai (file nahi)
    if os.path.isdir(folder_path):
        for i, file in enumerate(os.listdir(folder_path), start=1):
            old_path = os.path.join(folder_path, file)
            ext = file.split('.')[-1]  # file extension (jpg/png)
            new_name = f"{i}.{folder_name}.{ext}"  # e.g. 1.ا.jpg
            new_path = os.path.join(folder_path, new_name)
            
            os.rename(old_path, new_path)
        print(f"✅ Renamed all files in: {folder_name}")


✅ Renamed all files in: ب
✅ Renamed all files in: د
✅ Renamed all files in: ھ
✅ Renamed all files in: ی
✅ Renamed all files in: ء
✅ Renamed all files in: ن
✅ Renamed all files in: س
✅ Renamed all files in: ف
✅ Renamed all files in: ش
✅ Renamed all files in: و
✅ Renamed all files in: غ
✅ Renamed all files in: چ
✅ Renamed all files in: ا
✅ Renamed all files in: خ
✅ Renamed all files in: ڈ
✅ Renamed all files in: ٹ
✅ Renamed all files in: ص
✅ Renamed all files in: ز
✅ Renamed all files in: پ
✅ Renamed all files in: ق
✅ Renamed all files in: گ
✅ Renamed all files in: ط
✅ Renamed all files in: م
✅ Renamed all files in: ڑ
✅ Renamed all files in: ذ
✅ Renamed all files in: ع
✅ Renamed all files in: ژ
✅ Renamed all files in: ج
✅ Renamed all files in: ہ
✅ Renamed all files in: ث
✅ Renamed all files in: ے
✅ Renamed all files in: ظ
✅ Renamed all files in: ل
✅ Renamed all files in: ر
✅ Renamed all files in: ک
✅ Renamed all files in: ض
✅ Renamed all files in: ت
✅ Renamed all files in: ح


#### Preprocessing of Data 

In [3]:
import os
import cv2
import numpy as np

# Paths
DATASET_DIR = "../Dataset"
PREPROCESSED_DIR = "../Preprocessed"

# Target image size
IMG_SIZE = (64, 64)

# Make sure Preprocessed folder exists
if not os.path.exists(PREPROCESSED_DIR):
    os.makedirs(PREPROCESSED_DIR)

# Process each Urdu letter folder
for folder_name in os.listdir(DATASET_DIR):
    folder_path = os.path.join(DATASET_DIR, folder_name)
    save_folder = os.path.join(PREPROCESSED_DIR, folder_name)

    # Skip non-folder items
    if not os.path.isdir(folder_path):
        continue

    # Create corresponding folder in Preprocessed
    os.makedirs(save_folder, exist_ok=True)

    # Process all images inside the folder
    for img_file in os.listdir(folder_path):
        try:
            img_path = os.path.join(folder_path, img_file)
            image = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)  # Convert to grayscale
            
            if image is None:
                print(f"⚠️ Skipping unreadable file: {img_path}")
                continue
            
            # Resize to 64x64
            image = cv2.resize(image, IMG_SIZE)
            
            # Normalize pixel values (0–1)
            image = image / 255.0

            # Save preprocessed image
            save_path = os.path.join(save_folder, img_file)
            cv2.imwrite(save_path, (image * 255).astype(np.uint8))
        
        except Exception as e:
            print(f"Error processing {img_file}: {e}")

    print(f"✅ Completed preprocessing for: {folder_name}")

print("\n🎉 All images preprocessed successfully!")


ModuleNotFoundError: No module named 'cv2'

In [None]:
import cv2
import numpy as np

def extract_edge_features(image):
    # Resize and blur slightly to remove noise
    image = cv2.resize(image, (64, 64))
    image = cv2.GaussianBlur(image, (3, 3), 0)

    # Compute gradients (edges)
    gx = cv2.Sobel(image, cv2.CV_32F, 1, 0, ksize=3)
    gy = cv2.Sobel(image, cv2.CV_32F, 0, 1, ksize=3)

    # Compute magnitude and angle
    magnitude, angle = cv2.cartToPolar(gx, gy, angleInDegrees=True)

    # Create a histogram of gradients
    hist, _ = np.histogram(angle, bins=9, range=(0, 180), weights=magnitude)

    # Normalize
    hist = hist / np.linalg.norm(hist) if np.linalg.norm(hist) != 0 else hist
    return hist


In [None]:
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split

data = []
labels = []
classes = []

dataset_path = "../Preprocessed"

for idx, folder in enumerate(os.listdir(dataset_path)):
    folder_path = os.path.join(dataset_path, folder)
    
    # Skip non-folder items (like .DS_Store)
    if not os.path.isdir(folder_path):
        continue

    classes.append(folder)

    for img_name in os.listdir(folder_path):
        img_path = os.path.join(folder_path, img_name)
        img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        if img is not None:
            data.append(img)
            labels.append(idx)

X = np.array(data)
y = np.array(labels)

X = X / 255.0
X = X.reshape(-1, 64, 64, 1)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print("✅ Data loaded successfully!")
print(f"Total images: {len(X)}")
print(f"Training images: {len(X_train)}")
print(f"Testing images: {len(X_test)}")
print(f"Classes: {classes}")


✅ Data loaded successfully!
Total images: 984
Training images: 787
Testing images: 197
Classes: ['ب', 'د', 'ھ', 'ی', 'ء', 'ن', 'س', 'ف', 'ش', 'و', 'غ', 'چ', 'ا', 'خ', 'ڈ', 'ٹ', 'ص', 'ز', 'پ', 'ق', 'گ', 'ط', 'م', 'ڑ', 'ذ', 'ع', 'ژ', 'ج', 'ہ', 'ث', 'ے', 'ظ', 'ل', 'ر', 'ک', 'ض', 'ت', 'ح']


In [None]:
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
import joblib

# ---------------------------------
# 🔧 Step 1: Fixed HOG Feature Extractor
# ---------------------------------
def extract_hog_features(image):
    image = cv2.resize(image, (64, 128))  # HOGDescriptor default expects 64x128

    # Create HOG descriptor with proper parameters
    hog = cv2.HOGDescriptor(
        _winSize=(64, 128),
        _blockSize=(16, 16),
        _blockStride=(8, 8),
        _cellSize=(8, 8),
        _nbins=9
    )
    
    h = hog.compute(image)
    return h.flatten()

# ---------------------------------
# 📂 Step 2: Load Dataset
# ---------------------------------
dataset_path = "../Preprocessed"
data, labels = [], []
classes = [f for f in os.listdir(dataset_path) if not f.startswith('.')]

for idx, folder in enumerate(classes):
    folder_path = os.path.join(dataset_path, folder)
    if not os.path.isdir(folder_path):
        continue
    
    for img_name in os.listdir(folder_path):
        if img_name.startswith('.'):
            continue
        img_path = os.path.join(folder_path, img_name)
        img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        if img is not None:
            features = extract_hog_features(img)
            data.append(features)
            labels.append(idx)

X = np.array(data)
y = np.array(labels)

print(f"✅ Loaded {len(X)} images from {len(classes)} classes.")

# ---------------------------------
# ✂️ Step 3: Train-Test Split
# ---------------------------------
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# ---------------------------------
# 🧠 Step 4: Train SVM Model
# ---------------------------------
print("🚀 Training HOG+SVM model...")
model = SVC(kernel='linear', probability=True)
model.fit(X_train, y_train)

# ---------------------------------
# 📊 Step 5: Evaluate
# ---------------------------------
y_pred = model.predict(X_test)
acc = accuracy_score(y_test, y_pred)

print(f"✅ HOG+SVM Model trained successfully! Accuracy: {acc*100:.2f}%")
print("\nClassification Report:\n", classification_report(y_test, y_pred))

# ---------------------------------
# 💾 Step 6: Save Model
# ---------------------------------
os.makedirs("models", exist_ok=True)
joblib.dump(model, "models/hog_svm_model.pkl")
print("💾 Model saved to 'models/hog_svm_model.pkl'")


✅ Loaded 984 images from 38 classes.
🚀 Training HOG+SVM model...
✅ HOG+SVM Model trained successfully! Accuracy: 47.21%

Classification Report:
               precision    recall  f1-score   support

           0       0.17      0.67      0.27         3
           1       0.33      0.17      0.22         6
           3       1.00      0.11      0.20         9
           5       0.50      0.67      0.57         3
           6       0.33      0.50      0.40         4
           7       0.50      0.80      0.62         5
           8       1.00      1.00      1.00         4
           9       0.67      0.33      0.44         6
          10       0.17      0.50      0.25         4
          11       0.60      0.33      0.43         9
          12       0.86      1.00      0.92        12
          13       0.40      0.40      0.40         5
          14       0.56      0.62      0.59         8
          15       0.17      0.33      0.22         3
          16       0.29      0.50      0.36 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


In [7]:
# ------------------- 1️⃣ Imports -------------------
import os
import cv2
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.utils import to_categorical
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

# ------------------- 2️⃣ Paths -------------------
original_path = "Dataset"
augmented_path = "Augmented_Dataset"

# ------------------- 3️⃣ Load Dataset -------------------
def load_images_from_folder(folder_path):
    images = []
    labels = []
    for folder in os.listdir(folder_path):
        folder_full = os.path.join(folder_path, folder)
        if not os.path.isdir(folder_full):
            continue
        for img_name in os.listdir(folder_full):
            if img_name.startswith('.'):
                continue
            img_path = os.path.join(folder_full, img_name)
            img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
            if img is None:
                print("Skipped invalid image:", img_path)
                continue
            img = cv2.resize(img, (128,128))
            _, img = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
            images.append(img)
            labels.append(folder)
    return images, labels

print("Loading original dataset...")
orig_images, orig_labels = load_images_from_folder(original_path)
print("Loading augmented dataset...")
aug_images, aug_labels = load_images_from_folder(augmented_path)

# Combine datasets
all_images = orig_images + aug_images
all_labels = orig_labels + aug_labels

# Convert to numpy arrays
X = np.array(all_images, dtype=np.float32)
y = np.array(all_labels)

# Normalize
X = X / 255.0

# Flatten images for MLP
X = X.reshape(X.shape[0], -1)  # 128*128 = 16384 features

# Encode labels
encoder = LabelEncoder()
y_enc = encoder.fit_transform(y)
y_cat = to_categorical(y_enc)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y_cat, test_size=0.1, random_state=42, stratify=y_enc
)

# ------------------- 4️⃣ Build ANN Model -------------------
num_classes = y_cat.shape[1]
input_size = X_train.shape[1]

model = Sequential()
model.add(Dense(512, input_shape=(input_size,), activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(num_classes, activation='softmax'))

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

# ------------------- 5️⃣ Train Model -------------------
history = model.fit(X_train, y_train,
                    validation_data=(X_test, y_test),
                    epochs=50,
                    batch_size=32)

# ------------------- 6️⃣ Evaluate Model -------------------
score = model.evaluate(X_test, y_test)
print("Test Accuracy:", score[1])

# Predictions
y_pred = model.predict(X_test)
y_pred_labels = np.argmax(y_pred, axis=1)
y_true_labels = np.argmax(y_test, axis=1)

# Classification report
print("\nClassification Report:\n")
print(classification_report(y_true_labels, y_pred_labels, target_names=encoder.classes_))

# Confusion matrix
cm = confusion_matrix(y_true_labels, y_pred_labels)
plt.figure(figsize=(12,10))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=encoder.classes_, yticklabels=encoder.classes_)
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix')
plt.show()

# ------------------- 7️⃣ Predict Single Image -------------------
def predict_image(path):
    img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        print("Invalid image path")
        return
    img = cv2.resize(img, (128,128))
    _, img = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
    img = img.astype(np.float32)/255.0
    img_flat = img.reshape(1,-1)
    pred = model.predict(img_flat)
    label = encoder.inverse_transform([np.argmax(pred)])
    print("Predicted Letter:", label[0])

# Example Usage
# predict_image('Dataset/ا/img1.jpg')


ModuleNotFoundError: No module named 'tensorflow.keras'