In [None]:
#Data preprocessing
import os
import json
import torch
import pandas as pd
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image


def load_annotations(annotations_folder):
    """Collect the ``"text"`` annotation of every ``*.json`` file in a folder.

    Args:
        annotations_folder: Directory that is scanned (non-recursively) for
            files whose name ends in ``.json``.

    Returns:
        dict mapping ``<stem>.jpg`` (the JSON file name with its final
        extension swapped for ``.jpg``) to the annotation text:

        * JSON list  -> ``"text"`` of the first element (``""`` if the key is absent),
        * JSON dict  -> its ``"text"`` value (``""`` if the key is absent),
        * anything else, including an empty list -> ``""``.

        Files that cannot be read or parsed are reported on stdout and left
        out of the result (best-effort loading).
    """
    annotations = {}

    # Sorted so the sample order does not depend on the OS directory order.
    for json_file in sorted(os.listdir(annotations_folder)):
        if not json_file.endswith(".json"):
            continue

        # Swap only the final extension; str.replace would also rewrite any
        # ".json" that happens to occur earlier in the file name.
        img_name = os.path.splitext(json_file)[0] + ".jpg"
        json_path = os.path.join(annotations_folder, json_file)

        try:
            # Explicit UTF-8: the platform default (e.g. cp1252 on Windows)
            # corrupts or rejects non-ASCII transcriptions.
            with open(json_path, "r", encoding="utf-8") as file:
                data = json.load(file)

            if isinstance(data, list) and len(data) > 0:
                text = data[0].get("text", "")
            elif isinstance(data, dict):
                text = data.get("text", "")
            else:
                text = ""

            annotations[img_name] = text

        except Exception as e:
            # Deliberate best-effort: one bad file must not abort the scan.
            print(f"⚠️ Error reading {json_file}: {e}")

    return annotations

# Dataset locations. Each folder is used both as the annotation folder and as
# the image folder, so the .json files and their .jpg images must sit together.
train_dir = 'D:/Projects/Final_project/Dataset/data/train'
test_dir = 'D:/Projects/Final_project/Dataset/data/test'

train_annotations = load_annotations(train_dir)
test_annotations = load_annotations(test_dir)

train_df = pd.DataFrame(list(train_annotations.items()), columns=["image_name", "text"])
test_df = pd.DataFrame(list(test_annotations.items()), columns=["image_name", "text"])

print(f"✅ Loaded {len(train_df)} train samples and {len(test_df)} test samples.")

# Annotations whose image file is not present on disk are reported and dropped.
train_images = set(os.listdir(train_dir))
test_images = set(os.listdir(test_dir))

missing_train = set(train_df["image_name"]) - train_images
missing_test = set(test_df["image_name"]) - test_images

if missing_train:
    print(f"⚠️ Missing train images: {missing_train}")
if missing_test:
    print(f"⚠️ Missing test images: {missing_test}")

# .copy() makes the filtered frames independent objects. Without it, adding the
# "label" column below raises SettingWithCopyWarning whenever a row was dropped.
train_df = train_df[~train_df["image_name"].isin(missing_train)].copy()
test_df = test_df[~test_df["image_name"].isin(missing_test)].copy()

# Every distinct annotation text (train and test combined) becomes one class id.
# NOTE(review): classes that occur only in the test split can never be learned
# from the train split — confirm this label scheme is intended.
unique_labels = sorted(set(train_df["text"].tolist() + test_df["text"].tolist()))
label_to_index = {label: idx for idx, label in enumerate(unique_labels)}

train_df["label"] = train_df["text"].map(label_to_index)
test_df["label"] = test_df["text"].map(label_to_index)

print("🔍 Missing labels in train set:", train_df["label"].isna().sum())
print("🔍 Missing labels in test set:", test_df["label"].isna().sum())

train_df = train_df.dropna()
test_df = test_df.dropna()

# Define image transformations (preprocessing)
# The classifier below is an ImageNet-pretrained ResNet with a frozen backbone,
# so inputs must be normalised with the ImageNet channel statistics those
# weights were trained with. Mean/std of 0.5 feeds the frozen layers a shifted
# input distribution.
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

# Training pipeline: resize + light augmentation + tensor conversion.
# NOTE(review): RandomHorizontalFlip mirrors the handwriting — confirm that
# this augmentation is really wanted for text images.
train_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD)
])

# Evaluation pipeline: deterministic, no augmentation.
test_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD)
])

# Custom Dataset for loading images and labels
class GNHKDataset(Dataset):
    """Map-style dataset that pairs image files with integer class labels.

    Args:
        dataframe: Table with an ``"image_name"`` column (file name relative
            to ``image_folder``) and a ``"label"`` column (class id). Rows are
            addressed by position, so the frame's index values do not matter.
        image_folder: Directory the image names are resolved against.
        transform: Optional callable applied to the RGB ``PIL.Image``.

    Each item is ``(image, label)`` with ``label`` a ``torch.long`` tensor, or
    ``None`` when the image is missing or cannot be read. The ``None`` items
    have to be filtered out by the DataLoader's ``collate_fn``.
    """

    def __init__(self, dataframe, image_folder, transform=None):
        self.dataframe = dataframe
        self.image_folder = image_folder
        self.transform = transform

    def __len__(self):
        """Number of rows in the backing dataframe."""
        return len(self.dataframe)

    def __getitem__(self, idx):
        """Load the ``idx``-th sample; returns ``None`` for unusable images."""
        row = self.dataframe.iloc[idx]
        img_path = os.path.join(self.image_folder, row["image_name"])

        if not os.path.exists(img_path):
            print(f"⚠️ Warning: Image {img_path} not found! Skipping...")
            return None  

        try:
            # Image.open is lazy: it only reads the header. Converting inside
            # the `with` block forces the full decode here, so a truncated or
            # corrupt file is caught below instead of crashing in transform().
            # It also guarantees the file handle is closed.
            with Image.open(img_path) as source:
                image = source.convert("RGB")
        except Exception as e:
            print(f"⚠️ Error opening {img_path}: {e}")
            return None

        if self.transform:
            image = self.transform(image)

        return image, torch.tensor(row["label"], dtype=torch.long)


def collate_fn(batch):
    """Collate a batch after discarding samples the dataset returned as ``None``.

    The remaining samples are handed to PyTorch's default collate function.
    """
    usable_samples = list(filter(lambda sample: sample is not None, batch))
    default_collate = torch.utils.data.dataloader.default_collate
    return default_collate(usable_samples)

# Wrap each split's dataframe and image folder in a dataset
train_dataset = GNHKDataset(
    dataframe=train_df,
    image_folder=train_dir,
    transform=train_transforms,
)
test_dataset = GNHKDataset(
    dataframe=test_df,
    image_folder=test_dir,
    transform=test_transforms,
)

# Batched loaders: only the training split is shuffled; both drop None samples
# through collate_fn.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=32,
    shuffle=True,
    collate_fn=collate_fn,
)
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=32,
    shuffle=False,
    collate_fn=collate_fn,
)

print(f"✅ Final Train Dataset: {len(train_dataset)} samples")
print(f"✅ Final Test Dataset: {len(test_dataset)} samples")


✅ Loaded 515 train samples and 172 test samples.
🔍 Missing labels in train set: 0
🔍 Missing labels in test set: 0
✅ Final Train Dataset: 515 samples
✅ Final Test Dataset: 172 samples


In [None]:
#Fine-tuning

import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models
from tqdm import tqdm


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# `pretrained=True` is deprecated in torchvision; the `weights=` enum selects
# the same ImageNet checkpoint explicitly.
model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)

# Freeze the backbone: only the replacement classification head is trained.
for param in model.parameters():
    param.requires_grad = False

# Replace the final layer with one sized for the dataset's label set. The new
# layer's parameters have requires_grad=True by default.
num_ftrs = model.fc.in_features
num_classes = len(label_to_index)  
model.fc = nn.Linear(num_ftrs, num_classes)

# Initialize new FC layer
nn.init.xavier_uniform_(model.fc.weight)
nn.init.constant_(model.fc.bias, 0.01)

model = model.to(device)

# Define loss function and optimizer (the optimizer sees the head only)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.fc.parameters(), lr=0.001, weight_decay=1e-4)

# Halve the learning rate every 5 epochs.
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)

# Loss scaler for mixed precision. `torch.cuda.amp.GradScaler` is deprecated in
# favour of `torch.amp.GradScaler("cuda", ...)`. With enabled=False (CPU) the
# scaler is a transparent pass-through.
scaler = torch.amp.GradScaler("cuda", enabled=torch.cuda.is_available())

# Train the model
epochs = 15  
best_accuracy = 0.0

# Mixed precision is only used when a CUDA device is available.
amp_enabled = torch.cuda.is_available()

for epoch in range(epochs):
    # ---- Training pass ----
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for inputs, labels in tqdm(train_loader, desc=f"Epoch {epoch + 1}/{epochs}", leave=False):
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()

        # `torch.cuda.amp.autocast` is deprecated; `torch.amp.autocast` takes
        # the device type explicitly and is a no-op when enabled=False.
        with torch.amp.autocast(device_type=device.type, enabled=amp_enabled):
            outputs = model(inputs)
            loss = criterion(outputs, labels)

        # Scale the loss before backward, then let the scaler drive the
        # optimizer step and update its scale factor.
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        running_loss += loss.item()

        # Calculate accuracy
        predicted = outputs.argmax(dim=1)
        correct += (predicted == labels).sum().item()
        total += labels.size(0)

    # Training loss (mean of per-batch losses) and accuracy
    train_loss = running_loss / len(train_loader)
    train_accuracy = 100 * correct / total

    # ---- Validation pass ----
    # NOTE(review): the test split doubles as the validation set and drives
    # checkpoint selection below, so no untouched hold-out remains.
    model.eval()
    correct = 0
    total = 0
    val_loss = 0.0

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item()

            predicted = outputs.argmax(dim=1)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)

    # Validation accuracy & loss
    val_loss /= len(test_loader)
    val_accuracy = 100 * correct / total

    print(f"Epoch {epoch + 1}/{epochs} - "
          f"Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.2f}%, "
          f"Val Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.2f}%")

    # Learning rate scheduler step (once per epoch, after the optimizer steps)
    scheduler.step()

    # Save best model: keep the weights of the best-scoring epoch so far
    if val_accuracy > best_accuracy:
        best_accuracy = val_accuracy
        torch.save(model.state_dict(), "best_model.pth")
        print(f"✅ Best model saved with validation accuracy: {best_accuracy:.2f}%")

print("🎉 Training complete!")


  scaler = torch.cuda.amp.GradScaler(enabled=torch.cuda.is_available())
  with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):
                                                           

Epoch 1/15 - Train Loss: 7.1983, Train Accuracy: 4.47%, Val Loss: 8.0408, Validation Accuracy: 2.91%
✅ Best model saved with validation accuracy: 2.91%


                                                           

Epoch 2/15 - Train Loss: 5.5827, Train Accuracy: 4.08%, Val Loss: 8.5043, Validation Accuracy: 4.07%
✅ Best model saved with validation accuracy: 4.07%


                                                           

Epoch 3/15 - Train Loss: 4.8329, Train Accuracy: 8.35%, Val Loss: 8.4485, Validation Accuracy: 3.49%


                                                           

Epoch 4/15 - Train Loss: 4.3993, Train Accuracy: 12.82%, Val Loss: 8.3071, Validation Accuracy: 4.07%


                                                           

Epoch 5/15 - Train Loss: 3.9167, Train Accuracy: 20.00%, Val Loss: 8.2803, Validation Accuracy: 7.56%
✅ Best model saved with validation accuracy: 7.56%


                                                           

Epoch 6/15 - Train Loss: 3.4109, Train Accuracy: 26.80%, Val Loss: 8.2995, Validation Accuracy: 7.56%


                                                           

Epoch 7/15 - Train Loss: 3.3403, Train Accuracy: 33.01%, Val Loss: 8.3859, Validation Accuracy: 8.14%
✅ Best model saved with validation accuracy: 8.14%


                                                           

Epoch 8/15 - Train Loss: 3.1262, Train Accuracy: 38.83%, Val Loss: 8.3817, Validation Accuracy: 8.72%
✅ Best model saved with validation accuracy: 8.72%


                                                           

Epoch 9/15 - Train Loss: 2.9863, Train Accuracy: 44.85%, Val Loss: 8.4610, Validation Accuracy: 8.14%


                                                            

Epoch 10/15 - Train Loss: 2.8181, Train Accuracy: 48.74%, Val Loss: 8.4459, Validation Accuracy: 9.88%
✅ Best model saved with validation accuracy: 9.88%


                                                            

Epoch 11/15 - Train Loss: 2.5458, Train Accuracy: 55.15%, Val Loss: 8.4859, Validation Accuracy: 8.72%


                                                            

Epoch 12/15 - Train Loss: 2.5986, Train Accuracy: 57.86%, Val Loss: 8.5429, Validation Accuracy: 9.88%


                                                            

Epoch 13/15 - Train Loss: 2.4883, Train Accuracy: 61.94%, Val Loss: 8.5670, Validation Accuracy: 10.47%
✅ Best model saved with validation accuracy: 10.47%


                                                            

Epoch 14/15 - Train Loss: 2.3429, Train Accuracy: 66.21%, Val Loss: 8.5580, Validation Accuracy: 8.72%


                                                            

Epoch 15/15 - Train Loss: 2.3901, Train Accuracy: 63.30%, Val Loss: 8.6190, Validation Accuracy: 7.56%
🎉 Training complete!


In [None]:
#Inference model

import cv2
import pytesseract
import numpy as np
from PIL import Image
import os

# Set Tesseract-OCR Path
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"

image_path = "D:/Projects/Final_project/Dataset/data/test/eng_NA_128.jpg"

if not os.path.exists(image_path):
    raise FileNotFoundError(f"Error: Image file not found at {image_path}")

# cv2.imread does not raise on failure: it returns None when the file cannot
# be decoded, which would otherwise surface as an opaque error in cvtColor.
image = cv2.imread(image_path)
if image is None:
    raise ValueError(f"Error: Image file could not be decoded at {image_path}")

# Grayscale -> light blur -> Otsu threshold to get a black/white page.
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

gray = cv2.GaussianBlur(gray, (3, 3), 0)

_, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

# NOTE(review): a 1x1 structuring element only covers the pixel itself, so this
# erode/dilate pair looks like a no-op — use a larger kernel if noise removal
# is intended.
kernel = np.ones((1, 1), np.uint8)
binary = cv2.erode(binary, kernel, iterations=1)
binary = cv2.dilate(binary, kernel, iterations=1)

# Overlay Canny edges on the thresholded image.
edges = cv2.Canny(binary, 50, 150)
binary = cv2.bitwise_or(binary, edges)

processed_image_path = "processed_image.png"
# Check imwrite's own return value: a stale file left by an earlier run would
# satisfy an existence check even if this write failed.
saved = cv2.imwrite(processed_image_path, binary)

if not saved or not os.path.exists(processed_image_path):
    raise FileNotFoundError("Processed image was not saved successfully!")

# ✅ TRY OCR WITHOUT CUSTOM CONFIG FIRST
try:
    # Context manager closes the image file once Tesseract has consumed it.
    with Image.open(processed_image_path) as processed_image:
        text = pytesseract.image_to_string(processed_image, lang="eng")

    # DEBUG: Check if OCR output is empty
    if not text.strip():
        print("Warning: OCR returned empty text. Check the processed image.")

except Exception as e:
    # Best-effort: report the failure and continue with empty text.
    print("OCR Error:", e)
    text = ""  

def clean_text(text):
    """Apply light post-processing to raw OCR output.

    * every ``|`` is replaced by ``I``,
    * every run of two or more spaces is collapsed to a single space
      (newlines and other whitespace inside the text are kept),
    * leading and trailing whitespace is stripped.

    Args:
        text: Raw OCR string.

    Returns:
        The cleaned string.
    """
    import re  # local import keeps this helper self-contained within the cell

    text = text.replace("|", "I")
    # A single str.replace("  ", " ") pass turns four spaces into two, not one;
    # the regex collapses runs of any length.
    text = re.sub(r" {2,}", " ", text)
    return text.strip()

# Post-process the raw OCR output and show it.
cleaned_text = clean_text(text)

print("\n🔍 Extracted Text:\n", cleaned_text)

# Persist the cleaned text as a UTF-8 file in the working directory.
output_text_path = "extracted_text.txt"
with open(output_text_path, mode="w", encoding="utf-8") as text_file:
    text_file.write(cleaned_text)

print(f"✅ Extracted text saved to {output_text_path}")



🔍 Extracted Text:
 Cred , Weep wie cote.
otet
2 go te you fr Sate t a

Lerd. every thing. you bore Gives wie © ares
You law wrode my tite Secuve

Hy heact glad Jey en my fevaue
My bedy) algo wit be Secnve I

You wrt qi“’ me. endless, Pleasu ves ak

Youle an Lion d "Bowe port of Péal we
✅ Extracted text saved to extracted_text.txt
