In [3]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import models
from PIL import Image
import pandas as pd
import albumentations as A
from albumentations.pytorch import ToTensorV2
import numpy as np

In [2]:
!unzip -q "/content/archive (5).zip" -d /content/dataset

In [4]:
BATCH_SIZE = 32
NUM_EPOCHS = 20
LR = 1e-4
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

In [5]:
train_dir = "/content/dataset/Doctor’s Handwritten Prescription BD dataset/Training/training_words"
train_csv = "/content/dataset/Doctor’s Handwritten Prescription BD dataset/Training/training_labels.csv"

val_dir = "/content/dataset/Doctor’s Handwritten Prescription BD dataset/Validation/validation_words"
val_csv = "/content/dataset/Doctor’s Handwritten Prescription BD dataset/Validation/validation_labels.csv"

test_dir = "/content/dataset/Doctor’s Handwritten Prescription BD dataset/Testing/testing_words"
test_csv = "/content/dataset/Doctor’s Handwritten Prescription BD dataset/Testing/testing_labels.csv"


In [15]:
class PrescriptionDataset(Dataset):
    def __init__(self, csv_file, img_dir, transform=None):
        self.data = pd.read_csv(csv_file)
        self.img_dir = img_dir
        self.transform = transform
        self.classes = sorted(self.data['MEDICINE_NAME'].unique())
        self.class_to_idx = {c: i for i, c in enumerate(self.classes)}

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        img_path = os.path.join(self.img_dir, self.data.iloc[idx]['IMAGE'])
        image = Image.open(img_path).convert("RGB")
        # Resize image to a consistent size before applying transformations
        image = image.resize((224, 224))
        image = np.array(image)

        label = self.class_to_idx[self.data.iloc[idx]['MEDICINE_NAME']]

        if self.transform:
            image = self.transform(image=image)['image']

        return image, label

In [16]:
train_transform = A.Compose([
    A.VerticalFlip(p=0.5),
    A.Rotate(limit=30, p=0.5),
    A.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2, p=0.5),
    A.RandomBrightnessContrast(p=0.5),
    A.GaussianBlur(p=0.3),
    A.Normalize(mean=(0.485, 0.456, 0.406),
                std=(0.229, 0.224, 0.225)),
    ToTensorV2()
])

val_test_transform = A.Compose([
    A.Resize(height=224, width=224),
    A.Normalize(mean=(0.485, 0.456, 0.406),
                std=(0.229, 0.224, 0.225)),
    ToTensorV2()
])

In [17]:
train_dataset = PrescriptionDataset(train_csv, train_dir, transform=train_transform)
label_encoder = train_dataset.class_to_idx

val_dataset = PrescriptionDataset(val_csv, val_dir, transform=val_test_transform)
test_dataset = PrescriptionDataset(test_csv, test_dir, transform=val_test_transform)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)

NUM_CLASSES = len(label_encoder)
print("Number of classes:", NUM_CLASSES)

Number of classes: 78


In [18]:
model = models.resnet50(pretrained=True)
model.fc = nn.Linear(model.fc.in_features, NUM_CLASSES)
model = model.to(DEVICE)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LR)




In [19]:
def train_one_epoch(loader, model, optimizer, criterion):
    model.train()
    running_loss, correct, total = 0.0, 0, 0

    for images, labels in loader:
        images, labels = images.to(DEVICE), labels.to(DEVICE)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * images.size(0)
        _, preds = torch.max(outputs, 1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)

    return running_loss/total, correct/total

def evaluate(loader, model, criterion):
    model.eval()
    running_loss, correct, total = 0.0, 0, 0

    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(DEVICE), labels.to(DEVICE)
            outputs = model(images)
            loss = criterion(outputs, labels)

            running_loss += loss.item() * images.size(0)
            _, preds = torch.max(outputs, 1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)

    return running_loss/total, correct/total

In [20]:
for epoch in range(NUM_EPOCHS):
    train_loss, train_acc = train_one_epoch(train_loader, model, optimizer, criterion)
    val_loss, val_acc = evaluate(val_loader, model, criterion)
    print(f"Epoch {epoch+1}/{NUM_EPOCHS} | Train Acc: {train_acc:.4f} | Val Acc: {val_acc:.4f}")


Epoch 1/20 | Train Acc: 0.1397 | Val Acc: 0.3769
Epoch 2/20 | Train Acc: 0.5087 | Val Acc: 0.6641
Epoch 3/20 | Train Acc: 0.7526 | Val Acc: 0.8064
Epoch 4/20 | Train Acc: 0.8744 | Val Acc: 0.8603
Epoch 5/20 | Train Acc: 0.9228 | Val Acc: 0.8923
Epoch 6/20 | Train Acc: 0.9513 | Val Acc: 0.8974
Epoch 7/20 | Train Acc: 0.9660 | Val Acc: 0.9038
Epoch 8/20 | Train Acc: 0.9679 | Val Acc: 0.9192
Epoch 9/20 | Train Acc: 0.9740 | Val Acc: 0.9282
Epoch 10/20 | Train Acc: 0.9801 | Val Acc: 0.9321
Epoch 11/20 | Train Acc: 0.9869 | Val Acc: 0.9513
Epoch 12/20 | Train Acc: 0.9913 | Val Acc: 0.9385
Epoch 13/20 | Train Acc: 0.9837 | Val Acc: 0.9333
Epoch 14/20 | Train Acc: 0.9872 | Val Acc: 0.9269
Epoch 15/20 | Train Acc: 0.9897 | Val Acc: 0.9179
Epoch 16/20 | Train Acc: 0.9901 | Val Acc: 0.9410
Epoch 17/20 | Train Acc: 0.9872 | Val Acc: 0.9244
Epoch 18/20 | Train Acc: 0.9881 | Val Acc: 0.9218
Epoch 19/20 | Train Acc: 0.9913 | Val Acc: 0.9372
Epoch 20/20 | Train Acc: 0.9917 | Val Acc: 0.9346


In [21]:
test_loss, test_acc = evaluate(test_loader, model, criterion)
print(f"Final Test Accuracy: {test_acc:.4f}")

Final Test Accuracy: 0.8654


In [8]:
!pip install albumentations==1.1.0



In [23]:
from sklearn.metrics import precision_recall_fscore_support, classification_report

model.eval()
all_labels = []
all_preds = []

with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(DEVICE), labels.to(DEVICE)
        outputs = model(images)
        _, preds = torch.max(outputs, 1)
        all_labels.extend(labels.cpu().numpy())
        all_preds.extend(preds.cpu().numpy())

precision, recall, fscore, _ = precision_recall_fscore_support(all_labels, all_preds, average='weighted')
report = classification_report(all_labels, all_preds, target_names=train_dataset.classes)

print(f"Precision (weighted): {precision:.4f}")
print(f"Recall (weighted): {recall:.4f}")
print(f"Accuracy: {test_acc:.4f} F1 Score (weighted): {fscore:.4f}")
print("\nClassification Report:")
print(report)

Precision (weighted): 0.8896
Recall (weighted): 0.8654
Accuracy: 0.8654 F1 Score (weighted): 0.8637

Classification Report:
              precision    recall  f1-score   support

         Ace       1.00      0.90      0.95        10
       Aceta       0.91      1.00      0.95        10
     Alatrol       1.00      0.80      0.89        10
      Amodis       1.00      1.00      1.00        10
     Atrizin       0.71      1.00      0.83        10
      Axodin       1.00      1.00      1.00        10
          Az       0.90      0.90      0.90        10
  Azithrocin       1.00      1.00      1.00        10
       Azyth       0.91      1.00      0.95        10
      Bacaid       0.83      1.00      0.91        10
    Backtone       1.00      0.90      0.95        10
    Baclofen       0.90      0.90      0.90        10
      Baclon       1.00      1.00      1.00        10
      Bacmax       1.00      1.00      1.00        10
       Beklo       1.00      0.70      0.82        10
     Bicozi