In [5]:
!pip install timm albumentations --quiet

In [6]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import albumentations as A
from albumentations.pytorch import ToTensorV2
import pandas as pd
import cv2
import timm
import os
from sklearn.model_selection import train_test_split

In [3]:
!unzip -q "/content/archive (5).zip" -d /content/dataset

In [7]:
class PrescriptionDataset(Dataset):
    """Image-classification dataset driven by a CSV label file.

    The CSV is read with pandas and must contain an ``IMAGE`` column (file
    name relative to ``images_folder``) and a ``MEDICINE_NAME`` column (the
    class label of that image).

    Args:
        images_folder: Directory that holds the image files.
        label_file: Path to the CSV label file.
        label_to_id: Optional mapping from label string to integer class id.
            When omitted (or empty), the mapping is built by enumerating the
            sorted unique ``MEDICINE_NAME`` values of this CSV.
        transform: Optional callable invoked as ``transform(image=img)``; the
            ``"image"`` entry of its result is returned as the sample.
    """

    def __init__(self, images_folder, label_file, label_to_id=None, transform=None):
        self.images_folder = images_folder
        self.df = pd.read_csv(label_file)
        # A falsy mapping (None or {}) falls back to one derived from this CSV.
        self.label_to_id = label_to_id if label_to_id else {
            label: idx for idx, label in enumerate(sorted(self.df['MEDICINE_NAME'].unique()))
        }
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the label file."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load one sample.

        Returns:
            ``(image, label)`` where ``image`` is the RGB image (after
            ``transform`` when one is set) and ``label`` is the integer id.

        Raises:
            FileNotFoundError: If the image cannot be read from disk.
            KeyError: If the row's label is missing from ``label_to_id``.
        """
        row = self.df.iloc[idx]
        img_path = os.path.join(self.images_folder, row['IMAGE'])

        image = cv2.imread(img_path)
        # cv2.imread signals failure by returning None instead of raising, so
        # fail here with the offending path rather than inside cvtColor.
        if image is None:
            raise FileNotFoundError(f"Could not read image file: {img_path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        label = self.label_to_id[row['MEDICINE_NAME']]

        if self.transform:
            image = self.transform(image=image)["image"]
        return image, label

In [11]:
def _resize_step():
    """Return the fixed 224x224 resize that opens both pipelines."""
    return A.Resize(224, 224)


def _model_input_steps():
    """Return the normalize + tensor-conversion tail shared by both pipelines."""
    # NOTE(review): these look like the standard ImageNet channel statistics — confirm.
    return [
        A.Normalize(mean=(0.485, 0.456, 0.406),
                    std=(0.229, 0.224, 0.225)),
        ToTensorV2(),
    ]


# Exactly one of these blur variants is applied when the OneOf group fires.
_blur_choices = [
    A.MotionBlur(blur_limit=3, p=1.0),
    A.MedianBlur(blur_limit=3, p=1.0),
    A.Blur(blur_limit=3, p=1.0),
]

# Random augmentations used for training only, in application order.
_train_augmentations = [
    A.Rotate(limit=15, p=0.7),
    A.Affine(translate_percent=0.05, scale=(0.9, 1.1), rotate=0, shear=0, p=0.5),
    A.ElasticTransform(alpha=1, sigma=50, p=0.3),
    A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.5),
    A.GaussNoise(p=0.4),
    A.OneOf(_blur_choices, p=0.3),
    A.HueSaturationValue(hue_shift_limit=10, sat_shift_limit=20, val_shift_limit=10, p=0.3),
]

# Training pipeline: resize -> random augmentations -> normalize -> tensor.
train_transform = A.Compose([_resize_step()] + _train_augmentations + _model_input_steps())

# Validation/test pipeline: the same resize and normalization, no augmentation.
val_transform = A.Compose([_resize_step()] + _model_input_steps())

In [12]:
# Root folder of the extracted dataset; each split lives in its own subfolder
# holding a labels CSV and a directory of word images.
_DATASET_ROOT = "/content/dataset/Doctor’s Handwritten Prescription BD dataset"

train_csv = f"{_DATASET_ROOT}/Training/training_labels.csv"
train_images = f"{_DATASET_ROOT}/Training/training_words"

val_csv = f"{_DATASET_ROOT}/Validation/validation_labels.csv"
val_images = f"{_DATASET_ROOT}/Validation/validation_words"

test_csv = f"{_DATASET_ROOT}/Testing/testing_labels.csv"
test_images = f"{_DATASET_ROOT}/Testing/testing_words"


In [13]:
def _make_loader(dataset, shuffle):
    """Wrap ``dataset`` in a DataLoader using the batch/worker settings shared by all splits."""
    return DataLoader(dataset, batch_size=32, shuffle=shuffle, num_workers=2)


# The training split defines the label -> id mapping; validation and test
# reuse it so class ids agree across all three splits.
train_dataset = PrescriptionDataset(
    images_folder=train_images,
    label_file=train_csv,
    transform=train_transform,
)
_shared_label_map = train_dataset.label_to_id

val_dataset = PrescriptionDataset(
    images_folder=val_images,
    label_file=val_csv,
    label_to_id=_shared_label_map,
    transform=val_transform,
)
test_dataset = PrescriptionDataset(
    images_folder=test_images,
    label_file=test_csv,
    label_to_id=_shared_label_map,
    transform=val_transform,
)

# Only the training loader shuffles.
train_loader = _make_loader(train_dataset, shuffle=True)
val_loader = _make_loader(val_dataset, shuffle=False)
test_loader = _make_loader(test_dataset, shuffle=False)


In [14]:
# One output unit per label in the training split's label -> id mapping.
num_classes = len(train_dataset.label_to_id)

# "tf_efficientnet_b3_ns" is a deprecated timm alias that only resolves via a
# warning-emitting shim; "tf_efficientnet_b3.ns_jft_in1k" is the current name
# for the same architecture and pretrained weights.
model = timm.create_model("tf_efficientnet_b3.ns_jft_in1k", pretrained=True, num_classes=num_classes)

# Prefer the GPU when one is available, otherwise run on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

  model = create_fn(
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


model.safetensors:   0%|          | 0.00/49.3M [00:00<?, ?B/s]

In [15]:
# Loss: cross-entropy over the model's class scores.
criterion = nn.CrossEntropyLoss()

# Optimizer: Adam over every model parameter, with no weight decay.
_adam_settings = dict(lr=5e-5, weight_decay=0)
optimizer = optim.Adam(model.parameters(), **_adam_settings)

In [16]:
def train_model(model, criterion, optimizer, num_epochs=20, loaders=None,
                device=None, checkpoint_path="best_model.pth"):
    """Train ``model`` and checkpoint the weights with the best validation accuracy.

    Each epoch runs a training pass followed by a validation pass and prints
    the sample-averaged loss and accuracy of both. Whenever validation accuracy
    strictly exceeds the best seen so far, the model's ``state_dict`` is saved
    to ``checkpoint_path``.

    Args:
        model: Module to train; called as ``model(inputs)``.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer that updates ``model``'s parameters.
        num_epochs: Number of train + validation epochs to run.
        loaders: Optional dict with ``'train'`` and ``'val'`` data loaders.
            Defaults to the notebook-level ``train_loader`` / ``val_loader``.
        device: Device the batches are moved to. Defaults to the device of the
            model's first parameter (CPU if the model has no parameters).
        checkpoint_path: File the best ``state_dict`` is written to.

    Returns:
        The same ``model`` object, holding the weights of the LAST epoch. The
        best weights are only on disk at ``checkpoint_path``.

    Raises:
        ValueError: If the training or validation dataset is empty.
    """
    if loaders is None:
        # Backward-compatible default: the loaders defined at notebook level.
        loaders = {'train': train_loader, 'val': val_loader}
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    # Fail early and clearly; otherwise the per-epoch averages divide by zero.
    for phase in ('train', 'val'):
        if len(loaders[phase].dataset) == 0:
            raise ValueError(f"The '{phase}' dataset is empty; nothing to iterate over.")

    best_acc = 0.0
    for epoch in range(num_epochs):
        print(f"Epoch {epoch+1}/{num_epochs}")

        for phase in ('train', 'val'):
            is_train = phase == 'train'
            # train(True) enables training behaviour; train(False) equals eval().
            model.train(is_train)
            loader = loaders[phase]
            num_samples = len(loader.dataset)

            running_loss, running_corrects = 0.0, 0
            for inputs, labels in loader:
                inputs, labels = inputs.to(device), labels.to(device)

                # Gradients are tracked during the training phase only.
                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)
                    _, preds = torch.max(outputs, 1)

                    if is_train:
                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()

                # The criterion output is scaled by batch size so the epoch
                # value is divided by the number of samples, not batches.
                running_loss += loss.item() * inputs.size(0)
                # Accumulate a plain int so the accuracy is a Python float.
                running_corrects += (preds == labels).sum().item()

            epoch_loss = running_loss / num_samples
            epoch_acc = running_corrects / num_samples
            print(f"{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}")

            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                torch.save(model.state_dict(), checkpoint_path)
                print("Saved Best Model")

    print(f"Best Val Acc: {best_acc:.4f}")
    return model


In [18]:
# Run the full 40-epoch training; `model` is rebound to the returned module.
model = train_model(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=40,
)

Epoch 1/40
train Loss: 4.4225 Acc: 0.0176
val Loss: 4.2125 Acc: 0.0397
Saved Best Model
Epoch 2/40
train Loss: 4.2139 Acc: 0.0465
val Loss: 3.9633 Acc: 0.1192
Saved Best Model
Epoch 3/40
train Loss: 3.9498 Acc: 0.1096
val Loss: 3.4971 Acc: 0.2474
Saved Best Model
Epoch 4/40
train Loss: 3.5796 Acc: 0.2022
val Loss: 2.8780 Acc: 0.3962
Saved Best Model
Epoch 5/40
train Loss: 3.0489 Acc: 0.3109
val Loss: 2.2291 Acc: 0.5154
Saved Best Model
Epoch 6/40
train Loss: 2.4770 Acc: 0.4542
val Loss: 1.6969 Acc: 0.6359
Saved Best Model
Epoch 7/40
train Loss: 1.9002 Acc: 0.5942
val Loss: 1.3311 Acc: 0.7244
Saved Best Model
Epoch 8/40
train Loss: 1.4129 Acc: 0.7138
val Loss: 1.0376 Acc: 0.7859
Saved Best Model
Epoch 9/40
train Loss: 0.9961 Acc: 0.8109
val Loss: 0.8047 Acc: 0.8359
Saved Best Model
Epoch 10/40
train Loss: 0.7435 Acc: 0.8497
val Loss: 0.6509 Acc: 0.8744
Saved Best Model
Epoch 11/40
train Loss: 0.5636 Acc: 0.8901
val Loss: 0.5664 Acc: 0.8885
Saved Best Model
Epoch 12/40
train Loss: 0.4353

In [22]:
from sklearn.metrics import classification_report

# Restore the checkpoint written during training. `map_location` lets a
# checkpoint saved on GPU be loaded when the current device is the CPU.
model.load_state_dict(torch.load("best_model.pth", map_location=device))
model.eval()
correct, total = 0, 0
all_labels = []
all_preds = []

# Inference only: no gradients are needed on the test split.
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, preds = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (preds == labels).sum().item()
        all_labels.extend(labels.cpu().numpy())
        all_preds.extend(preds.cpu().numpy())

# Guard the division so an empty test split reports 0.0 instead of crashing.
test_accuracy = correct / total if total else 0.0
print(f"Test Accuracy: {test_accuracy:.4f}")

# Order class names by their integer id (not by dict insertion order) and pass
# the ids as `labels`, so the report still works when the test split does not
# contain every class. `zero_division=0` keeps the default value for undefined
# metrics while silencing the warning.
label_to_id = train_dataset.label_to_id
class_names = sorted(label_to_id, key=label_to_id.get)
class_ids = [label_to_id[name] for name in class_names]
report_dict = classification_report(
    all_labels,
    all_preds,
    labels=class_ids,
    target_names=class_names,
    output_dict=True,
    zero_division=0,
)
print(f"Average Metrics (weighted): {report_dict['weighted avg']}")

Test Accuracy: 0.8397
Average Metrics (weighted): {'precision': 0.8654352591852591, 'recall': 0.8397435897435898, 'f1-score': 0.8317872488082011, 'support': 780.0}
