<a href="https://colab.research.google.com/github/SapanaDashoni15/MedSarthi-/blob/main/GGH'25_semi_final.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Install Dependencies

In [1]:
pip install torch torchvision torchaudio numpy opencv-python albumentations jiwer


Collecting jiwer
  Downloading jiwer-3.1.0-py3-none-any.whl.metadata (2.6 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidi

# Data Preprocessing

In [2]:
import cv2
import numpy as np
import torch
import albumentations as A
from albumentations.pytorch import ToTensorV2
from torch.utils.data import Dataset, DataLoader

# Define augmentations (random noise, blur, rotation, contrast adjustments)
transform = A.Compose([
    A.Resize(128, 32),  # Standard input size for CRNN models
    A.Rotate(limit=5, border_mode=cv2.BORDER_CONSTANT),
    A.GaussianBlur(blur_limit=(3,5)),
    A.RandomBrightnessContrast(p=0.2),
    A.Normalize(mean=(0.5,), std=(0.5,)),
    ToTensorV2()
])

# Custom dataset loader
class PrescriptionDataset(Dataset):
    def __init__(self, img_paths, labels, transform=None):
        self.img_paths = img_paths
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.img_paths)

    def __getitem__(self, idx):
        img = cv2.imread(self.img_paths[idx], cv2.IMREAD_GRAYSCALE)
        img = cv2.resize(img, (128, 32))  # Resize to fit CNN input
        label = self.labels[idx]

        if self.transform:
            img = self.transform(image=img)['image']

        return img, label

# Sample usage:
# dataset = PrescriptionDataset(image_paths, labels, transform=transform)
# dataloader = DataLoader(dataset, batch_size=32, shuffle=True)


# Build CNN + BiLSTM + CTC Model


In [5]:
import torch.nn as nn
import torch.nn.functional as F

class CNN_BiLSTM_CTC(nn.Module):
    def __init__(self, num_classes):
        super(CNN_BiLSTM_CTC, self).__init__()

        # CNN Feature Extractor
        self.conv_layers = nn.Sequential(
            nn.Conv2d(1, 64, kernel_size=3, padding=1),  # (Batch, 1, 128, 32)
            nn.ReLU(),
            nn.MaxPool2d((2,2)),  # (Batch, 64, 64, 16)

            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d((2,2)),  # (Batch, 128, 32, 8)

            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d((2,2)),  # (Batch, 256, 16, 4)

            nn.Conv2d(256, 512, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d((2,1)),  # (Batch, 512, 8, 4)
        )

        # BiLSTM for Sequence Processing
        self.lstm = nn.LSTM(input_size=512, hidden_size=256, bidirectional=True, batch_first=True)

        # Fully Connected Layer (for character classification)
        self.fc = nn.Linear(512, num_classes)  # Output characters + blank for CTC loss

    def forward(self, x):
        x = self.conv_layers(x)  # CNN feature extraction
        x = x.permute(0, 3, 1, 2)  # Change order to (batch, width, height, channels)
        x = x.squeeze(2)  # Remove unnecessary height dimension

        x, _ = self.lstm(x)  # Pass through BiLSTM
        x = self.fc(x)  # Character classification

        return x

# Define number of output classes (including blank for CTC loss)
num_classes = 40  # Adjust based on character set
model = CNN_BiLSTM_CTC(num_classes)


# Define CTC Loss & Training Setup

In [6]:
import torch.optim as optim

# CTC Loss Function
ctc_loss = nn.CTCLoss()

# Optimizer & Learning Rate Scheduler
optimizer = optim.Adam(model.parameters(), lr=0.001)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.9)


# Load Image Paths & Labels

In [12]:
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [15]:
!ls /content/drive/MyDrive/YoloDataset/


data.yaml  README.dataset.txt  README.roboflow.txt  test  train  valid


In [16]:
!cat /content/drive/MyDrive/YoloDataset/data.yaml


train: train/images
val: valid/images
test: test/images

names: 
  0: A to Z
  1: A-C
  2: A-Calm
  3: ATOZ Senior
  4: ATV
  5: AU-20
  6: Abdolax
  7: Abecab
  8: Abetis Plus
  9: Ace
  10: Ace Plus
  11: Acelex
  12: Acifix
  13: Aciphin
  14: Acliz Plus
  15: Acos
  16: Actrapid
  17: Adam 33
  18: Airflow
  19: Alarup
  20: Alatrol
  21: Alcet
  22: Alex
  23: Alfumax ER
  24: Algecal D
  25: Algecal DX
  26: Algicid DX
  27: Algin
  28: Algita D
  29: Almex
  30: Alneed
  31: Alphapress
  32: Alve
  33: Ambrox
  34: Amdocal
  35: Amikacin
  36: Amilin
  37: Amira
  38: Amit
  39: Amitriptyline
  40: Anadol
  41: Anaflex Max
  42: Ancor
  43: Androcap
  44: Angenta
  45: Anil
  46: Anset
  47: Antacid Plus
  48: Antazol
  49: Anustat
  50: Anuva
  51: Apitac
  52: Aquafresh
  53: Arbit
  54: Arilin
  55: Aristovit
  56: Arsenor
  57: Ascobex
  58: Atin
  59: Atova
  60: Atrizin
  61: Atropin
  62: Augment
  63: Avas
  64: Avaspray
  65: Avenac
  66: Avlolac
  67: Avolac
  68: Avot

In [17]:
!ls /content/drive/MyDrive/YoloDataset/train/images
!ls /content/drive/MyDrive/YoloDataset/train/labels


20230314_201442-2-_jpg.rf.d33f9e00265e00384d8434b2ff72e0c9.jpg
20230314_201536-1_jpg.rf.b0616762032ce568ca0a813641cc5822.jpg
20230314_201536-1_jpg.rf.b127397d99c762a6f3b9018392c91657.jpg
20230314_202255-1_jpg.rf.8514c7b2bcde4bf87695d181b88af778.jpg
273807787_643771493514240_5221570911420136102_n_jpg.rf.99c6e0cc59e4f30465233855dd3ffa01.jpg
275731693_1749841385351211_1717365577871896825_n_jpg.rf.23f3d349caec4a9ce35e6161ec48707d.jpg
275731693_1749841385351211_1717365577871896825_n_jpg.rf.68549cf60a6ee68766e9b48d5ac2c5e7.jpg
277149366_3185720878362691_2590983567990434148_n_jpg.rf.deb26d39288b6b6bc36b4f7caf395de4.jpg
280396103_3253326008236156_1246680013249511505_n_jpg.rf.da4e90588765ad811f1c58b5d325c054.jpg
280472860_3253326101569480_656127941706098227_n_jpg.rf.6aa204eae6e67de0d3439ffe851302e6.jpg
280652500_3255701027998654_3621167942555674620_n_jpg.rf.388512986427e9e157d1e51fd7ba6ab5.jpg
280652500_3255701027998654_3621167942555674620_n_jpg.rf.ca17198cd9c31e02ed8e4832ff515987.jpg
280708981

In [13]:
import os

dataset_path = "/content/drive/MyDrive/YoloDataset"

# Check folder contents
print("Train Folder:", os.listdir(os.path.join(dataset_path, "train")))
print("Valid Folder:", os.listdir(os.path.join(dataset_path, "valid")))
print("Test Folder:", os.listdir(os.path.join(dataset_path, "test")))


Train Folder: ['labels', 'images', 'labels.cache']
Valid Folder: ['images', 'labels', 'labels.cache']
Test Folder: ['images', 'labels']


In [21]:
import glob

train_path_images = os.path.join(dataset_path, "train/images")
train_path_labels = os.path.join(dataset_path, "train/labels")
valid_path_images = os.path.join(dataset_path, "valid/images")
valid_path_labels = os.path.join(dataset_path, "valid/labels")
test_path_images = os.path.join(dataset_path, "test/images")
test_path_labels = os.path.join(dataset_path, "test/labels")

# Get image and label paths
train_images = sorted(glob.glob(os.path.join(train_path_images, "*.jpg")))  # Adjust for .png if needed
train_labels = sorted(glob.glob(os.path.join(train_path_labels, "*.txt")))

valid_images = sorted(glob.glob(os.path.join(valid_path_images, "*.jpg")))
valid_labels = sorted(glob.glob(os.path.join(valid_path_labels, "*.txt")))

test_images = sorted(glob.glob(os.path.join(test_path_images, "*.jpg")))
test_labels = sorted(glob.glob(os.path.join(test_path_labels, "*.txt")))

# Print some sample data
print(f"Found {len(train_images)} train images and {len(train_labels)} train labels.")
print(f"Found {len(valid_images)} valid images and {len(valid_labels)} valid labels.")
print(f"Found {len(test_images)} test images and {len(test_labels)} test labels.")


Found 374 train images and 374 train labels.
Found 75 valid images and 75 valid labels.
Found 40 test images and 40 test labels.


# Training Loop

In [22]:
def train(model, dataloader, optimizer, ctc_loss, num_epochs=10):
    model.train()
    for epoch in range(num_epochs):
        total_loss = 0
        for images, labels in dataloader:
            images = images.unsqueeze(1).float()  # Add channel dimension

            # Convert labels to tensor indices (mock example)
            label_lengths = torch.tensor([len(lbl) for lbl in labels])
            labels = torch.cat([torch.tensor([ord(c) - ord('a') for c in lbl], dtype=torch.long) for lbl in labels])

            optimizer.zero_grad()
            predictions = model(images)

            # Compute loss
            input_lengths = torch.full(size=(predictions.size(1),), fill_value=predictions.size(0), dtype=torch.long)
            loss = ctc_loss(predictions.log_softmax(2), labels, input_lengths, label_lengths)

            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {total_loss:.4f}")

# Train the model
# train(model, dataloader, optimizer, ctc_loss, num_epochs=10)


# Evaluation (Character & Word Error Rate)

In [26]:
!pip install textblob symspellpy
!python -m textblob.download_corpora

Collecting symspellpy
  Downloading symspellpy-6.7.8-py3-none-any.whl.metadata (3.9 kB)
Collecting editdistpy>=0.1.3 (from symspellpy)
  Downloading editdistpy-0.1.5-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.9 kB)
Downloading symspellpy-6.7.8-py3-none-any.whl (2.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.6/2.6 MB[0m [31m62.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading editdistpy-0.1.5-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (144 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m144.1/144.1 kB[0m [31m10.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: editdistpy, symspellpy
Successfully installed editdistpy-0.1.5 symspellpy-6.7.8
[nltk_data] Downloading package brown to /root/nltk_data...
[nltk_data]   Unzipping corpora/brown.zip.
[nltk_data] Downloading package punkt_tab to /root/nltk_data...

In [27]:
from symspellpy.symspellpy import SymSpell, Verbosity

# Initialize SymSpell
sym_spell = SymSpell(max_dictionary_edit_distance=2, prefix_length=7)

# Load a dictionary (or add custom words)
sym_spell.load_dictionary("frequency_dictionary_en_82_765.txt", term_index=0, count_index=1)

def correct_with_symspell(text):
    suggestions = sym_spell.lookup(text, Verbosity.CLOSEST, max_edit_distance=2)
    return suggestions[0].term if suggestions else text  # Return best match

# Example
true_text = "Paracetamol 500mg"
predicted_text = "Paracetamo 500mg"

corrected_text = correct_with_symspell(predicted_text)
print(f"Original Prediction: {predicted_text}")
print(f"Corrected Prediction: {corrected_text}")


2025-02-24 10:38:27,659: E symspellpy.symspellpy] Dictionary file not found at frequency_dictionary_en_82_765.txt.
ERROR:symspellpy.symspellpy:Dictionary file not found at frequency_dictionary_en_82_765.txt.


Original Prediction: Paracetamo 500mg
Corrected Prediction: Paracetamo 500mg


In [33]:
!pip install happytransformer

Collecting happytransformer
  Downloading happytransformer-3.0.0-py3-none-any.whl.metadata (4.4 kB)
Collecting datasets<3.0.0,>=2.13.1 (from happytransformer)
  Downloading datasets-2.21.0-py3-none-any.whl.metadata (21 kB)
Collecting accelerate<1.0.0,>=0.20.1 (from happytransformer)
  Downloading accelerate-0.34.2-py3-none-any.whl.metadata (19 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets<3.0.0,>=2.13.1->happytransformer)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets<3.0.0,>=2.13.1->happytransformer)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess (from datasets<3.0.0,>=2.13.1->happytransformer)
  Downloading multiprocess-0.70.17-py311-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.6.1,>=2023.1.0 (from fsspec[http]<=2024.6.1,>=2023.1.0->datasets<3.0.0,>=2.13.1->happytransformer)
  Downloading fsspec-2024.6.1-py3-none-any.whl.metadata (11 kB)
INFO: pip 

In [34]:
from happytransformer import HappyTextToText

# Load model
happy_tt = HappyTextToText("T5", "vennify/t5-base-grammar-correction")

# Correct text
def correct_text(text):
    return happy_tt.generate_text(f"fix: {text}").text

print(correct_text("Paracetamo 500mg"))


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/1.42k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/892M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.92k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/892M [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/1.79k [00:00<?, ?B/s]

Device set to use cpu


Paracetamo 500 mg. Fix: Paracetamo 500 mg.


In [39]:
from jiwer import cer, wer

true_text = "Paracetamol 500mg"
predicted_text = "Paracetamo 500mg"

# Apply your best correction method
corrected_text = correct_with_symspell(predicted_text)  # Use the function you implemented

# Evaluate before and after correction
print("Before Correction:")
print(f"Character Error Rate: {cer(true_text, predicted_text)}")
print(f"Word Error Rate: {wer(true_text, predicted_text)}")

print("\nAfter Correction:")
print(f"Character Error Rate: {cer(true_text, corrected_text)}")
print(f"Word Error Rate: {wer(true_text, corrected_text)}")

Before Correction:
Character Error Rate: 0.058823529411764705
Word Error Rate: 0.5

After Correction:
Character Error Rate: 0.058823529411764705
Word Error Rate: 0.5


# Model Deployment

In [44]:
!pip install fastapi
!pip install uvicorn

Collecting uvicorn
  Downloading uvicorn-0.34.0-py3-none-any.whl.metadata (6.5 kB)
Downloading uvicorn-0.34.0-py3-none-any.whl (62 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.3/62.3 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: uvicorn
Successfully installed uvicorn-0.34.0


In [46]:
import nest_asyncio
nest_asyncio.apply()

from fastapi import FastAPI
import torch

app = FastAPI()

@app.post("/predict/")
async def predict(text: str):
    # Load model and predict
    # Note: model.predict(text) might need adjustments
    # based on your model's actual prediction method.
    result = model(text) # Assuming your model is called 'model'
    return {"prediction": result}

# Run server
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)

INFO:     Started server process [230]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)
INFO:     Shutting down
INFO:     Waiting for application shutdown.
INFO:     Application shutdown complete.
INFO:     Finished server process [230]


In [50]:
from fastapi import FastAPI
import torch

app = FastAPI()

@app.post("/predict/")
async def predict(text: str):
    # Load model and predict
    # Note: model.predict(text) might need adjustments
    # based on your model's actual prediction method.
    result = model(text) # Assuming your model is called 'model'
    return {"prediction": result}

# Run server
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)

INFO:     Started server process [230]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)
ERROR:asyncio:Task exception was never retrieved
future: <Task finished name='Task-4' coro=<Server.serve() done, defined at /usr/local/lib/python3.11/dist-packages/uvicorn/server.py:68> exception=KeyboardInterrupt()>
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/uvicorn/main.py", line 579, in run
    server.run()
  File "/usr/local/lib/python3.11/dist-packages/uvicorn/server.py", line 66, in run
    return asyncio.run(self.serve(sockets=sockets))
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/nest_asyncio.py", line 30, in run
    return loop.run_until_complete(task)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/nest_asyncio.py", line 92, in run_until_complete
    s

In [51]:
!pip install streamlit

Collecting streamlit
  Downloading streamlit-1.42.2-py2.py3-none-any.whl.metadata (8.9 kB)
Collecting watchdog<7,>=2.1.5 (from streamlit)
  Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.42.2-py2.py3-none-any.whl (9.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.6/9.6 MB[0m [31m39.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m38.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl (79 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.1/79.1 kB[0m [31m5.0 MB/s[0m eta [36m0:00:00[0m
[

In [52]:
import streamlit as st

st.title("Pharmacist AI Assistant")

user_input = st.text_input("Enter Handwritten Prescription Text")
if st.button("Predict"):
    prediction = model.predict(user_input)
    st.write("Prediction:", prediction)


ERROR:asyncio:Task exception was never retrieved
future: <Task finished name='Task-1' coro=<Server.serve() done, defined at /usr/local/lib/python3.11/dist-packages/uvicorn/server.py:68> exception=KeyboardInterrupt()>
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/uvicorn/main.py", line 579, in run
    server.run()
  File "/usr/local/lib/python3.11/dist-packages/uvicorn/server.py", line 66, in run
    return asyncio.run(self.serve(sockets=sockets))
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/nest_asyncio.py", line 30, in run
    return loop.run_until_complete(task)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/nest_asyncio.py", line 92, in run_until_complete
    self._run_once()
  File "/usr/local/lib/python3.11/dist-packages/nest_asyncio.py", line 133, in _run_once
    handle._run()
  File "/usr/lib/python3.11/asyncio/events.py", line 84, in _run
    se

In [53]:
from jiwer import cer, wer

true_text = "Paracetamol 500mg"
predicted_text = "Paracetamo 500mg"

print("Character Error Rate:", cer(true_text, predicted_text))
print("Word Error Rate:", wer(true_text, predicted_text))


Character Error Rate: 0.058823529411764705
Word Error Rate: 0.5
