<a href="https://colab.research.google.com/github/NagaPawan-Keerthan/MedScan-AI/blob/main/medScan_AI.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install torch torchvision torchaudio
!pip install transformers datasets
!pip install accelerate

In [None]:
# Mount Google Drive at /content/drive; later cells read the dataset from, and
# write the trained model and reports to, paths under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')


In [None]:
# Long-list the dataset archive on Drive (human-readable size) before extracting it.
!ls -lh /content/drive/MyDrive/dataset/dataset.zip

In [None]:
!unzip /content/drive/MyDrive/dataset/dataset.zip -d /content/drive/MyDrive/dataset


In [None]:
import torch

# Report whether PyTorch can see a CUDA device and, if so, the name of device 0.
has_cuda = torch.cuda.is_available()
print("CUDA Available:", has_cuda)
if has_cuda:
    gpu_name = torch.cuda.get_device_name(0)
else:
    gpu_name = "No GPU detected"
print("GPU Name:", gpu_name)


In [None]:
!pip install torch torchvision torchaudio
!pip install transformers datasets accelerate


In [None]:
# Long-list the extracted dataset directory (the training cell expects
# `train/` and `test/` sub-folders here).
!ls -lh /content/drive/MyDrive/dataset/dataset/


In [None]:
import torch
import numpy as np
import os
from datasets import load_dataset
from transformers import ViTForImageClassification, ViTImageProcessor, TrainingArguments, Trainer

# Locations on the mounted Google Drive: dataset splits in, fine-tuned model out.
DATASET_PATH = "/content/drive/MyDrive/dataset/dataset/"
TRAIN_DIR = f"{DATASET_PATH}train/"
TEST_DIR = f"{DATASET_PATH}test/"
MODEL_SAVE_PATH = "/content/drive/MyDrive/pneumonia_vit_model"

# Training hyperparameters.
NUM_LABELS = 3  # Normal, Pneumonia, Coronahack
BATCH_SIZE = 32
EPOCHS = 5

# Use the GPU whenever PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"🚀 Training on: {device}")

# Each directory is loaded as its own image-folder dataset. A bare folder is
# exposed under the "train" split name, which is why the test directory is
# also requested with split="train".
train_dataset, test_dataset = (
    load_dataset("imagefolder", data_dir=split_dir, split="train")
    for split_dir in (TRAIN_DIR, TEST_DIR)
)

print("✅ Dataset Loaded Successfully!")
print("✅ Train Dataset Columns:", train_dataset.column_names)
print("✅ Test Dataset Columns:", test_dataset.column_names)

# Image processor matching the pretrained ViT checkpoint
# (`ViTImageProcessor` replaces the deprecated `ViTFeatureExtractor`).
feature_extractor = ViTImageProcessor.from_pretrained("google/vit-base-patch16-224-in21k")

# ✅ Fix `pixel_values` Key Issue in Dataset
def process_images(example):
    """Add a `pixel_values` entry to one dataset example.

    The example's `image` is converted to RGB and run through the module-level
    `feature_extractor`; the first (only) item of the resulting batch is stored
    under `pixel_values`. The example is modified in place and returned.

    Returns an empty dict (after printing a notice) when the example has no
    `image` key.
    """
    if "image" in example:
        rgb_image = example["image"].convert("RGB")
        encoded = feature_extractor(images=rgb_image, return_tensors="pt")
        # The processor returns a batch of size one; keep just that item.
        example["pixel_values"] = encoded["pixel_values"][0]
        return example

    print("❌ Missing 'image' key in batch!")
    return {}

# ✅ Apply `map()` to Transform Data Efficiently
import inspect

train_dataset = train_dataset.map(process_images, remove_columns=["image"])
test_dataset = test_dataset.map(process_images, remove_columns=["image"])

# ✅ Derive the label mapping from the dataset itself.
# `imagefolder` assigns integer labels from the class folder names, so a
# hand-written id2label can silently attach the wrong name to a class id.
label_names = train_dataset.features["label"].names
test_label_names = test_dataset.features["label"].names
if test_label_names != label_names:
    # Different folder sets would give the same class different ids in the two
    # splits, which makes every evaluation number meaningless.
    raise ValueError(
        f"Train/test class folders differ: train={label_names}, test={test_label_names}"
    )
if len(label_names) != NUM_LABELS:
    print(f"⚠️ NUM_LABELS={NUM_LABELS} but the dataset defines {len(label_names)} classes: {label_names}")

id2label = dict(enumerate(label_names))
label2id = {name: idx for idx, name in id2label.items()}
print("✅ Label mapping:", id2label)

# ✅ Load Pretrained ViT Model (classification head sized to the dataset)
model = ViTForImageClassification.from_pretrained(
    "google/vit-base-patch16-224-in21k",
    num_labels=len(label_names),
    id2label=id2label,
    label2id=label2id,
).to(device)  # ✅ Move Model to GPU

# `evaluation_strategy` / `tokenizer` were renamed to `eval_strategy` /
# `processing_class` in newer transformers releases. Pick whichever keyword the
# installed version accepts, since the install cell does not pin a version.
eval_strategy_key = (
    "eval_strategy"
    if "eval_strategy" in inspect.signature(TrainingArguments.__init__).parameters
    else "evaluation_strategy"
)
processor_key = (
    "processing_class"
    if "processing_class" in inspect.signature(Trainer.__init__).parameters
    else "tokenizer"
)

# ✅ Define Optimized Training Arguments for GPU
training_args = TrainingArguments(
    output_dir="./results",
    save_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=BATCH_SIZE,
    per_device_eval_batch_size=BATCH_SIZE,
    num_train_epochs=EPOCHS,
    weight_decay=0.01,
    logging_dir="./logs",
    save_total_limit=1,
    # Mixed precision needs a CUDA device; requesting it on a CPU-only runtime
    # makes TrainingArguments raise, so enable it only when a GPU is present.
    fp16=torch.cuda.is_available(),
    report_to="none",  # Disable WandB logging
    **{eval_strategy_key: "epoch"},  # ✅ Evaluate after every epoch
)

# ✅ Define Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,  # ✅ Ensure Evaluation Dataset is Used
    **{processor_key: feature_extractor},
)

# ✅ Start Training
print("🚀 Starting Training...")
trainer.train()

# ✅ Save Model to Google Drive
model.save_pretrained(MODEL_SAVE_PATH)
print(f"✅ Model Training Complete! Saved at `{MODEL_SAVE_PATH}`")

🚀 Training on: cuda


Resolving data files:   0%|          | 0/10525 [00:00<?, ?it/s]

Downloading data:   0%|          | 0/10525 [00:00<?, ?files/s]

Generating train split: 0 examples [00:00, ? examples/s]

Resolving data files:   0%|          | 0/1248 [00:00<?, ?it/s]

Downloading data:   0%|          | 0/1248 [00:00<?, ?files/s]

Generating train split: 0 examples [00:00, ? examples/s]

✅ Dataset Loaded Successfully!
✅ Train Dataset Columns: ['image', 'label']
✅ Test Dataset Columns: ['image', 'label']


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


preprocessor_config.json:   0%|          | 0.00/160 [00:00<?, ?B/s]

Map:   0%|          | 0/10525 [00:00<?, ? examples/s]

Map:   0%|          | 0/1248 [00:00<?, ? examples/s]

config.json:   0%|          | 0.00/502 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/346M [00:00<?, ?B/s]

Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


🚀 Starting Training...


Epoch,Training Loss,Validation Loss
1,No log,1.015262
2,0.770800,1.041283
3,0.770800,1.048934
4,0.721200,1.157089
5,0.706000,1.078324


✅ Model Training Complete! Saved at `/content/drive/MyDrive/pneumonia_vit_model`


In [None]:
!pip install transformers torch safetensors


In [None]:
import torch
from transformers import ViTForImageClassification, ViTImageProcessor
from safetensors.torch import load_file
from PIL import Image

# ✅ Define Model Path
MODEL_PATH = "/content/drive/MyDrive/pneumonia_vit_model/"

# ✅ Load Model (configuration AND weights)
# `from_pretrained` already restores the weights saved by `save_pretrained`,
# so a second `load_state_dict(load_file(...))` pass is unnecessary. It also
# breaks if the checkpoint is not stored as a single `model.safetensors`.
model = ViTForImageClassification.from_pretrained(MODEL_PATH)
model.eval()  # Set model to evaluation mode

# ✅ Load Preprocessor (same checkpoint as the one used during training)
processor = ViTImageProcessor.from_pretrained("google/vit-base-patch16-224-in21k")

# ✅ Class Labels
# Read the names from the saved model config instead of re-typing them, so the
# index -> name mapping used here is always the one stored with the model.
CLASS_LABELS = [model.config.id2label[idx] for idx in range(model.config.num_labels)]

def predict_image(image_path):
    """Classify the image at `image_path` with the module-level `model`.

    The image is opened, converted to RGB and preprocessed with `processor`.
    Returns a `(label, confidence)` tuple: the `CLASS_LABELS` entry for the
    highest-scoring class and that class's softmax probability as a float.
    """
    rgb_image = Image.open(image_path).convert("RGB")
    model_inputs = processor(images=rgb_image, return_tensors="pt")

    # Inference only: no gradients are needed.
    with torch.no_grad():
        logits = model(**model_inputs).logits

    class_index = torch.argmax(logits, dim=-1).item()
    probabilities = torch.softmax(logits, dim=-1)
    # Index [0] selects the single image in the batch.
    return CLASS_LABELS[class_index], probabilities[0][class_index].item()

# ✅ Test the Model on a New Image
# Smoke test: run `predict_image` on one sample image and print the result.
# The sample path below points into the dataset's `val/PNEUMONIA` folder.
TEST_IMAGE = "/content/drive/MyDrive/dataset/dataset/val/PNEUMONIA/person1946_bacteria_4874.jpeg"  # Change this path
label, conf = predict_image(TEST_IMAGE)

# Confidence is the softmax probability of the predicted class, shown to 2 decimals.
print(f"✅ Prediction: {label} (Confidence: {conf:.2f})")


✅ Prediction: Pneumonia (Confidence: 0.50)


In [None]:
!pip install reportlab
!pip install fastapi uvicorn transformers torch torchvision torchaudio pillow safetensors reportlab


In [None]:
pip install python-multipart

In [None]:
!pip install pyngrok

In [None]:
# The ngrok authtoken is a credential and must never be stored in the notebook.
# Any token that was previously committed here should be revoked in the ngrok
# dashboard. The token is taken from the NGROK_AUTHTOKEN environment variable
# when set, otherwise it is requested interactively without echoing.
import os
from getpass import getpass

from pyngrok import ngrok

ngrok.set_auth_token(os.environ.get("NGROK_AUTHTOKEN") or getpass("ngrok authtoken: "))

In [None]:
import os
import torch
import cv2
import nest_asyncio
import uvicorn
import numpy as np
import logging
from PIL import Image
from pyngrok import ngrok
from fastapi import FastAPI, UploadFile, File
from fastapi.responses import FileResponse, JSONResponse
from transformers import ViTForImageClassification, ViTImageProcessor
from safetensors.torch import load_file
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas
from reportlab.lib.utils import ImageReader
import threading
import time

# ✅ Kill any previous FastAPI instances
!fuser -k 8000/tcp

# ✅ Enable Logging
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")

# ✅ Fix: Allow running FastAPI in Jupyter Notebooks
nest_asyncio.apply()

# ✅ Set Paths
MODEL_PATH = "/content/drive/MyDrive/pneumonia_vit_model/"
DOWNLOADS_FOLDER = "/content/drive/MyDrive/predictions/"
os.makedirs(DOWNLOADS_FOLDER, exist_ok=True)

# ✅ Check Device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
logging.info(f"✅ Using Device: {device}")

# ✅ Load Pretrained Model & Processor
model = ViTForImageClassification.from_pretrained(MODEL_PATH).to(device)
model.load_state_dict(load_file(f"{MODEL_PATH}/model.safetensors"))
model.eval()
processor = ViTImageProcessor.from_pretrained("google/vit-base-patch16-224-in21k")

# ✅ Define Class Labels
CLASS_LABELS = ["Normal", "Pneumonia", "Coronahack"]

# ✅ Initialize FastAPI
app = FastAPI()

def predict_diagnosis(image_path):
    """Classify the image at `image_path` and report the result.

    Returns `(label, confidence)`, where `label` is the `CLASS_LABELS` entry of
    the highest-scoring class and `confidence` is its softmax probability.
    Any exception is logged and reported as the sentinel `("Error", 0.0)`
    instead of being raised.
    """
    try:
        rgb_image = Image.open(image_path).convert("RGB")
        # Move the preprocessed batch to the same device as the model.
        batch = processor(images=rgb_image, return_tensors="pt").to(device)

        with torch.no_grad():
            logits = model(**batch).logits

        class_index = torch.argmax(logits, dim=-1).item()
        class_probability = torch.softmax(logits, dim=-1)[0][class_index].item()
        return CLASS_LABELS[class_index], class_probability

    except Exception as e:
        logging.error(f"❌ Error in prediction: {e}")
        return "Error", 0.0

def generate_pdf_report(image_path, diagnosis, confidence):
    """Write a one-page PDF report for a classified image.

    The report is saved in `DOWNLOADS_FOLDER` as `<image file name>_report.pdf`
    and contains a title, the predicted diagnosis with its confidence as a
    percentage, and the query image drawn at 200x200 points.

    Returns the path of the saved PDF, or `None` if anything fails (the error
    is logged).
    """
    try:
        image_name = os.path.basename(image_path)
        pdf_path = os.path.join(DOWNLOADS_FOLDER, f"{image_name}_report.pdf")
        report = canvas.Canvas(pdf_path, pagesize=letter)

        # (font, size, y position, text) for each text line, top to bottom.
        text_lines = (
            ("Helvetica-Bold", 16, 750, f"Medical Image Report: {image_name}"),
            ("Helvetica-Bold", 12, 700, "Diagnosis Result:"),
            ("Helvetica", 12, 680, f"Predicted: {diagnosis} (Confidence: {confidence * 100:.2f}%)"),
            ("Helvetica-Bold", 12, 650, "Query Image:"),
        )
        for font_name, font_size, y_pos, text in text_lines:
            report.setFont(font_name, font_size)
            report.drawString(50, y_pos, text)

        report.drawImage(ImageReader(image_path), 50, 450, width=200, height=200)
        report.save()
    except Exception as e:
        logging.error(f"❌ Error in PDF generation: {e}")
        return None

    logging.info(f"✅ PDF Report Saved: {pdf_path}")
    return pdf_path

@app.post("/predict/")
async def upload_query_image(file: UploadFile = File(...)):
    """Handle an image upload: store it, classify it and build a PDF report.

    Responses:
      * 200: `message`, `diagnosis`, `confidence` and `report_url` (path under
        `/download/` for the generated PDF).
      * 400: the upload has no usable file name.
      * 500: storing the file, the prediction or the report generation failed;
        the body carries `message` and `error`.
    """
    try:
        # The file name comes from the client. Keep only its last path
        # component so a crafted name such as "../../x" cannot write outside
        # DOWNLOADS_FOLDER (backslashes are normalised first to cover
        # Windows-style names).
        safe_name = os.path.basename((file.filename or "").replace("\\", "/"))
        if not safe_name or safe_name in (".", ".."):
            return JSONResponse(
                {"message": "Error in processing.", "error": "Invalid or missing file name."},
                status_code=400,
            )

        file_location = os.path.join(DOWNLOADS_FOLDER, safe_name)
        with open(file_location, "wb") as f:
            f.write(await file.read())

        logging.info(f"✅ File Uploaded: {safe_name}")

        # ✅ Run AI Prediction
        diagnosis, confidence = predict_diagnosis(file_location)
        # predict_diagnosis signals failure with the ("Error", 0.0) sentinel.
        # Do not present that to the client as a completed diagnosis.
        if diagnosis == "Error":
            return JSONResponse(
                {"message": "Error in processing.", "error": "Prediction failed for the uploaded file."},
                status_code=500,
            )

        # ✅ Generate PDF Report
        report_path = generate_pdf_report(file_location, diagnosis, confidence)
        # generate_pdf_report returns None on failure.
        if report_path is None:
            return JSONResponse(
                {"message": "Error in processing.", "error": "Report generation failed."},
                status_code=500,
            )

        return JSONResponse({
            "message": "Processing completed!",
            "diagnosis": diagnosis,
            "confidence": confidence,
            "report_url": f"/download/{os.path.basename(report_path)}"
        })

    except Exception as e:
        logging.error(f"❌ Error in processing: {e}")
        return JSONResponse({"message": "Error in processing.", "error": str(e)}, status_code=500)

@app.get("/download/{report_name}")
async def download_report(report_name: str):
    """Return a generated PDF report from `DOWNLOADS_FOLDER`.

    Only plain `.pdf` file names that resolve to a regular file inside
    `DOWNLOADS_FOLDER` are served. Anything else yields a 404, including names
    with directory components, `..`, or the uploaded images stored in the same
    folder.
    """
    # Reject names that carry path components rather than silently rewriting them.
    safe_name = os.path.basename(report_name)
    report_path = os.path.join(DOWNLOADS_FOLDER, safe_name)
    is_report = (
        safe_name == report_name
        and safe_name.lower().endswith(".pdf")
        and os.path.isfile(report_path)  # isfile, not exists: never hand a directory to FileResponse
    )
    if is_report:
        return FileResponse(report_path, media_type="application/pdf", filename=safe_name)
    return JSONResponse({"message": "Report not found."}, status_code=404)

# ✅ Start FastAPI in a Background Thread
def run_app():
    """Serve the FastAPI `app` with uvicorn on all interfaces, port 8000."""
    server_options = {"host": "0.0.0.0", "port": 8000}
    uvicorn.run(app, **server_options)

# ✅ Start ngrok in a Background Thread
def start_ngrok():
    """Open an ngrok tunnel to local port 8000 and print its public URL.

    Waits five seconds first so the FastAPI server started alongside it has
    time to come up.
    """
    time.sleep(5)  # Ensure FastAPI starts first
    tunnel = ngrok.connect(8000)
    # Current pyngrok returns a tunnel object whose `public_url` holds the
    # address. Fall back to the value itself for versions that return the URL
    # string directly, so the printed line is always just the URL.
    public_url = getattr(tunnel, "public_url", tunnel)
    print(f"🚀 Public API URL: {public_url}")

# Launch the API server and the ngrok tunnel, each in its own daemon thread.
for background_task in (run_app, start_ngrok):
    threading.Thread(target=background_task, daemon=True).start()

# Keep this cell executing indefinitely ("Keep Colab Running").
# NOTE(review): this loop never exits, so later cells cannot run until the cell
# is interrupted manually. Confirm that is intended.
while True:
    time.sleep(100)


In [None]:
import requests

# ✅ Endpoint of the prediction API.
# `localhost` works when this cell runs in the same runtime as the server.
# From another machine, use the public ngrok URL printed at start-up plus "/predict/".
API_URL = "http://localhost:8000/predict/"

# ✅ Upload an Image
file_path = "/content/drive/MyDrive/dataset/dataset/test/PNEUMONIA/person100_bacteria_475.jpeg"  # Change this to your image path

with open(file_path, "rb") as f:
    # Without a timeout `requests` waits forever if the server never answers
    # (for example when it is not running). Allow generous time for inference.
    response = requests.post(API_URL, files={"file": f}, timeout=120)

print(response.json())


INFO:     127.0.0.1:42394 - "POST /predict/ HTTP/1.1" 200 OK
{'message': 'Processing completed!', 'diagnosis': 'Pneumonia', 'confidence': 0.5026567578315735, 'report_url': '/download/person100_bacteria_475.jpeg_report.pdf'}
