<a href="https://colab.research.google.com/github/Ravikrishnan05/PrediscanMedtech_project/blob/main/fastapi_for_backend.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# %%
# RUN THIS CELL FIRST to forcefully uninstall the entire conflicting PyTorch ecosystem.
!pip uninstall -y torch torchvision torchaudio triton flash-attn unsloth bitsandbytes xformers fastai

In [None]:
# %%
# NOW, RUN THE OFFICIAL INSTALLATION.
# It will install torch, triton, bitsandbytes, and flash-attn correctly.
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"

In [None]:
# %%
# After Unsloth is installed, we can install compatible versions of these libraries.
!pip install torchvision torchaudio

In [None]:
# %%
# AFTER RESTARTING, RUN THIS CELL DIRECTLY

import torch
from unsloth import FastLanguageModel
from transformers import AutoProcessor

# Verification
import unsloth
print("✅ Unsloth and its dependencies are loaded correctly!")
print("Unsloth version:", unsloth.__version__)
print("PyTorch version:", torch.__version__)
if torch.cuda.is_available():
    # Let's also check torchvision, which we will install
    try:
        import torchvision
        print("Torchvision version:", torchvision.__version__)
    except ImportError:
        print("Installing compatible torchvision...")
        !pip install torchvision torchaudio
        import torchvision
        print("Torchvision version:", torchvision.__version__)


# --- Model Initialization ---
model_id = "google/medgemma-4b-it"

device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.bfloat16 if torch.cuda.is_available() and torch.cuda.is_bf16_supported() else torch.float16

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = model_id,
    max_seq_length = 2048,
    dtype = dtype,
    load_in_4bit = True,
)
model.eval()

processor = AutoProcessor.from_pretrained(model_id)

print("\n✅ Model and processor loaded successfully!")

In [None]:
# %%
# Install the necessary packages for running the web API
!pip install fastapi uvicorn "python-multipart<0.0.7" pyngrok nest_asyncio

In [None]:
# %%
# Run this cell to define the FastAPI app and its routes.
# %%
# --- REFINED AND MORE ROBUST API SERVER ---
# Note: this cell does not start the server; the "Launch the Server" cell below does.

# Imports and app setup
from fastapi import FastAPI, File, UploadFile, Form
from fastapi.responses import JSONResponse
from fastapi.middleware.cors import CORSMiddleware
from PIL import Image
import torch
import io
import nest_asyncio
from pyngrok import ngrok
import uvicorn
import traceback # Import for detailed error logging

# Application object that the route decorators below register against.
app = FastAPI()

# CORS: every origin, method and header is allowed so any front end can call the API.
# Credentials stay disabled: wildcard origins must not be combined with
# allow_credentials=True, and the routes in this notebook use no cookies or auth.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

@app.get("/")
def read_root():
    """Liveness route: respond with a fixed JSON message."""
    status_payload = {"message": "✅ MedGemma API is working!"}
    return status_payload

@app.post("/generate")
async def generate(prompt: str = Form(...), image: UploadFile = File(...)):
    """Answer a text prompt about an uploaded image with the loaded model.

    Args:
        prompt: Question or instruction, sent as a multipart form field.
        image: Uploaded image file; it is decoded with PIL and converted to RGB.

    Returns:
        A JSONResponse holding ``{"response": <generated text>}`` on success.
        ``{"error": <message>}`` is returned with status 400 when the upload
        cannot be decoded as an image, and with status 500 for any other failure.
    """
    try:
        image_bytes = await image.read()
        try:
            pil_image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
        except OSError as e:
            # PIL reports unreadable or truncated image data as OSError; that is a
            # problem with the request, not with the server.
            return JSONResponse(
                status_code=400,
                content={"error": f"Invalid image upload: {e}"},
            )

        # Single user turn: an image placeholder followed by the user's text.
        messages = [
            {
                "role": "user",
                "content": [
                    {"type": "image"},
                    {"type": "text", "text": prompt},
                ]
            }
        ]

        # The processor renders the chat template, then tensorizes text and image together.
        prompt_text = processor.apply_chat_template(messages, add_generation_prompt=True)
        inputs = processor(text=prompt_text, images=pil_image, return_tensors="pt").to(model.device)

        # generate() returns the prompt tokens followed by the newly generated ones,
        # so remember where the prompt ends.
        prompt_length = inputs["input_ids"].shape[-1]

        # Generate
        generated_ids = model.generate(**inputs, max_new_tokens=512, pad_token_id=tokenizer.pad_token_id)

        # Decode only the new tokens. Splitting the full decoded text on a role marker
        # such as "Assistant: " depends on the chat template and echoes the prompt back
        # whenever that marker is absent.
        new_token_ids = generated_ids[:, prompt_length:]
        response_text = processor.batch_decode(new_token_ids, skip_special_tokens=True)[0].strip()

        return JSONResponse(content={"response": response_text})

    except Exception as e:
        # This will print the full error to your server log for easy debugging
        print(traceback.format_exc())
        return JSONResponse(status_code=500, content={"error": str(e)})

In [None]:

# --- Launch the Server ---
import os
from getpass import getpass

# Port shared by the ngrok tunnel and the uvicorn server.
API_PORT = 8000

# Never commit the ngrok auth token to the notebook. It is read from the
# NGROK_AUTH_TOKEN environment variable, or prompted for without echo.
# SECURITY: a token that was ever hardcoded in this cell must be treated as leaked
# and revoked in the ngrok dashboard.
ngrok_auth_token = os.environ.get("NGROK_AUTH_TOKEN") or getpass("Enter your ngrok auth token: ")
if not ngrok_auth_token:
    raise RuntimeError("No ngrok auth token provided; set NGROK_AUTH_TOKEN or enter it when prompted.")
ngrok.set_auth_token(ngrok_auth_token)

# Allow uvicorn to run in a notebook
nest_asyncio.apply()

# Connect to ngrok and get a public URL
public_url = ngrok.connect(API_PORT)
print("🌐 Public API URL:", public_url)

# Start the uvicorn server (this call keeps the cell running while the server is up)
uvicorn.run(app, port=API_PORT)

In [None]:
# %%
# --- LOCALHOST TEST SCRIPT ---
# This test bypasses ngrok and the internet completely.

import requests
import os

# We connect directly to the server inside the Colab machine
local_api_url = "http://127.0.0.1:8000"

image_path = "/content/1.png"

if not os.path.exists(image_path):
    print(f"❌ Error: Test image '{image_path}' not found. Please upload it.")
else:
    endpoint = f"{local_api_url}/generate"
    data = {"prompt": "What are the key findings in this fundus image"}

    print("🚀 Sending request directly to localhost...")
    try:
        # Open the image in a context manager so the file handle is always closed,
        # even when the request fails.
        with open(image_path, "rb") as image_file:
            files = {"image": (image_path, image_file, "image/png")}
            # We send the request to the local server
            response = requests.post(endpoint, data=data, files=files, timeout=300) # 5 min timeout
        response.raise_for_status()
        print("\n✅ Success! Localhost API Response:")
        print(response.json())
    except requests.exceptions.RequestException as e:
        print(f"\n❌ An error occurred during the local request: {e}")