# **COVID-19 Vaccine Safety QA API**

This notebook sets up an **end-to-end FastAPI service** for serving a fine-tuned LLM using a LoRA adapter.  

# Install dependencies

In [1]:
!pip install -q fastapi uvicorn pyngrok transformers peft accelerate nest_asyncio


# Apply Nest AsyncIO
Needed to run FastAPI and ngrok inside the notebook without event loop conflicts


In [2]:
import nest_asyncio
# Patch asyncio to tolerate nested event loops: the notebook kernel already
# runs one, and the server started later in this notebook needs its own.
nest_asyncio.apply()

# Import Required Libraries

In [3]:
from fastapi import FastAPI, Header, HTTPException
from pydantic import BaseModel
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
import torch
from pyngrok import ngrok
import uvicorn
import threading
import asyncio
import time




# Ngrok Authentication
Set your ngrok auth token so that pyngrok can create a public URL. The token is read from the Colab secret named `ngrok`.


In [4]:
from google.colab import userdata
from pyngrok import ngrok
# Read the auth token from the Colab secret named 'ngrok' so it is never
# hardcoded in the notebook.
ngrok_token = userdata.get('ngrok')
# Hand the token to pyngrok; the tunnel is opened in a later cell.
ngrok.set_auth_token(ngrok_token)

# Initialize FastAPI App

In [5]:
# Single application instance; the route decorators in the endpoint cell
# register their handlers on it, and uvicorn serves it later.
app = FastAPI(title="COVID-19 Vaccine Safety QA API")


# Load Base Model + LoRA Adapter


In [6]:
from google.colab import drive
# Mount Google Drive so the LoRA adapter directory under
# /content/drive/MyDrive can be read by the model-loading cell.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [7]:
# Hugging Face id of the base checkpoint and the Drive folder holding the
# fine-tuned LoRA adapter weights.
BASE_MODEL = "Qwen/Qwen2.5-1.5B-Instruct"
LORA_PATH = "/content/drive/MyDrive/COVID-19 Vaccine Side Effects and Safety/models"

# Prefer the GPU when one is attached; otherwise fall back to CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Half precision only on GPU; CPU inference stays in float32.
model_dtype = torch.float16 if device == "cuda" else torch.float32

# Load tokenizer and base model
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    # `torch_dtype` is reported as deprecated by the installed transformers
    # release; `dtype` is the replacement keyword.
    dtype=model_dtype,
)

# Load LoRA adapter
model = PeftModel.from_pretrained(base_model, LORA_PATH)
model.to(device)
# This notebook only serves predictions, so make sure dropout is disabled.
model.eval()
print(device)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
`torch_dtype` is deprecated! Use `dtype` instead!


cpu


# Define Request & Response Schemas


In [8]:
class QARequest(BaseModel):
    """Request body accepted by the `/qa/` endpoint."""
    # Free-text question; it is inserted verbatim into the model prompt.
    question: str

class QAResponse(BaseModel):
    """Response body returned by the `/qa/` endpoint."""
    # Answer text extracted from the model's generation.
    answer: str


# Create Endpoints


In [9]:
@app.get("/")
def root():
    # Liveness probe: returns a fixed payload and never touches the model.
    status_payload = dict(message="API is running")
    return status_payload

@app.post("/qa/", response_model=QAResponse)
def get_answer(request: QARequest):
    """Generate a concise answer to a question with the LoRA-adapted model.

    Args:
        request: Request body carrying the user's ``question``.

    Returns:
        QAResponse whose ``answer`` contains only the text produced by the
        model (the prompt is never echoed back).

    Raises:
        HTTPException: 400 when the question is empty or only whitespace.
    """
    if not request.question.strip():
        raise HTTPException(status_code=400, detail="Question must not be empty.")

    prompt = f"Answer the following question concisely in plain text. Do not repeat the question.\nQuestion: {request.question}\nAnswer:"

    inputs = tokenizer(
        prompt,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=512
    )
    input_ids = inputs.input_ids.to(device)
    attention_mask = inputs.attention_mask.to(device)

    output_ids = model.generate(
        input_ids=input_ids,
        attention_mask=attention_mask,
        max_new_tokens=200
    )

    # generate() returns the prompt tokens followed by the new tokens. Slice
    # the prompt off by length instead of splitting the decoded text on
    # "Answer:": that marker is lost when a long prompt is truncated, and it
    # may also occur inside the question or the generated text.
    prompt_length = input_ids.shape[1]
    generated_ids = output_ids[0][prompt_length:]
    answer = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()

    return QAResponse(answer=answer)


# Create Public URL with ngrok

In [10]:
# Open an ngrok tunnel that forwards public traffic to local port 8000, the
# port the FastAPI server is configured to listen on.
# Note: despite its name, `public_url` holds the tunnel object returned by
# pyngrok (see the printed repr), not a plain string.
public_url = ngrok.connect(addr=8000)
print("Public URL:", public_url)

Public URL: NgrokTunnel: "https://nondisciplinable-subterrestrial-chante.ngrok-free.dev" -> "http://localhost:8000"


# Run FastAPI Server in Background Thread

In [11]:
# Handle to the uvicorn server, published by run_fastapi_server() so later
# cells can inspect it or request a shutdown (`fastapi_server.should_exit = True`).
fastapi_server = None


def run_fastapi_server():
    """Serve `app` with uvicorn on 0.0.0.0:8000; blocks until the server exits.

    Intended as the target of a background thread so the notebook stays
    interactive while the API is up.
    """
    global fastapi_server
    config = uvicorn.Config(app, host="0.0.0.0", port=8000, log_level="info")
    fastapi_server = uvicorn.Server(config)
    asyncio.run(fastapi_server.serve())


# Keep a reference to the thread so its liveness can be checked.
server_thread = threading.Thread(target=run_fastapi_server, daemon=True)
server_thread.start()

# Wait until uvicorn reports that it has started, so the next cell does not
# race the server startup when the notebook is executed with "Run All".
startup_deadline = time.monotonic() + 30
while time.monotonic() < startup_deadline and server_thread.is_alive():
    if fastapi_server is not None and fastapi_server.started:
        break
    time.sleep(0.1)
else:
    # Reached on timeout or when the server thread died during startup.
    raise RuntimeError(
        "FastAPI server did not start within 30 s (is port 8000 already in use?)"
    )


# Test QA Endpoint with cURL
Send a POST request to the FastAPI `/qa/` endpoint and display the model's answer.


In [13]:
!curl -X POST "https://nondisciplinable-subterrestrial-chante.ngrok-free.dev/qa/" \
-H "Content-Type: application/json" \
-d '{"question": "What are the common side effects of COVID-19 vaccines?"}'


INFO:     34.73.220.170:0 - "POST /qa/ HTTP/1.1" 200 OK
{"answer":"Common side effects include fever, fatigue, headache, muscle pain, joint pain, and chills. These symptoms usually last for a few days after vaccination."}