# In [None]:  (notebook cell marker left over from the Jupyter/Colab export)
import os
from threading import Thread

import torch
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from pyngrok import ngrok
from transformers import AutoTokenizer, AutoModelForCausalLM
from uvicorn import run

# Initialize FastAPI app
app = FastAPI()

# Add CORS middleware so a browser front end served from a different origin
# can call this API.
# Credentials are disabled while origins are wildcarded: combining
# allow_origins=["*"] with allow_credentials=True would let any website make
# credentialed (cookie / Authorization) cross-origin requests. To enable
# credentials, list the trusted origins explicitly and set
# allow_credentials=True together with that explicit list.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Or use ["http://localhost:5173"] for more security
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Load the model and tokenizer once, at module load, so requests reuse them.
model_path = '/content/opt_collegebot'  # Update with actual model path in Colab
# Prefer the GPU, but fall back to CPU so loading does not crash on a runtime
# without CUDA (e.g. a Colab session started without a GPU accelerator).
device = "cuda" if torch.cuda.is_available() else "cpu"
model = AutoModelForCausalLM.from_pretrained(model_path).to(device)
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Request body schema
class RequestBody(BaseModel):
    # JSON payload accepted by POST /predict/, e.g. {"question": "..."}.
    # Documented with `#` comments rather than a docstring so the generated
    # API schema is left untouched.
    question: str  # user's question; `predict` interpolates it into the prompt

# API endpoint
@app.post("/predict/")
async def predict(body: RequestBody):
    """Generate an answer for the submitted question.

    The question is wrapped in the ``Question: ... Answer:`` prompt template,
    fed to the causal language model, and the decoded output is returned.

    Args:
        body: Request payload carrying the user's ``question``.

    Returns:
        A dict of the form ``{"response": <text>}`` where ``<text>`` is the
        decoded first output sequence with special tokens stripped. For a
        causal LM this sequence includes the prompt followed by the generated
        continuation.
    """
    input_text = f"Question: {body.question} Answer:"
    # Send the inputs to whatever device the model actually lives on instead
    # of hard-coding "cuda", so tensors and weights can never be mismatched.
    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
    # NOTE(review): generate() is a blocking call executed inside an
    # ``async def`` handler, so other requests wait while it runs -- consider
    # offloading it to a worker thread if concurrent traffic is expected.
    output = model.generate(
        inputs["input_ids"],
        # Forward the attention mask explicitly; without it generate() has to
        # guess which positions are padding.
        attention_mask=inputs.get("attention_mask"),
        # Bound only the generated tokens. ``max_length`` also counts the
        # prompt, so a long question would leave no room for an answer.
        max_new_tokens=100,
    )
    response = tokenizer.decode(output[0], skip_special_tokens=True)
    return {"response": response}

# Start FastAPI server
def run_server():
    """Serve ``app`` with uvicorn, bound to every interface on port 8000."""
    bind_options = {"host": "0.0.0.0", "port": 8000}
    run(app, **bind_options)

# The ngrok auth token is a credential, so it is read from the environment
# rather than being hard-coded in the source. It is checked before the server
# thread is started so that a missing token fails fast instead of leaving an
# unexposed server running in the background.
ngrok_auth_token = os.environ.get("NGROK_AUTHTOKEN")
if not ngrok_auth_token:
    raise RuntimeError(
        "NGROK_AUTHTOKEN is not set; export your ngrok auth token "
        "before starting the server."
    )

# Serve the app on a background thread so this (main) thread is free to open
# the tunnel below.
server_thread = Thread(target=run_server)
server_thread.start()

# Authenticate and expose via ngrok
ngrok.set_auth_token(ngrok_auth_token)
public_url = ngrok.connect(8000)
print(f"🚀 Public API URL: {public_url}")
