In [None]:
!pip install transformers accelerate bitsandbytes torch fastapi uvicorn pyngrok nest-asyncio langchain-ibm

In [None]:

from google.colab import userdata
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

print("Libraries imported.")

# Load secrets from Colab.
# Both names are pre-set to None so that later cells always see them defined,
# even when the lookup below fails part-way through (e.g. one secret is missing).
hf_token = None
ngrok_authtoken = None
try:
    hf_token = userdata.get('HF_TOKEN')
    ngrok_authtoken = userdata.get('NGROK_AUTHTOKEN')
    print("Credentials loaded successfully from Secrets.")
except Exception as e:
    # Best-effort: report the problem and continue with whatever was loaded.
    print(f"Error loading credentials: {e}. Please check your Colab Secrets.")

In [None]:
print("Loading IBM Granite model... (This may take a few minutes)")

# Hugging Face Hub repository id of the checkpoint to load.
model_path = "ibm-granite/granite-3.3-2b-instruct"

# Request 4-bit weight loading to keep the memory footprint small.
quantization_config = BitsAndBytesConfig(load_in_4bit=True)

# Tokenizer is fetched from the same repository as the weights.
tokenizer = AutoTokenizer.from_pretrained(model_path, token=hf_token)

# Model weights; device placement is delegated to device_map="auto".
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    quantization_config=quantization_config,
    token=hf_token,
)

print("✅ IBM Granite Model loaded successfully!")

In [None]:

import nest_asyncio
from pyngrok import ngrok
import uvicorn
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel

# --- Main function that talks to the IBM Granite model ---
def get_financial_advice(question, persona):
    """Generate a personal-finance answer to ``question`` for the given ``persona``.

    Builds a single-turn chat prompt, runs it through the module-level
    ``tokenizer`` and ``model``, and returns only the newly generated reply.

    Args:
        question: The user's question; inserted verbatim into the prompt.
        persona: Short description of who is asking; inserted into the prompt.

    Returns:
        The decoded reply with special tokens removed and surrounding
        whitespace stripped. The prompt text is not included.
    """
    chat_template = [
        {"role": "user", "content": f"You are a helpful Personal Finance assistant. A '{persona}' is asking a question. Answer clearly and concisely. Question: {question}"}
    ]
    prompt = tokenizer.apply_chat_template(chat_template, tokenize=False, add_generation_prompt=True)

    # The rendered chat template already contains its special tokens, so they must
    # not be added a second time. Calling the tokenizer (instead of .encode) also
    # yields the attention mask that generate() expects.
    # Inputs follow the model's device rather than a hard-coded "cuda".
    inputs = tokenizer(prompt, add_special_tokens=False, return_tensors="pt").to(model.device)
    prompt_length = inputs["input_ids"].shape[-1]

    outputs = model.generate(**inputs, max_new_tokens=250)

    # generate() returns prompt + completion. Slice off the prompt tokens so only
    # the reply is decoded; splitting the decoded text on a role marker is not
    # reliable because skip_special_tokens removes such markers.
    generated_tokens = outputs[0][prompt_length:]
    return tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()
# ----------------------------------------------------------------

# FastAPI app setup
app = FastAPI()

# Open CORS policy for a public demo API: any origin, method and header is accepted.
# Credentials are disabled because a wildcard origin must not be combined with
# allow_credentials=True; doing so would let any website make credentialed
# requests. Nothing in this file relies on cookies or auth headers.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Request body schema for the POST /ask endpoint.
class Query(BaseModel):
    # The user's question; passed to get_financial_advice and embedded in the prompt.
    question: str
    # Description of who is asking; embedded in the prompt as "A '<persona>' is asking".
    persona: str

# POST /ask: answer the question in the request body for the supplied persona
# and return it as {"answer": <text>}.
# NOTE(review): get_financial_advice is a synchronous call made inside an async
# handler; presumably this blocks the event loop while the model generates —
# confirm whether that serialization of requests is intended.
@app.post("/ask")
async def ask_ai(query: Query):
    return {"answer": get_financial_advice(query.question, query.persona)}

# Ngrok setup and Server Start
# One constant for the port so the tunnel target and the uvicorn bind port cannot drift apart.
SERVER_PORT = 8000

ngrok.set_auth_token(ngrok_authtoken)
# Allow nested event-loop use so uvicorn can be started from inside the notebook.
nest_asyncio.apply()
http_tunnel = ngrok.connect(SERVER_PORT)
try:
    print("------------------------------------------------------------")
    print(f"✅ IBM Granite Backend Ready! Ee Public URL ni vaadukondi: {http_tunnel.public_url}")
    print("------------------------------------------------------------")
    # Blocks until the server is stopped (e.g. the cell is interrupted).
    uvicorn.run(app, host="0.0.0.0", port=SERVER_PORT)
finally:
    # Always close the tunnel when the server stops or fails, so re-running this
    # cell does not pile up open tunnels on the ngrok account.
    ngrok.disconnect(http_tunnel.public_url)