# In [None]: (Jupyter notebook cell exported as a script)
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Directory containing the saved model and tokenizer files
model_path = "/content/opt_collegebot"  # change if your path is different

# Load the fine-tuned tokenizer and model from that directory, then place the
# model on the selected device.
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path)
model = model.to(device)

# Text generation function
def generate_answer(prompt, max_length=200):
    """Sample one completion for ``prompt`` from the module-level model.

    Args:
        prompt: Text that is tokenized and passed to ``model.generate``.
        max_length: Forwarded unchanged as the ``max_length`` argument of
            ``model.generate``.
            NOTE(review): in transformers this limit appears to count the
            prompt tokens as well as the generated ones -- confirm.

    Returns:
        The first sequence returned by ``model.generate``, decoded to a
        string with special tokens skipped.
    """
    # Put the model in evaluation mode before generating.
    model.eval()

    # Tokenize to PyTorch tensors and move them to the same device as the model.
    encoded = tokenizer(prompt, return_tensors="pt")
    encoded = encoded.to(device)

    # Sampling configuration: nucleus sampling with temperature; the EOS token
    # id is supplied as the padding token id.
    sampling_options = {
        "max_length": max_length,
        "pad_token_id": tokenizer.eos_token_id,
        "do_sample": True,
        "top_p": 0.95,
        "temperature": 0.9,
    }

    # Gradients are not needed for generation.
    with torch.no_grad():
        generated = model.generate(**encoded, **sampling_options)

    first_sequence = generated[0]
    return tokenizer.decode(first_sequence, skip_special_tokens=True)

# Ask questions interactively
# Read questions until the user types 'exit'/'quit' or closes the input stream.
while True:
    try:
        user_question = input("\n🧠 Ask your question (or type 'exit'): ").strip()
    except (EOFError, KeyboardInterrupt):
        # Ctrl-D, Ctrl-C or a closed stdin ends the session cleanly instead of
        # surfacing a traceback.
        print()
        break

    # Surrounding whitespace was stripped above, so "exit " also quits.
    if user_question.lower() in ('exit', 'quit'):
        break

    # The prompt template appends its own '?'; drop any the user typed so the
    # model does not see "??".
    question_text = user_question.rstrip("?").rstrip()
    if not question_text:
        # Blank (or punctuation-only) input: ask again rather than sending an
        # empty question to the model.
        continue

    full_prompt = f"Question: {question_text}? Answer:"
    response = generate_answer(full_prompt)
    print("🤖", response)
