In [6]:
# Setup device
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import re

# Run on the GPU when torch reports one, otherwise fall back to the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Directory holding the saved checkpoint; tokenizer and model are both read from it
model_path = "./opt_collegebot"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path)
model = model.to(device)

# Inference only: put the model in evaluation mode
model.eval()

# generate() is given pad_token_id below, so reuse EOS when the tokenizer defines no pad token
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Define chat function with improved answer extraction
def chat(question, max_new_tokens=150):
    """Generate CollegeBot's answer to a single question.

    The question is wrapped in the "Question: ... / Answer:" prompt format,
    the model samples a continuation, and only the first answer block of
    that continuation is returned.

    Args:
        question: The user's question as plain text.
        max_new_tokens: Upper bound on the number of tokens generated after
            the prompt.

    Returns:
        The extracted answer text, or a fixed apology message when the model
        produced nothing usable.
    """
    prompt = f"Question: {question}\nAnswer:"
    # NOTE(review): truncation caps the prompt at 1024 tokens; if the
    # tokenizer truncates on the right, a very long question would lose the
    # trailing "Answer:" cue -- confirm the tokenizer's truncation side.
    inputs = tokenizer(
        prompt, return_tensors="pt", truncation=True, max_length=1024
    ).to(device)

    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            pad_token_id=tokenizer.pad_token_id,
            do_sample=True,
            top_p=0.95,
            temperature=0.4,
            eos_token_id=tokenizer.eos_token_id,
        )

    # Decode only the tokens produced after the prompt. Searching the full
    # decoded text (prompt included) would let an "Answer:", "Question:" or
    # blank line inside the user's own question hijack the extraction.
    prompt_length = inputs["input_ids"].shape[1]
    generated = tokenizer.decode(
        output[0][prompt_length:], skip_special_tokens=True
    )

    # Keep the first answer block: stop at a blank line or at the start of a
    # follow-up "Question:" turn the model may have invented.
    answer = re.split(r"\n\n|Question:", generated.lstrip(), maxsplit=1)[0].strip()

    # An empty continuation is not a usable answer; report it explicitly
    # instead of returning an empty string.
    if not answer:
        return "Sorry, I couldn't generate a proper response. Please try again."
    return answer

# Interactive chat loop: read a question, print the bot's answer, repeat
# until the user types an exit keyword or closes/interrupts the input.
print("🤖 CollegeBot is ready! (type 'exit' to quit)\n")

while True:
    try:
        # Strip surrounding whitespace so "exit " or " quit" still match below
        user_question = input("You: ").strip()
    except (EOFError, KeyboardInterrupt):
        # Closed stdin or Ctrl-C ends the session cleanly instead of with a traceback
        print("\n👋 Goodbye!")
        break

    # Nothing typed: ask again rather than sending an empty question to the model
    if not user_question:
        continue

    if user_question.lower() in {"exit", "quit", "bye"}:
        print("👋 Goodbye!")
        break

    response = chat(user_question)
    print(f"Bot: {response}\n")

🤖 CollegeBot is ready! (type 'exit' to quit)

👋 Goodbye!
