In [None]:
import pandas as pd
from transformers import AutoModelForCausalLM, AutoTokenizer
import time
import torch
import gc
import os

# Load the evaluation dataset from the working directory.
#
# ISO-8859-1 assigns a character to every possible byte value, so decoding
# with it can never raise UnicodeDecodeError; when it is tried first, a
# windows-1252 fallback is unreachable.  Try the stricter windows-1252 codec
# first (it leaves a few byte values undefined and can therefore fail) and
# keep ISO-8859-1 as the last resort that always succeeds.  The two codecs
# only differ for bytes 0x80-0x9F, which windows-1252 maps to printable
# punctuation and ISO-8859-1 maps to control characters.
try:
    data = pd.read_csv(r"finalDataset.csv", encoding='windows-1252')
except UnicodeDecodeError:
    print("Failed to load with windows-1252 encoding, trying ISO-8859-1")
    data = pd.read_csv(r"finalDataset.csv", encoding='ISO-8859-1')

# Free memory that may be left over from an earlier run before loading the model.
gc.collect()  # Run garbage collection to free CPU RAM
torch.cuda.empty_cache()  # Clear GPU memory
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
start_time = time.time()  # reference point for the elapsed-time measurement
model_name = "Telugu-LLM-Labs/Indic-gemma-7b-finetuned-sft-Navarasa-2.0"  # Replace with the actual model name if different

# Read the Hugging Face access token from the environment instead of embedding
# it in the source.  When HUGGINGFACE_TOKEN is unset this is None, which is
# passed through unchanged to from_pretrained (presumably falling back to any
# locally stored login -- confirm against the transformers documentation).
huggingface_token = os.environ.get("HUGGINGFACE_TOKEN")

tokenizer = AutoTokenizer.from_pretrained(model_name, token=huggingface_token)
model = AutoModelForCausalLM.from_pretrained(model_name, token=huggingface_token)
model.to(device)
print("Model and tokenizer loaded successfully")

# Evaluation loop: prompt the model once per dataset row, record its reply and
# score it by checking whether the expected answer occurs in the reply.
progress = 0  # stays 0 when the dataset is empty
correct_answers = 0
total_questions = len(data)
model_answers = []
selected_options = []  # not used in this section; kept in case later code reads it
predicted_correctly = []

# Column labels are case-sensitive, and the expected answer must be read from
# one single column for both scoring and logging.  Prefer 'Answer' and fall
# back to 'answer' so either spelling of the header works.
answer_column = 'Answer' if 'Answer' in data.columns else 'answer'

for progress, (index, row) in enumerate(data.iterrows(), start=1):
    question_with_options = f"""
    You are a person from India with deep knowledge and lived experience of Indian culture.
    Now, answer the following question using your expertise in Indian culture by identifying the specific cultural element being referred to.
    Respond only with the name of the cultural element (e.g., Indian) — no additional text, questions, or explanations.

    Question: {row['Corrected Question']}
"""

    inputs = tokenizer.encode(question_with_options, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = model.generate(inputs, max_length=1000)

    # The generated sequence starts with the prompt tokens.  Drop them by
    # position rather than by string replacement on the decoded text: decoding
    # is not guaranteed to reproduce the prompt character-for-character, and a
    # prompt left in the reply could make the substring check below match
    # text from the question itself.
    generated_tokens = outputs[0][inputs.shape[-1]:]
    model_answer = tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()

    # Missing cells are read as NaN and numeric cells as numbers, neither of
    # which has .strip(); normalise through str().  An empty expected answer
    # is scored as incorrect because "" is a substring of every reply.
    raw_answer = row[answer_column]
    expected_answer = "" if pd.isna(raw_answer) else str(raw_answer).strip().lower()

    if expected_answer and expected_answer in model_answer.lower():
        prediction_correctness = 'True'
        correct_answers += 1
    else:
        prediction_correctness = 'False'

    print(f"Progress: {progress}/{total_questions}")
    print(f"Model Answer: {model_answer}")
    print(f"Correct Answer: {raw_answer}")
    model_answers.append(model_answer)
    predicted_correctly.append(prediction_correctness)
    print("Correct answers so far:", correct_answers)

# Calculate the accuracy
# Guard the division so that an empty dataset reports 0% instead of raising
# ZeroDivisionError.
accuracy = correct_answers / total_questions if total_questions else 0.0
print(f"Final Accuracy: {accuracy * 100:.2f}%")
print(f"Total Correct Answers: {correct_answers}")

# Attach the per-row model replies and correctness flags, then save the
# augmented dataset next to the input file.
data['Model_Answer'] = model_answers
data['Predicted Correctly'] = predicted_correctly
data.to_csv('finalDataset_teluguLLM.csv', index=False)

# Elapsed wall-clock time in seconds since start_time was recorded.
end_time = time.time()
time_taken = end_time - start_time
