In [None]:
import time

import evaluate
import numpy as np
import pandas as pd
import torch
from datasets import load_dataset
from sklearn.model_selection import train_test_split
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, GenerationConfig, TrainingArguments, Trainer

# Dataset

In [None]:
# Tunable inputs for this cell, named so the split can be reproduced or changed in one place.
# `train_test_split` is imported in the notebook's import cell.
TRAIN_CSV_PATH = "/kaggle/input/llm1234/train.csv"
TEST_FRACTION = 0.2
SPLIT_SEED = 42
# Columns that the prompt-building cell reads from each row.
REQUIRED_COLUMNS = ["prompt", "A", "B", "C", "D", "E", "answer"]

data = pd.read_csv(TRAIN_CSV_PATH)

# Fail fast with a readable message instead of a KeyError several cells later.
missing_columns = [col for col in REQUIRED_COLUMNS if col not in data.columns]
if missing_columns:
    raise ValueError(f"{TRAIN_CSV_PATH} is missing expected columns: {missing_columns}")

# Fixed seed keeps the train/test membership identical across kernel restarts.
train_df, test_df = train_test_split(data, test_size=TEST_FRACTION, random_state=SPLIT_SEED)


In [None]:
model_name = 'google/flan-t5-base'

# Tokenizer and model are both loaded from the same checkpoint name.
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Weights are requested in bfloat16 via `torch_dtype`.
original_model = AutoModelForSeq2SeqLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,
)

# Metric

In [2]:
def mapk(actual, predicted, k=3):
    """
    Computes the mean average precision at k (MAP@k).

    Each question has exactly one correct label, so the per-question score is
    the reciprocal rank of the first matching prediction within the top k
    (1.0 for rank 1, 0.5 for rank 2, ...), or 0.0 when there is no match.

    Parameters:
    actual : list
        A list of correct labels, where each entry corresponds to the correct label for a single question.
    predicted : list
        A list of lists. Each inner list contains the predicted labels for a single question,
        ordered from most to least confident.
    k : int, optional (default=3)
        The maximum number of predicted elements to consider.

    Returns:
    score : float
        The MAP@k score for the predictions. Returns 0.0 when `actual` is empty.

    Notes:
    Questions are paired positionally; pairing stops at the shorter of the two
    inputs, while the mean is always taken over len(actual).
    """
    def apk(actual, predicted, k=3):
        """
        Computes the average precision at k (AP@k) for a single observation.
        """
        # Slicing copes with lists shorter than k, so no length check is needed.
        for rank, label in enumerate(predicted[:k], start=1):
            if label == actual:
                return 1.0 / rank

        return 0.0

    # With nothing to score, report 0.0 rather than dividing by zero.
    if len(actual) == 0:
        return 0.0

    return sum(apk(a, p, k) for a, p in zip(actual, predicted)) / len(actual)

# Example usage: the correct label sits at rank 1, rank 2 and rank 1 of the
# three prediction lists, so the score is (1 + 1/2 + 1) / 3.
actual = ['A', 'B', 'C']
predicted = [
    ['A', 'B', 'C'],
    ['D', 'B', 'C'],
    ['C', 'A', 'B'],
]
score = mapk(actual, predicted, k=3)
print(f'MAP@3: {score}')

MAP@3: 0.8333333333333334


# Zero-shot inference

In [None]:
# Position (not index label) of the training row used for this demo.
index = 79

# Fetch the row once instead of re-indexing the frame for every column.
sample_row = train_df.iloc[index]
question = sample_row['prompt']
A = sample_row['A']
B = sample_row['B']
C = sample_row['C']
D = sample_row['D']
E = sample_row['E']
answer = sample_row['answer']

# Zero-shot prompt: instruction plus the question and its five options, no worked examples.
prompt = f"""
Choose the correct answer for this question. Provide only the letter (A, B, C, D, or E) of the correct answer as the output

Question : {question}
A : {A}
B : {B}
C : {C}
D : {D}
E : {E}

"""

# Keep the inputs on the same device as the model so the cell still works if
# the model has been moved to an accelerator.
inputs = tokenizer(prompt, return_tensors='pt').to(original_model.device)

# Pass the whole encoding (input_ids and attention_mask) rather than only the ids.
generated_ids = original_model.generate(
    **inputs,
    max_new_tokens=200,
)
output = tokenizer.decode(generated_ids[0], skip_special_tokens=True)

# 99 dashes, the same separator width the original join-based expression produced.
dash_line = '-' * 99
print(dash_line)
print(f'INPUT PROMPT:\n{prompt}')
print(dash_line)
print(f'Correct Answer:\n{answer}\n')
print(dash_line)
print(f'MODEL Prediction - ZERO SHOT:\n{output}')