In [None]:
from datasets import load_dataset
import torch 
from transformers import AutoModelForSeq2SeqLM, AutoModelForCausalLM, AutoTokenizer, pipeline , GenerationConfig, TrainingArguments, Trainer
import torch
import time
import evaluate
import pandas as pd
import numpy as np

# Dataset

In [None]:
from sklearn.model_selection import train_test_split

data = pd.read_csv("/kaggle/input/llm1234/train.csv")
train_df, test_df = train_test_split(data, test_size=0.2, random_state=42)


In [None]:
torch.random.manual_seed(0) 
model = AutoModelForCausalLM.from_pretrained( 
    "microsoft/Phi-3-mini-4k-instruct",  
    device_map="cuda",  
    torch_dtype="auto",  
    trust_remote_code=True,  
) 

tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct") 

# Metric

In [2]:
def mapk(actual, predicted, k=3):
    """
    Computes the mean average precision at k (MAP@k).
    
    Parameters:
    actual : list
        A list of correct labels, where each entry corresponds to the correct label for a single question.
    predicted : list
        A list of lists. Each inner list contains the predicted labels for a single question.
    k : int, optional (default=3)
        The maximum number of predicted elements to consider.
        
    Returns:
    score : float
        The MAP@k score for the predictions.
    """
    def apk(actual, predicted, k=3):
        """
        Computes the average precision at k (AP@k) for a single observation.
        """
        if len(predicted) > k:
            predicted = predicted[:k]
        
        for i, p in enumerate(predicted):
            if p == actual:
                return 1.0 / (i + 1.0)
        
        return 0.0
    
    return sum(apk(a, p, k) for a, p in zip(actual, predicted)) / len(actual)

# Example usage
actual = ['A', 'B', 'C']
predicted = [['A', 'B', 'C'], ['D', 'B', 'C'], ['C', 'A', 'B']]
score = mapk(actual, predicted, k=3)
print(f'MAP@3: {score}')

MAP@3: 0.8333333333333334


# Zero shot inferencing 

In [None]:
index = 0

question = train_df.iloc[index]['prompt']
A = train_df.iloc[index]['A']
B = train_df.iloc[index]['B']
C = train_df.iloc[index]['C']
D = train_df.iloc[index]['D']
E = train_df.iloc[index]['E']
answer = train_df.iloc[index]['answer']




example = f"""
Choose the correct answer for this question. To answer this question, let's analyze each option step by step 

Question : What is the capital of france
A : Lyon
B : Paris
C : London
D : Toulouse
E : Madrid

"""

prompt = f"""
Choose the correct answer for this question. To answer this question, let's analyze each option step by step 

Question : {question}
A : {A}
B : {B}
C : {C}
D : {D}
E : {E}

"""



messages = [ 
    {"role": "system", "content": "You are a helpful AI assistant for question answering."}, 
    {"role": "user", "content": f"{example}"}, 
    {"role": "assistant", "content": "B"}, 
    {"role": "user", "content": f"{prompt}"}, 
] 

pipe = pipeline( 
    "text-generation", 
    model=model, 
    tokenizer=tokenizer, 
) 

generation_args = { 
    "max_new_tokens": 500, 
    "return_full_text": False, 
    "temperature": 0.0, 
    "do_sample": False, 
} 

output = pipe(messages, **generation_args) 
responce = output[0]['generated_text'] 

dash_line = '-'.join('' for x in range(100))
print(dash_line)
print(f'INPUT PROMPT:\n{prompt}')
print(dash_line)
print(f'Correct Answer:\n{answer}\n')
print(dash_line)
print(f'MODEL Prediction - ZERO SHOT:\n{responce}')

---------------------------------------------------------------------------------------------------
INPUT PROMPT:

Choose the correct answer for this question. To answer this question, let's analyze each option step by step 

Question : What is a Hilbert space in quantum mechanics?
A : A complex vector space where the state of a classical mechanical system is described by a vector |Ψ⟩.
B : A physical space where the state of a classical mechanical system is described by a vector |Ψ⟩.
C : A physical space where the state of a quantum mechanical system is described by a vector |Ψ⟩.
D : A mathematical space where the state of a classical mechanical system is described by a vector |Ψ⟩.
E : A complex vector space where the state of a quantum mechanical system is described by a vector |Ψ⟩.


---------------------------------------------------------------------------------------------------
Correct Answer:

E

---------------------------------------------------------------------------------------------------
MODEL Prediction - ZERO SHOT:

 E

In [None]:
# Initialize lists to hold actual and predicted values
actual_labels = []
predicted_labels = []

pipe = pipeline( 
        "text-generation", 
        model=model, 
        tokenizer=tokenizer, 
    ) 

generation_args = { 
    "max_new_tokens": 500, 
    "return_full_text": False, 
    "temperature": 0.0, 
    "do_sample": False, 
} 

for index, row in test_df.iterrows():
    question = row['prompt']
    A = row['A']
    B = row['B']
    C = row['C']
    D = row['D']
    E = row['E']
    answer = row['answer']
    
    # Add the correct answer to the actual labels list
    actual_labels.append(answer)
    
    prompt = f"""
    Choose the correct answer for this question. To answer this question, let's analyze each option step by step 

    Question : {question}
    A : {A}
    B : {B}
    C : {C}
    D : {D}
    E : {E}

    """
    
    # Construct the prompt
    messages = [ 
    {"role": "system", "content": "You are a helpful AI assistant for question answering."}, 
    {"role": "user", "content": f"{example}"}, 
    {"role": "assistant", "content": "B"}, 
    {"role": "user", "content": f"{prompt}"}, 
    ] 

    output = pipe(messages, **generation_args) 
    responce = output[0]['generated_text'] 
    
    # Convert the output to a list and add it to the predicted labels list
    predicted_labels.append([responce.strip()])
    
# Calculate the MAP@k
k = 3  # You can change k if needed
mapk_score = mapk(actual_labels, predicted_labels, k)
print(f'MAP@{k} with the original model - Zero Shot: {mapk_score}')

MAP@3 with the original model - Zero Shot: 0.775
