In [1]:
!pip install outlines
!pip install context_cite

Collecting context_cite
  Downloading context_cite-0.0.4-py3-none-any.whl.metadata (7.7 kB)
Collecting nltk>=3.8.2 (from context_cite)
  Using cached nltk-3.9.1-py3-none-any.whl.metadata (2.9 kB)
Collecting spacy (from context_cite)
  Using cached spacy-3.8.4-cp311-cp311-win_amd64.whl.metadata (27 kB)
Collecting click (from nltk>=3.8.2->context_cite)
  Using cached click-8.1.8-py3-none-any.whl.metadata (2.3 kB)
Collecting spacy-legacy<3.1.0,>=3.0.11 (from spacy->context_cite)
  Using cached spacy_legacy-3.0.12-py2.py3-none-any.whl.metadata (2.8 kB)
Collecting spacy-loggers<2.0.0,>=1.0.0 (from spacy->context_cite)
  Using cached spacy_loggers-1.0.5-py3-none-any.whl.metadata (23 kB)
Collecting murmurhash<1.1.0,>=0.28.0 (from spacy->context_cite)
  Using cached murmurhash-1.0.12-cp311-cp311-win_amd64.whl.metadata (2.2 kB)
Collecting cymem<2.1.0,>=2.0.2 (from spacy->context_cite)
  Using cached cymem-2.0.11-cp311-cp311-win_amd64.whl.metadata (8.8 kB)
Collecting preshed<3.1.0,>=3.0.2 (from 

## Imports

In [10]:
%load_ext autoreload
%autoreload 2

# Working-directory and import-path setup for the project-local modules imported below.
import sys, os

# If the current directory is called 'notebooks', chdir to its parent so that the
# relative paths used in this notebook are resolved from there.
if os.path.basename(os.getcwd()) == 'notebooks':
    os.chdir('../')
    
# 'attribution' is a relative entry, so it is resolved against the current working directory.
sys.path.append('attribution')

from torch.utils.data import DataLoader

import pandas as pd
from constants import ModelNames, DatasetNames, LANGUAGE_MAPPING
from model_utils import Model 
from dataset_utils import GSMDataset, PaddingCollator, is_correct_gsm, extract_answer_gsm
from context_cite import ContextCiter

import warnings

# Filter specific warning categories for the rest of the session
warnings.filterwarnings("ignore", category=UserWarning)  # For general user warnings
warnings.filterwarnings("ignore", category=FutureWarning)  # For warnings about upcoming behaviour changes (this is not DeprecationWarning)

# Definitions
# CSV written by ResponseProcessing.process_model_responses_for_analysis and read back
# by the inferencing cells.
processed_data_path = "results/analysis_mgsm_en_Qwen2.5-1.5B-Instruct_results.csv"
# Identifier of the model under analysis.
model_name = ModelNames.QwenInstruct

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Analysis: Processing Responses

In [16]:
def load_model_dataset(model_id=None, dataset_id=None):
    """Instantiate the model wrapper and pick the dataset identifier to analyse.

    Args:
        model_id: Value passed to ``Model(...)``. Defaults to
            ``ModelNames.QwenInstruct`` when omitted.
        dataset_id: Dataset identifier returned alongside the model. Defaults to
            ``DatasetNames.MGSM`` when omitted.

    Returns:
        A ``(model, dataset_id)`` tuple.
    """
    # Defaults are resolved at call time so that calling with no arguments behaves
    # exactly like the previous hard-coded version.
    if model_id is None:
        model_id = ModelNames.QwenInstruct
    if dataset_id is None:
        dataset_id = DatasetNames.MGSM

    return Model(model_id), dataset_id

class ResponseProcessing:
    """Convert raw model generations into a per-question analysis CSV.

    The raw generations are read from a CSV with a ``response`` column. Each
    response is split on dash-prefixed lines into steps, the steps are renumbered
    (1, 2, 3, ...), and the result is written next to the dataset's question and
    reference answer.
    """

    # Default location of the raw generations. A forward slash is used because it
    # works on every platform; a backslash here would be a Windows-only separator
    # and `\m` is an invalid string escape sequence.
    DEFAULT_GENERATIONS_PATH = "results/mgsm_en_Qwen2.5-1.5B-Instruct_results.csv"

    def __init__(self, model, dataset):
        """Store the model wrapper and dataset identifier used for processing.

        Args:
            model: Object exposing a ``tokenizer`` attribute (passed to ``GSMDataset``).
            dataset: Dataset identifier forwarded to ``GSMDataset``.
        """
        # Column order of the CSV produced by process_model_responses_for_analysis.
        self.df_column_names = ["question", "actual_answer", "model_gen_steps", "model_gen_answer"]
        self.model = model
        self.dataset = dataset

    def convert_dashes_incremental_steps_list(self, steps):
        """Prefix every non-empty step with a running number.

        The first element of ``steps`` (the text preceding the first dash) is
        dropped, and empty parts are skipped without consuming a number.

        Args:
            steps: List of strings produced by splitting a response on ``"\\n-"``.

        Returns:
            List of strings of the form ``"<n> <step>"``.
        """
        non_empty_steps = [step for step in steps[1:] if step]

        # No full stop after the number: ContextCite would otherwise treat the
        # step number itself as a separate sentence.
        return [f"{number} {step}" for number, step in enumerate(non_empty_steps, start=1)]

    def convert_dashes_incremental_steps(self, step):
        """Render the numbered steps as one newline-separated string.

        Args:
            step: List of step strings (see convert_dashes_incremental_steps_list).

        Returns:
            str starting with the ``"Step-by-Step Answer:\\n"`` header followed by
            one numbered step per line.
        """
        furnished_steps = self.convert_dashes_incremental_steps_list(step)

        # Steps are separated by "\n" so each one sits on its own line.
        return "Step-by-Step Answer:\n" + "\n".join(furnished_steps)

    def _split_response_into_steps(self, response):
        """Split a raw response on dash-prefixed lines and trim the final step.

        The final step is cut at the first full stop that is followed by a line
        break, which removes whatever text trails the last step. Both CRLF and LF
        line endings are recognised; handling only ``".\\r\\n"`` leaves the
        trailing text attached whenever the response uses plain ``"\\n"``.

        Args:
            response: Raw generated text.

        Returns:
            List of step strings; element 0 is the text before the first dash.
        """
        steps = response.split("\n-")

        final_step = steps[-1]
        cut_points = [
            position
            for position in (final_step.find(".\r\n"), final_step.find(".\n"))
            if position != -1
        ]
        if cut_points:
            final_step = final_step[:min(cut_points)]
        steps[-1] = final_step

        return steps

    def process_model_responses_for_analysis(self, generations_path=None, output_path=None):
        """Build the analysis CSV from the raw generations.

        Args:
            generations_path: CSV with a ``response`` column. Defaults to
                ``DEFAULT_GENERATIONS_PATH``.
            output_path: Destination CSV. Defaults to the notebook-level
                ``processed_data_path`` constant.

        Returns:
            None. The result is written to ``output_path`` without the index.
        """
        if generations_path is None:
            generations_path = self.DEFAULT_GENERATIONS_PATH
        if output_path is None:
            output_path = processed_data_path

        mgsm_test = GSMDataset(self.dataset, self.model.tokenizer, instructions='', split='test', config='en')

        mgsm_generation_df = pd.read_csv(generations_path)
        mgsm_generations = mgsm_generation_df['response'].tolist()

        all_steps = []
        all_gen_final_ans = []

        for response in mgsm_generations:
            steps = self._split_response_into_steps(response)
            all_steps.append(self.convert_dashes_incremental_steps(steps))

            # The final answer is extracted from the untouched response text.
            all_gen_final_ans.append(extract_answer_gsm(response))

        question_list = mgsm_test.dataset['question']
        actual_answer = mgsm_test.dataset['answer_number']

        # zip stops at the shortest input, so rows are dropped silently if the
        # dataset and the generations differ in length.
        analysis_df = pd.DataFrame(
            data=zip(question_list, actual_answer, all_steps, all_gen_final_ans),
            columns=self.df_column_names,
        )

        analysis_df.to_csv(output_path, index=False)



## Main

In [17]:
# Compare `__name__` rather than testing a bare string literal, which is always truthy.
# Inside a Jupyter kernel `__name__` is "__main__", so this cell still runs when executed.
if __name__ == '__main__':
    context_model, dataset = load_model_dataset()

    responseProcessing = ResponseProcessing(context_model, dataset)
    responseProcessing.process_model_responses_for_analysis()

    

Device set to use cuda:0


## Inferencing

### Steps:
 1. Read from "analysis_{model_name}"
 2. Pass in model_generated_steps and query
 3. Check if the model's answer matches our (reference) answer
 4. If yes, use `cc.get_attributions()` to compute the attributions with [ContextCite](https://github.com/MadryLab/context-cite)
 5. Save the np.array to the respective row of the "analysis_{model_name}" set

In [18]:
# Use the `model_name` constant from the definitions cell instead of repeating the
# model identifier here, so the model is configured in one place.
context_model = Model(model_name)

# Processed steps/answers written by ResponseProcessing.
model_responses = pd.read_csv(processed_data_path)
model_responses.head()

Device set to use cuda:0


Unnamed: 0,question,actual_answer,model_gen_steps,model_gen_answer
0,Janet’s ducks lay 16 eggs per day. She eats th...,18,"Step-by-Step Answer:\n1 First, calculate the ...",18.0
1,A robe takes 2 bolts of blue fiber and half th...,3,Step-by-Step Answer:\n1 The robe requires 2 b...,3.0
2,Josh decides to try flipping a house. He buys...,70000,Step-by-Step Answer:\n1 The original price of...,170000.0
3,James decides to run 3 sprints 3 times a week....,540,Step-by-Step Answer:\n1 James runs 3 sprints ...,3.0
4,"Every day, Wendi feeds each of her chickens th...",20,Step-by-Step Answer:\n1 The total amount of f...,20.0


In [21]:
# Attribute the model's answer to the generated steps with ContextCite.
# NOTE: the `break` at the end of the loop body stops after the first row, so only
# one example is attributed and displayed.
for index, row in model_responses.iterrows():
    context = row['model_gen_steps']
    query = row['question']
    
    # Remove the first line from the context (the "Step-by-Step Answer:" header).
    # Raises IndexError if the context contains no newline.
    context = context.split("\n", 1)[1]
    
    # The from-pretrained route is avoided because it creates a new model each time
    # (presumably `ContextCiter.from_pretrained` -- confirm against the library).
    # The constructor is still called per row because of the processing done during initialization.
    cc = ContextCiter(context_model.model, context_model.tokenizer, context, query)
    df = cc.get_attributions(as_dataframe=True, verbose=True)
    display(df)
    break

Attributed: To determine how much Janet makes from selling the eggs at the farmers' market each day, we need to follow these steps:

1. Calculate the total number of eggs laid per day:
   \[
   16 \text{ eggs}
   \]

2. Subtract the number of eggs Janet eats for breakfast each morning:
   \[
   16 - 3 = 13 \text{ eggs}
   \]

3. Subtract the number of eggs used to bake muffins for her friends:
   \[
   13 - 4 = 9 \text{ eggs}
   \]

4. Multiply the number of remaining eggs by the price per egg ($2):
   \[
   9 \times \$2 = \$18
   \]

Therefore, Janet makes $18 per day at the farmers' market.<|im_end|>


  0%|          | 0/64 [00:00<?, ?it/s]

Unnamed: 0,Score,Source
0,7.314,"4 Janet sells these remaining eggs at the farmers' market for $2 per egg, so multiply the number of eggs by this price: 9 * $2 = $18 per day."
1,0.0,The answer is 18.<
2,0.0,"3 She also bakes muffins for her friends using 4 eggs, so again subtract those: 13 - 4 = 9 eggs remaining."
3,0.0,"2 Janet eats 3 eggs for breakfast every morning, so subtract those: 16 - 3 = 13 eggs remaining."
4,0.0,"1 First, calculate the number of eggs laid by the ducks per day: 16 eggs/day."
