In [1]:
# Dependencies and global client configuration for the in-context evaluation run.
import pandas as pd
from litellm import completion
import litellm
import os
# Switch on litellm's JSON-schema validation flag (used with structured `response_format` calls).
litellm.enable_json_schema_validation = True
# Verbose litellm logging.
# NOTE(review): debug output may echo request details; confirm nothing sensitive ends up in saved cell outputs.
litellm.set_verbose = True
from judge import Judge
import json
from litellm import ModelResponse
from dotenv import load_dotenv
# Load variables from a local .env file; presumably this supplies the provider API keys (TODO confirm).
load_dotenv()

True

In [2]:
# Size of the in-context window: how many leading dataset rows are replayed
# to the model as worked examples before it is evaluated.
context_rows: int = 30

In [3]:
# Load the stored problems together with the model's earlier responses.
df = pd.read_csv(filepath_or_buffer="gpt4o with context.csv")

In [60]:
from pydantic import BaseModel
# Structured-output schema for one model reply. Documented with `#` comments on
# purpose: a class docstring on a pydantic model is emitted into its JSON schema.
class Answer(BaseModel):
  # Free-text explanation that accompanies the answer.
  reasoning: str
  # The final answer text.
  answer: str



In [61]:
def generate_training_messages(df, start_idx:int, num_rows:int):
    """Build a chat transcript that replays earlier attempts as in-context examples.

    The transcript opens with the system prompt. For every selected row it then
    adds four messages: the problem (user), the stored raw model response
    (assistant), feedback on that response (user) and a fixed acknowledgement
    (assistant).

    Args:
        df: DataFrame providing the columns ``problem``, ``llm_raw_response``
            and ``is_correct``; ``answer`` and ``solution`` are read for rows
            whose ``is_correct`` value is falsy.
        start_idx: Position (``iloc``) of the first row to include.
        num_rows: Maximum number of rows to include; the window is clipped at
            the end of ``df``.

    Returns:
        A list of ``{'role': ..., 'content': ...}`` dicts of length
        ``1 + 4 * rows_used``.
    """
    end_idx = min(start_idx + num_rows, len(df))
    # The example object in the prompt must be valid JSON (note the comma
    # between the two keys), otherwise the model is shown a malformed sample.
    messages = [{
                'role': 'system',
                'content': '''Be a helpful assistant.
                You need to just give me the final answer and no other text. Don't tell the steps. Just give the final output for the answer key 
                and your reasoning in the reasoning key. 
                
                example:
                user query: What is the area of a rectangle with length 3cm and breadth 4cm. 
                assistant output: 
                {
                    "reasoning": "area of a rectangle is length * breadth, so here it will be 3cm*4cm which is 12cm squared.",
                    "answer" : "area is 12 cm squared."
                }
                ''',
            }]
    for idx in range(start_idx, end_idx):
        row = df.iloc[idx]

        # Feedback depends on whether the stored response was judged correct.
        if row['is_correct']:
            feedback = "Good job! Your reasoning and answer are correct!"
        else:
            feedback = f"Let me correct this. The right answer is {row['answer']}. Let's understand the solution: {row['solution']}"

        messages.extend([
            {
                'role': 'user',
                'content': row['problem']
            },
            {
                'role': 'assistant',
                'content': row['llm_raw_response']
            },
            {
                'role': 'user',
                'content': feedback
            },
            {
                'role': 'assistant',
                'content': "Understood. I will keep this in mind"
            },
        ])

    return messages

In [62]:
# Replay the first `context_rows` rows of the dataset as in-context examples.
training_data = generate_training_messages(
    df,
    start_idx=0,
    num_rows=context_rows,
)

In [None]:
# Display how many messages the in-context transcript contains.
len(training_data)

In [64]:
def test_process_math_problems_te(df, start_idx:int, end_idx:int):
    """Re-answer problems with the in-context transcript prepended, then grade them.

    For each row position from ``start_idx`` through ``end_idx`` the problem is
    sent to ``gpt-4o`` after the messages in the module-level ``training_data``,
    the reply is parsed as an ``Answer``, and a ``Judge`` compares it with the
    reference answer. Results are written back into ``df`` in three columns
    whose names end in the module-level ``context_rows`` value.

    Args:
        df: DataFrame with ``problem`` and ``answer`` columns; mutated in place.
        start_idx: Position (``iloc``) of the first row to process.
        end_idx: Position of the last row to process. This bound is INCLUSIVE
            and is clipped to the end of ``df``.

    Returns:
        None. One progress line is printed per processed row.
    """
    # Result columns are tagged with the size of the in-context window.
    raw_col = f'llm_raw_response_test_context_{context_rows}'
    answer_col = f'llm_answer_test_{context_rows}'
    correct_col = f'is_correct_test_{context_rows}'

    for idx in range(start_idx, min(len(df), end_idx + 1)):
        row = df.iloc[idx]
        problem = row['problem']
        answer = row["answer"]
        # The instructions are repeated right before the new problem. The
        # example object must be valid JSON (comma between the two keys).
        messages = training_data + [
            {
                'role': 'system',
                'content': '''Be a helpful assistant.
                You need to just give me the final answer and no other text. Don't tell the steps. Just give the final output for the answer key 
                and your reasoning in the reasoning key. 
                
                example:
                user query: What is the area of a rectangle with length 3cm and breadth 4cm. 
                assistant output: 
                {
                    "reasoning": "area of a rectangle is length * breadth, so here it will be 3cm*4cm which is 12cm squared.",
                    "answer" : "area is 12 cm squared."
                }
                ''',
            },
            {
                'role': 'user',
                'content': problem,
            }]

        response = completion(model='gpt-4o', messages=messages,
            response_format=Answer
        )
        answer_content = response.choices[0]['message']['content']

        answer_obj = Answer.model_validate_json(answer_content)
        llm_answer = answer_obj.answer

        judge = Judge(model='gemini/gemini-2.0-flash')
        answer_correctness_obj = judge.prediction(query=problem,answer1=answer,answer2=llm_answer)

        # `idx` is a position, while `.at` addresses rows by label. Translate it
        # so the write hits the row that was read even when the index is not a
        # default RangeIndex (e.g. after filtering or slicing).
        row_label = df.index[idx]
        df.at[row_label, raw_col] = answer_content
        df.at[row_label, answer_col] = llm_answer
        df.at[row_label, correct_col] = answer_correctness_obj.correct

        print(f"Processed row {idx}")


In [None]:
# Evaluate the rows that follow the in-context window, through row position 100.
test_process_math_problems_te(
    df,
    start_idx=context_rows,
    end_idx=100,
)

In [66]:
# Persist the results (this overwrites the file the data was loaded from).
# index=False keeps the row index out of the file; otherwise every
# save/load round trip adds another "Unnamed: 0" column.
df.to_csv('gpt4o with context.csv', index=False)

In [4]:
# Reload the saved results from disk.
df = pd.read_csv(filepath_or_buffer='gpt4o with context.csv')

In [5]:
# Held-out rows: positions from the end of the in-context window up to, but
# not including, position 100.
df_test = df.iloc[slice(context_rows, 100)]

In [6]:
# Correct / incorrect tally of the stored `is_correct` column on the held-out rows.
df_test.loc[:, 'is_correct'].value_counts()

is_correct
True     47
False    23
Name: count, dtype: int64

In [7]:
# Correct / incorrect tally for the run that used the in-context transcript.
df_test.loc[:, f'is_correct_test_{context_rows}'].value_counts()

is_correct_test_30
True     45
False    25
Name: count, dtype: int64

In [12]:
# Number of correct answers in the in-context run. Select the count by its
# label (True): an integer key on this boolean-labelled Series is treated as a
# position, which is deprecated and only matches the "True" count while True
# happens to be the most frequent value.
df_test[f'is_correct_test_{context_rows}'].value_counts()[True]

  df_test[f'is_correct_test_{context_rows}'].value_counts()[0]


45