In [88]:
import pandas as pd
from litellm import completion
import litellm
import os
litellm.enable_json_schema_validation = True
litellm.set_verbose = True
from judge import Judge
import json
from litellm import ModelResponse
from dotenv import load_dotenv
load_dotenv()
import time

In [89]:
# Number of leading rows of df used as few-shot context (passed as num_rows when
# building training_data); also embedded in the names of the result columns.
context_rows = 40

In [91]:
# Load the results table. Prefer "gemini with context.csv" when it exists
# (presumably output saved by an earlier run -- the save step is not visible
# in this part of the notebook; TODO confirm).
try:
    df = pd.read_csv("gemini with context.csv")
except FileNotFoundError:
    # Otherwise start from the "base accuracy 100 gemini.csv" table.
    df = pd.read_csv("base accuracy 100 gemini.csv")

In [92]:
from pydantic import BaseModel
# Parsed form of the solver model's JSON reply; the raw reply text is validated
# into this model with Answer.model_validate_json. The two fields mirror the
# keys declared in response_schema_answer.
class Answer(BaseModel):
  # The model's explanation of how it reached the answer.
  reasoning: str
  # The final answer text; this is the value handed to the judge.
  answer: str

# JSON schema sent as the `response_schema` of the completion request: an
# object with two required string fields, "reasoning" and "answer".
response_schema_answer = dict(
    type="object",
    properties=dict(
        reasoning=dict(type="string"),
        answer=dict(type="string"),
    ),
    required=["reasoning", "answer"],
)



In [93]:
def generate_training_messages(df, start_idx:int, num_rows:int):
    """Build a few-shot chat transcript from consecutive rows of ``df``.

    The transcript opens with one system message. Each selected row then
    contributes four messages: the problem (user), the model's earlier raw
    response (assistant), feedback on that response (user) and a fixed
    acknowledgement (assistant).

    Args:
        df: DataFrame with the columns 'problem', 'llm_raw_response',
            'is_correct', 'answer' and 'solution'.
        start_idx: Position of the first row to include.
        num_rows: Maximum number of rows to include; the range is cut off at
            the end of ``df``.

    Returns:
        A list of ``{'role': ..., 'content': ...}`` dicts.
    """
    # Spelled out line by line so the whitespace that is part of the prompt
    # (indentation and trailing spaces) is visible.
    system_prompt = (
        "Be a helpful assistant.\n"
        "                You need to just give me the final answer and no other text. Don't tell the steps. Just give the final output for the answer key \n"
        "                and your reasoning in the reasoning key. \n"
        "                \n"
        "                example:\n"
        "                user query: What is the area of a rectangle with length 3cm and breadth 4cm. \n"
        "                assistant output: \n"
        "                {\n"
        '                    "reasoning": "area of a rectangle is length * breadth, so here it will be 3cm*4cm which is 12cm squared."\n'
        '                    "answer" : "area is 12 cm squared."\n'
        "                }\n"
        "                "
    )
    transcript = [{'role': 'system', 'content': system_prompt}]

    stop = min(start_idx + num_rows, len(df))
    for position in range(start_idx, stop):
        example = df.iloc[position]
        question = example['problem']
        earlier_reply = example['llm_raw_response']

        # Praise a correct earlier reply; otherwise supply the reference
        # answer together with its worked solution.
        if example['is_correct']:
            feedback = "Good job! Your reasoning and answer are correct!"
        else:
            feedback = f"Let me correct this. The right answer is {example['answer']}. Let's understand the solution: {example['solution']}"

        transcript += [
            {'role': 'user', 'content': question},
            {'role': 'assistant', 'content': earlier_reply},
            {'role': 'user', 'content': feedback},
            {'role': 'assistant', 'content': "Understood. I will keep this in mind"},
        ]

    return transcript

In [94]:
# Few-shot transcript built from the first `context_rows` rows of df; it is
# prepended to every request made by test_process_math_problems_te.
training_data = generate_training_messages(df,start_idx=0,num_rows=context_rows)

In [None]:
# Sanity check: 1 system message plus 4 messages per context row
# (fewer rows are used if df has less than context_rows rows).
len(training_data)

In [96]:
def test_process_math_problems_te(df, start_idx:int, end_idx:int):
    idx = start_idx
    while idx<end_idx:
        try:
            problem = df.iloc[idx]['problem']
            answer = df.iloc[idx]["answer"]
            messages = training_data + [
                {
                    'role': 'system',
                    'content': '''Be a helpful assistant.
                    You need to just give me the final answer and no other text. Don't tell the steps. Just give the final output for the answer key 
                    and your reasoning in the reasoning key. 
                    
                    example:
                    user query: What is the area of a rectangle with length 3cm and breadth 4cm. 
                    assistant output: 
                    {
                        "reasoning": "area of a rectangle is length * breadth, so here it will be 3cm*4cm which is 12cm squared."
                        "answer" : "area is 12 cm squared."
                    }
                    ''',
                },
                {
                    'role': 'user',
                    'content': problem,
                }]
            
            response = completion(model='gemini/gemini-1.5-flash', messages=messages,
                response_format={"type": "json_object", "response_schema": response_schema_answer}
            )
            answer_content = response.choices[0]['message']['content']

            answer_obj = Answer.model_validate_json(answer_content)
            llm_answer = answer_obj.answer

            judge = Judge(model='gemini/gemini-2.0-flash')
            answer_correctness_obj = judge.prediction(query=df.iloc[idx]['problem'],answer1=answer,answer2=llm_answer)
            
            df.at[idx, f'llm_raw_response_test_context_{context_rows}'] = answer_content
            df.at[idx, f'llm_answer_test_{context_rows}'] = llm_answer
            df.at[idx, f'is_correct_test_{context_rows}'] = answer_correctness_obj.correct
            
            print(f"Processed row {idx}")
            idx = idx + 1
        except Exception as e:
            time.sleep(60)
        


In [98]:
# Evaluate the rows at positions 80-99 (end_idx is exclusive); results are
# written into df in place.
test_process_math_problems_te(df,start_idx=80,end_idx=100)



SYNC kwargs[caching]: False; litellm.cache: None; kwargs.get('cache')['no-cache']: False
Final returned optional params: {'response_mime_type': 'application/json', 'response_schema': {'type': 'object', 'properties': {'reasoning': {'type': 'string'}, 'answer': {'type': 'string'}}, 'required': ['reasoning', 'answer']}}




RAW RESPONSE:
{
  "candidates": [
    {
      "content": {
        "parts": [
          {
            "text": "{\n\"answer\": \"997\",\n\"reasoning\": \"We are given the equation log(kx) = 2log(x+2). Using the properties of logarithms, we can rewrite this as log(kx) = log((x+2)^2). Since the logarithm is a one-to-one function, we can equate the arguments: kx = (x+2)^2. This simplifies to kx = x^2 + 4x + 4. Rearranging the terms, we get x^2 + (4-k)x + 4 = 0. This is a quadratic equation in x. For the quadratic equation to have exactly one real solution, its discriminant must be zero. The discriminant is (4-k)^2 - 4(1)(4) = k^2 - 8k + 16 - 16 = k^2 - 8k = k(k-8). Setting the discriminant to zero, we have k(k-8) = 0, which gives k = 0 or k = 8. However, if k = 0, then the original equation becomes log(0) which is undefined. Therefore, we must have k = 8. If k = 8, the equation becomes x^2 - 4x + 4 = 0, which factors as (x-2)^2 = 0, so x = 2. The given interval for k is [-500, 500]. The va



RAW RESPONSE:
{
  "candidates": [
    {
      "content": {
        "parts": [
          {
            "text": "{\n\"correct\": false\n}"
          }
        ],
        "role": "model"
      },
      "finishReason": "STOP",
      "avgLogprobs": -0.034107744693756104
    }
  ],
  "usageMetadata": {
    "promptTokenCount": 274,
    "candidatesTokenCount": 8,
    "totalTokenCount": 282,
    "promptTokensDetails": [
      {
        "modality": "TEXT",
        "tokenCount": 274
      }
    ],
    "candidatesTokensDetails": [
      {
        "modality": "TEXT",
        "tokenCount": 8
      }
    ]
  },
  "modelVersion": "gemini-2.0-flash"
}



Processed row 80
SYNC kwargs[caching]: False; litellm.cache: None; kwargs.get('cache')['no-cache']: False
Final returned optional params: {'response_mime_type': 'application/json', 'response_schema': {'type': 'object', 'properties': {'reasoning': {'type': 'string'}, 'answer': {'type': 'string'}}, 'required': ['reasoning', 'answer']}}




RAW RESPONSE:
{
  "candidates": [
    {
      "content": {
        "parts": [
          {
            "text": "{\n  \"answer\": \"7\",\n  \"reasoning\": \"Let t be the weight of a treek, s be the weight of a squig, and g be the weight of a goolee. We are given that 10t = 3s + g and 2t + g = s. From the second equation, we have g = s - 2t. Substituting this into the first equation, we get 10t = 3s + s - 2t, which simplifies to 12t = 4s. Dividing by 4, we get 3t = s. Therefore, one squig weighs as much as 3 treeks.\"\n}"
          }
        ],
        "role": "model"
      },
      "finishReason": "STOP",
      "avgLogprobs": -0.067095634921285133
    }
  ],
  "usageMetadata": {
    "promptTokenCount": 25949,
    "candidatesTokenCount": 149,
    "totalTokenCount": 26098,
    "promptTokensDetails": [
      {
        "modality": "TEXT",
        "tokenCount": 25949
      }
    ],
    "candidatesTokensDetails": [
      {
        "modality": "TEXT",
        "tokenCount": 149
      }
    ]
  }



RAW RESPONSE:
{
  "candidates": [
    {
      "content": {
        "parts": [
          {
            "text": "{\n\"correct\": false\n}"
          }
        ],
        "role": "model"
      },
      "finishReason": "STOP",
      "avgLogprobs": -0.0016751200892031193
    }
  ],
  "usageMetadata": {
    "promptTokenCount": 271,
    "candidatesTokenCount": 8,
    "totalTokenCount": 279,
    "promptTokensDetails": [
      {
        "modality": "TEXT",
        "tokenCount": 271
      }
    ],
    "candidatesTokensDetails": [
      {
        "modality": "TEXT",
        "tokenCount": 8
      }
    ]
  },
  "modelVersion": "gemini-2.0-flash"
}



Processed row 81
SYNC kwargs[caching]: False; litellm.cache: None; kwargs.get('cache')['no-cache']: False
Final returned optional params: {'response_mime_type': 'application/json', 'response_schema': {'type': 'object', 'properties': {'reasoning': {'type': 'string'}, 'answer': {'type': 'string'}}, 'required': ['reasoning', 'answer']}}




RAW RESPONSE:
{
  "candidates": [
    {
      "content": {
        "parts": [
          {
            "text": "{\n\"answer\": \"7\",\n\"reasoning\": \"Let A = (x1, y1) and B = (x2, y2). The slope of the line containing points A and B is given by m = (y2 - y1)/(x2 - x1). To maximize the slope, we need to maximize y2 - y1 and minimize x2 - x1.The coordinates of A lie within the square with opposite corners (0,0) and (2,2). Thus, 0 ≤ x1 ≤ 2 and 0 ≤ y1 ≤ 2. The coordinates of B lie within the square with opposite corners (4,2) and (5,3). Thus, 4 ≤ x2 ≤ 5 and 2 ≤ y2 ≤ 3.To maximize y2 - y1, we take the maximum value of y2 and the minimum value of y1, giving y2 - y1 ≤ 3 - 0 = 3.To minimize x2 - x1, we take the minimum value of x2 and the maximum value of x1, giving x2 - x1 ≥ 4 - 2 = 2. Then the slope m ≤ 3/2. However, this is incorrect. Let's consider the extreme points. A = (2,2) and B = (4,2). The slope is 0. A = (0,0) and B = (5,3). The slope is 3/5. A = (0,2) and B = (4,3). The slope is 



RAW RESPONSE:
{
  "candidates": [
    {
      "content": {
        "parts": [
          {
            "text": "{\n\"correct\": false\n}"
          }
        ],
        "role": "model"
      },
      "finishReason": "STOP",
      "avgLogprobs": -0.05418742448091507
    }
  ],
  "usageMetadata": {
    "promptTokenCount": 312,
    "candidatesTokenCount": 8,
    "totalTokenCount": 320,
    "promptTokensDetails": [
      {
        "modality": "TEXT",
        "tokenCount": 312
      }
    ],
    "candidatesTokensDetails": [
      {
        "modality": "TEXT",
        "tokenCount": 8
      }
    ]
  },
  "modelVersion": "gemini-2.0-flash"
}



Processed row 82
SYNC kwargs[caching]: False; litellm.cache: None; kwargs.get('cache')['no-cache']: False
Final returned optional params: {'response_mime_type': 'application/json', 'response_schema': {'type': 'object', 'properties': {'reasoning': {'type': 'string'}, 'answer': {'type': 'string'}}, 'required': ['reasoning', 'answer']}}




RAW RESPONSE:
{
  "candidates": [
    {
      "content": {
        "parts": [
          {
            "text": "{\n\"answer\": \"x=1\",\n\"reasoning\": \"We are given the equation 3^(2x) + 19 = 10^x. We can rewrite this as (3^x)^2 + 19 = 10^x. Let y = 3^x. Then the equation becomes y^2 + 19 = 10^x. If we try some values, we can see that when x = 1, 3^(2(1)) + 19 = 9 + 19 = 28 and 10^1 = 10. This doesn't work. If x = 2, 3^(2(2)) + 19 = 81 + 19 = 100 and 10^2 = 100. This works, so x = 2 is a solution. Now let's try to solve algebraically. We have (3^x)^2 - 10^x + 19 = 0. This equation is difficult to solve analytically. Let's try another approach. Let's rewrite the equation as 9^x + 19 = 10^x. If we let x = 1, then 9^1 + 19 = 28 and 10^1 = 10. If we let x = 2, then 9^2 + 19 = 100 and 10^2 = 100. So x = 2 is a solution. Let's try to solve it by plotting. Let f(x) = 9^x + 19 - 10^x. We see that there is a root around x=2. Let's check if there are any other roots. The derivative is f'(x) = l



RAW RESPONSE:
{
  "candidates": [
    {
      "content": {
        "parts": [
          {
            "text": "{\n\"correct\": true\n}"
          }
        ],
        "role": "model"
      },
      "finishReason": "STOP",
      "avgLogprobs": -0.016197603195905685
    }
  ],
  "usageMetadata": {
    "promptTokenCount": 247,
    "candidatesTokenCount": 8,
    "totalTokenCount": 255,
    "promptTokensDetails": [
      {
        "modality": "TEXT",
        "tokenCount": 247
      }
    ],
    "candidatesTokensDetails": [
      {
        "modality": "TEXT",
        "tokenCount": 8
      }
    ]
  },
  "modelVersion": "gemini-2.0-flash"
}



Processed row 83
SYNC kwargs[caching]: False; litellm.cache: None; kwargs.get('cache')['no-cache']: False
Final returned optional params: {'response_mime_type': 'application/json', 'response_schema': {'type': 'object', 'properties': {'reasoning': {'type': 'string'}, 'answer': {'type': 'string'}}, 'required': ['reasoning', 'answer']}}




RAW RESPONSE:
{
  "candidates": [
    {
      "content": {
        "parts": [
          {
            "text": "{\n\"answer\": \"11\",\n\"reasoning\": \"Let P(t) = (3t^2 + 5t + a)(4t^2 + bt - 2) = 12t^4 + 26t^3 - 8t^2 - 16t + 6.\\nExpanding the expression, we get:\\n12t^4 + 3bt^3 - 6t^2 + 20t^3 + 5bt^2 - 10t + 4at^2 + abt - 2a = 12t^4 + 26t^3 - 8t^2 - 16t + 6\\nComparing coefficients, we have:\\nCoefficients of t^3: 3b + 20 = 26, so 3b = 6, which means b = 2.\\nCoefficients of t: -10 + ab = -16, so ab = -6.\\nCoefficients of t^0: -2a = 6, so a = -3.\\nTherefore, a + b = -3 + 2 = -1.\\nLet's check the coefficients of t^2: -6 + 5b + 4a = -8\\n-6 + 10 - 12 = -8\\n-8 = -8\\nThis is consistent.\\nHowever, there is a mistake in the calculation. ab=-6 and a=-3, then b=2.\\nThen -6 + 5(2) + 4(-3) = -6 + 10 -12 = -8 which is consistent. a+b = -3+2 = -1. This is incorrect.\\nLet's check the multiplication again:\\n(3t^2 + 5t + a)(4t^2 + bt - 2) = 12t^4 + 3bt^3 - 6t^2 + 20t^3 + 5bt^2 - 10t + 4at^2



RAW RESPONSE:
{
  "candidates": [
    {
      "content": {
        "parts": [
          {
            "text": "{\n\"correct\": false\n}"
          }
        ],
        "role": "model"
      },
      "finishReason": "STOP",
      "avgLogprobs": -0.018911361694335938
    }
  ],
  "usageMetadata": {
    "promptTokenCount": 282,
    "candidatesTokenCount": 8,
    "totalTokenCount": 290,
    "promptTokensDetails": [
      {
        "modality": "TEXT",
        "tokenCount": 282
      }
    ],
    "candidatesTokensDetails": [
      {
        "modality": "TEXT",
        "tokenCount": 8
      }
    ]
  },
  "modelVersion": "gemini-2.0-flash"
}



Processed row 84
SYNC kwargs[caching]: False; litellm.cache: None; kwargs.get('cache')['no-cache']: False
Final returned optional params: {'response_mime_type': 'application/json', 'response_schema': {'type': 'object', 'properties': {'reasoning': {'type': 'string'}, 'answer': {'type': 'string'}}, 'required': ['reasoning', 'answer']}}




RAW RESPONSE:
{
  "candidates": [
    {
      "content": {
        "parts": [
          {
            "text": "{\n  \"answer\": \"√5 meters\",\n  \"reasoning\": \"Let r be the radius of the cylindrical tank in meters, and let h be the height of the water level in meters. The volume of the water in the tank at any given time is given by V = πr²h cubic meters. The rate at which the water is being added is 20π cubic meters per hour. Thus, dV/dt = 20π m³/hour.  The rate at which the water level is rising is dh/dt = 4 m/hour. Differentiating the volume formula with respect to time (t), we get dV/dt = πr²(dh/dt). Substituting the given values, we have 20π = πr²(4). Dividing both sides by 4π, we get 5 = r². Taking the square root of both sides, we get r = √5 meters.\"\n}"
          }
        ],
        "role": "model"
      },
      "finishReason": "STOP",
      "avgLogprobs": -0.18697583375863694
    }
  ],
  "usageMetadata": {
    "promptTokenCount": 25963,
    "candidatesTokenCount": 199,




RAW RESPONSE:
{
  "candidates": [
    {
      "content": {
        "parts": [
          {
            "text": "{\n\"correct\": true\n}"
          }
        ],
        "role": "model"
      },
      "finishReason": "STOP",
      "avgLogprobs": -0.0015084120677784085
    }
  ],
  "usageMetadata": {
    "promptTokenCount": 288,
    "candidatesTokenCount": 8,
    "totalTokenCount": 296,
    "promptTokensDetails": [
      {
        "modality": "TEXT",
        "tokenCount": 288
      }
    ],
    "candidatesTokensDetails": [
      {
        "modality": "TEXT",
        "tokenCount": 8
      }
    ]
  },
  "modelVersion": "gemini-2.0-flash"
}



Processed row 85
SYNC kwargs[caching]: False; litellm.cache: None; kwargs.get('cache')['no-cache']: False
Final returned optional params: {'response_mime_type': 'application/json', 'response_schema': {'type': 'object', 'properties': {'reasoning': {'type': 'string'}, 'answer': {'type': 'string'}}, 'required': ['reasoning', 'answer']}}




RAW RESPONSE:
{
  "candidates": [
    {
      "content": {
        "parts": [
          {
            "text": "{\n\"answer\": \"240\",\n\"reasoning\": \"To find the total number of outfits, we multiply the number of choices for each item of clothing.  There are 5 choices for shirts, 6 choices for pants, and 8 choices for hats. Therefore, the total number of outfits is 5 * 6 * 8 = 240.\"\n}"
          }
        ],
        "role": "model"
      },
      "finishReason": "STOP",
      "avgLogprobs": -0.08156815389307534
    }
  ],
  "usageMetadata": {
    "promptTokenCount": 25940,
    "candidatesTokenCount": 82,
    "totalTokenCount": 26022,
    "promptTokensDetails": [
      {
        "modality": "TEXT",
        "tokenCount": 25940
      }
    ],
    "candidatesTokensDetails": [
      {
        "modality": "TEXT",
        "tokenCount": 82
      }
    ]
  },
  "modelVersion": "gemini-1.5-flash"
}



SYNC kwargs[caching]: False; litellm.cache: None; kwargs.get('cache')['no-cache']: False
F



RAW RESPONSE:
{
  "candidates": [
    {
      "content": {
        "parts": [
          {
            "text": "{\n  \"correct\": true\n}"
          }
        ],
        "role": "model"
      },
      "finishReason": "STOP",
      "avgLogprobs": -0.01356289784113566
    }
  ],
  "usageMetadata": {
    "promptTokenCount": 266,
    "candidatesTokenCount": 9,
    "totalTokenCount": 275,
    "promptTokensDetails": [
      {
        "modality": "TEXT",
        "tokenCount": 266
      }
    ],
    "candidatesTokensDetails": [
      {
        "modality": "TEXT",
        "tokenCount": 9
      }
    ]
  },
  "modelVersion": "gemini-2.0-flash"
}



Processed row 86
SYNC kwargs[caching]: False; litellm.cache: None; kwargs.get('cache')['no-cache']: False
Final returned optional params: {'response_mime_type': 'application/json', 'response_schema': {'type': 'object', 'properties': {'reasoning': {'type': 'string'}, 'answer': {'type': 'string'}}, 'required': ['reasoning', 'answer']}}




RAW RESPONSE:
{
  "candidates": [
    {
      "content": {
        "parts": [
          {
            "text": "{\n\"answer\": \"1\",\n\"reasoning\": \"We consider two cases:\\nCase 1: 5x - 1 \u003e= 0, which means x \u003e= 1/5.  Then |5x - 1| = 5x - 1, so the equation becomes 5x - 1 = x + 3.  Then 4x = 4, so x = 1.  Since 1 \u003e= 1/5, this is a valid solution.\\nCase 2: 5x - 1 \u003c 0, which means x \u003c 1/5.  Then |5x - 1| = -(5x - 1) = 1 - 5x, so the equation becomes 1 - 5x = x + 3.  Then 6x = -2, so x = -1/3.  Since -1/3 \u003c 1/5, this is a valid solution.\\nTherefore, the largest value of x that satisfies the equation is 1.\"\n}"
          }
        ],
        "role": "model"
      },
      "finishReason": "STOP",
      "avgLogprobs": -0.067371826171875
    }
  ],
  "usageMetadata": {
    "promptTokenCount": 25924,
    "candidatesTokenCount": 225,
    "totalTokenCount": 26149,
    "promptTokensDetails": [
      {
        "modality": "TEXT",
        "tokenCount": 25924
     



RAW RESPONSE:
{
  "candidates": [
    {
      "content": {
        "parts": [
          {
            "text": "{\n\"correct\": true\n}"
          }
        ],
        "role": "model"
      },
      "finishReason": "STOP",
      "avgLogprobs": -0.0016215421492233872
    }
  ],
  "usageMetadata": {
    "promptTokenCount": 246,
    "candidatesTokenCount": 8,
    "totalTokenCount": 254,
    "promptTokensDetails": [
      {
        "modality": "TEXT",
        "tokenCount": 246
      }
    ],
    "candidatesTokensDetails": [
      {
        "modality": "TEXT",
        "tokenCount": 8
      }
    ]
  },
  "modelVersion": "gemini-2.0-flash"
}



Processed row 87
SYNC kwargs[caching]: False; litellm.cache: None; kwargs.get('cache')['no-cache']: False
Final returned optional params: {'response_mime_type': 'application/json', 'response_schema': {'type': 'object', 'properties': {'reasoning': {'type': 'string'}, 'answer': {'type': 'string'}}, 'required': ['reasoning', 'answer']}}




RAW RESPONSE:
{
  "candidates": [
    {
      "content": {
        "parts": [
          {
            "text": "{\n  \"answer\": \"2\",\n  \"reasoning\": \"For the graph of y = f(x) to intersect every horizontal line at least once, the function f(x) must be surjective (onto). This means that for every real number y, there must exist at least one real number x such that f(x) = y. Let's analyze the two parts of the function:\\n\\nIf x ≥ a, then f(x) = ax^2. Since x ≥ a, f(x) ≥ a^3. Also, this is a parabola opening upward, so it covers all values ≥ a^3.\\n\\nIf x \u003c a, then f(x) = ax + 2a. This is a line with slope a. It covers all values \u003c a^2 + 2a.\\n\\nTo ensure that the function intersects every horizontal line, we need the range of the two parts to overlap: a^3 ≤ a^2 + 2a. This inequality simplifies to a^3 - a^2 - 2a ≤ 0, or a(a^2 - a - 2) ≤ 0, which factors to a(a - 2)(a + 1) ≤ 0.  The solution to this inequality is -1 ≤ a ≤ 0 or a ≥ 2.  Since a is positive, we must have a ≥



RAW RESPONSE:
{
  "candidates": [
    {
      "content": {
        "parts": [
          {
            "text": "{\n\"correct\": true\n}"
          }
        ],
        "role": "model"
      },
      "finishReason": "STOP",
      "avgLogprobs": -0.0045958771370351315
    }
  ],
  "usageMetadata": {
    "promptTokenCount": 315,
    "candidatesTokenCount": 8,
    "totalTokenCount": 323,
    "promptTokensDetails": [
      {
        "modality": "TEXT",
        "tokenCount": 315
      }
    ],
    "candidatesTokensDetails": [
      {
        "modality": "TEXT",
        "tokenCount": 8
      }
    ]
  },
  "modelVersion": "gemini-2.0-flash"
}



Processed row 88
SYNC kwargs[caching]: False; litellm.cache: None; kwargs.get('cache')['no-cache']: False
Final returned optional params: {'response_mime_type': 'application/json', 'response_schema': {'type': 'object', 'properties': {'reasoning': {'type': 'string'}, 'answer': {'type': 'string'}}, 'required': ['reasoning', 'answer']}}




RAW RESPONSE:
{
  "candidates": [
    {
      "content": {
        "parts": [
          {
            "text": "{\"answer\": \"3\", \"reasoning\": \"To find the greatest common divisor (GCD) of 3339, 2961, and 1491, we can use the Euclidean algorithm. First, we find the GCD of 3339 and 2961:\\n3339 = 1(2961) + 378\\n2961 = 7(378) + 315\\n378 = 1(315) + 63\\n315 = 5(63) + 0\\nThe GCD of 3339 and 2961 is 63.\\nNext, we find the GCD of 63 and 1491:\\n1491 = 23(63) + 42\\n63 = 1(42) + 21\\n42 = 2(21) + 0\\nThe GCD of 63 and 1491 is 21.\\nThus, the GCD of 3339, 2961, and 1491 is 21.\\nAnother approach:\\nFind the prime factorizations of the three numbers:\\n3339 = 3 × 11 × 101\\n2961 = 3 × 987 = 3 × 3 × 329 = 3^2 × 7 × 47\\n1491 = 3 × 497 = 3 × 7 × 71\\nThe only prime factor that is common to all three is 3. Thus, the GCD is 3.\"}"
          }
        ],
        "role": "model"
      },
      "finishReason": "STOP",
      "avgLogprobs": -0.0970788426678296
    }
  ],
  "usageMetadata": {
   



RAW RESPONSE:
{
  "candidates": [
    {
      "content": {
        "parts": [
          {
            "text": "{\n  \"correct\": false\n}"
          }
        ],
        "role": "model"
      },
      "finishReason": "STOP",
      "avgLogprobs": -0.021935832169320848
    }
  ],
  "usageMetadata": {
    "promptTokenCount": 250,
    "candidatesTokenCount": 9,
    "totalTokenCount": 259,
    "promptTokensDetails": [
      {
        "modality": "TEXT",
        "tokenCount": 250
      }
    ],
    "candidatesTokensDetails": [
      {
        "modality": "TEXT",
        "tokenCount": 9
      }
    ]
  },
  "modelVersion": "gemini-2.0-flash"
}



Processed row 89
SYNC kwargs[caching]: False; litellm.cache: None; kwargs.get('cache')['no-cache']: False
Final returned optional params: {'response_mime_type': 'application/json', 'response_schema': {'type': 'object', 'properties': {'reasoning': {'type': 'string'}, 'answer': {'type': 'string'}}, 'required': ['reasoning', 'answer']}}




RAW RESPONSE:
{
  "candidates": [
    {
      "content": {
        "parts": [
          {
            "text": "{\n  \"answer\": \"15/2\",\n  \"reasoning\": \"To divide 10 by 2/3, Remmy should multiply 10 by the reciprocal of 2/3, which is 3/2.  This is because division by a fraction is equivalent to multiplication by its reciprocal.  Therefore, 10 divided by 2/3 is 10 * (3/2) = 15.\"\n}"
          }
        ],
        "role": "model"
      },
      "finishReason": "STOP",
      "avgLogprobs": -0.13149086309939015
    }
  ],
  "usageMetadata": {
    "promptTokenCount": 25944,
    "candidatesTokenCount": 98,
    "totalTokenCount": 26042,
    "promptTokensDetails": [
      {
        "modality": "TEXT",
        "tokenCount": 25944
      }
    ],
    "candidatesTokensDetails": [
      {
        "modality": "TEXT",
        "tokenCount": 98
      }
    ]
  },
  "modelVersion": "gemini-1.5-flash"
}



SYNC kwargs[caching]: False; litellm.cache: None; kwargs.get('cache')['no-cache']: False
Fina



RAW RESPONSE:
{
  "candidates": [
    {
      "content": {
        "parts": [
          {
            "text": "{\n\"correct\": true\n}"
          }
        ],
        "role": "model"
      },
      "finishReason": "STOP",
      "avgLogprobs": -0.045464776456356049
    }
  ],
  "usageMetadata": {
    "promptTokenCount": 273,
    "candidatesTokenCount": 8,
    "totalTokenCount": 281,
    "promptTokensDetails": [
      {
        "modality": "TEXT",
        "tokenCount": 273
      }
    ],
    "candidatesTokensDetails": [
      {
        "modality": "TEXT",
        "tokenCount": 8
      }
    ]
  },
  "modelVersion": "gemini-2.0-flash"
}



Processed row 90
SYNC kwargs[caching]: False; litellm.cache: None; kwargs.get('cache')['no-cache']: False
Final returned optional params: {'response_mime_type': 'application/json', 'response_schema': {'type': 'object', 'properties': {'reasoning': {'type': 'string'}, 'answer': {'type': 'string'}}, 'required': ['reasoning', 'answer']}}




RAW RESPONSE:
{
  "candidates": [
    {
      "content": {
        "parts": [
          {
            "text": "{\n  \"answer\": \"2\",\n  \"reasoning\": \"For f(x) to have a real number value, the expression inside the square root must be non-negative, so x - 1 ≥ 0, which means x ≥ 1. Also, the denominator cannot be zero, so x - 2 ≠ 0, which means x ≠ 2. Combining these conditions, we need x ≥ 1 and x ≠ 2. The smallest integer value for x that satisfies these conditions is 1.  However, if x=1, the numerator is 0 and the denominator is -1, so f(x) = 0. If x=2, then the denominator is 0, so f(x) is undefined. If x=3, then f(x) = √2, which is a real number.  The smallest possible integer value for x such that f(x) has a real number value is 1.\"\n}"
          }
        ],
        "role": "model"
      },
      "finishReason": "STOP",
      "avgLogprobs": -0.12676523246017157
    }
  ],
  "usageMetadata": {
    "promptTokenCount": 25946,
    "candidatesTokenCount": 204,
    "totalTokenCoun



RAW RESPONSE:
{
  "candidates": [
    {
      "content": {
        "parts": [
          {
            "text": "{\n\"correct\": false\n}"
          }
        ],
        "role": "model"
      },
      "finishReason": "STOP",
      "avgLogprobs": -0.11080583930015564
    }
  ],
  "usageMetadata": {
    "promptTokenCount": 268,
    "candidatesTokenCount": 8,
    "totalTokenCount": 276,
    "promptTokensDetails": [
      {
        "modality": "TEXT",
        "tokenCount": 268
      }
    ],
    "candidatesTokensDetails": [
      {
        "modality": "TEXT",
        "tokenCount": 8
      }
    ]
  },
  "modelVersion": "gemini-2.0-flash"
}



Processed row 91
SYNC kwargs[caching]: False; litellm.cache: None; kwargs.get('cache')['no-cache']: False
Final returned optional params: {'response_mime_type': 'application/json', 'response_schema': {'type': 'object', 'properties': {'reasoning': {'type': 'string'}, 'answer': {'type': 'string'}}, 'required': ['reasoning', 'answer']}}




RAW RESPONSE:
{
  "candidates": [
    {
      "content": {
        "parts": [
          {
            "text": "{\n  \"answer\": \"0.04118\",\n  \"reasoning\": \"Let X be the number of islands with treasure. We are given that there are 7 islands, and each island has a probability of 1/5 of having treasure. This is a binomial distribution with n = 7 and p = 1/5. The probability of exactly k successes in n trials is given by the binomial probability formula:\\nP(X=k) = (n choose k) * p^k * (1-p)^(n-k)\\nIn our case, we want to find the probability that exactly 4 islands have treasure, so k = 4. Thus,\\nP(X=4) = (7 choose 4) * (1/5)^4 * (4/5)^3\\n= 35 * (1/625) * (64/125)\\n= 35 * 64 / (625 * 125)\\n= 2240 / 78125\\n= 0.02867\\nThe calculation is incorrect. Let's calculate it again.\\nP(X=4) = (7 choose 4) * (1/5)^4 * (4/5)^3 = 35 * (1/625) * (64/125) = 2240/78125 ≈ 0.028672\\nWe have n = 7 trials (islands), probability of success p = 1/5, and we want k = 4 successes (islands with treasure



RAW RESPONSE:
{
  "candidates": [
    {
      "content": {
        "parts": [
          {
            "text": "{\n\"correct\": true\n}"
          }
        ],
        "role": "model"
      },
      "finishReason": "STOP",
      "avgLogprobs": -0.036708846688270569
    }
  ],
  "usageMetadata": {
    "promptTokenCount": 280,
    "candidatesTokenCount": 8,
    "totalTokenCount": 288,
    "promptTokensDetails": [
      {
        "modality": "TEXT",
        "tokenCount": 280
      }
    ],
    "candidatesTokensDetails": [
      {
        "modality": "TEXT",
        "tokenCount": 8
      }
    ]
  },
  "modelVersion": "gemini-2.0-flash"
}



Processed row 92
SYNC kwargs[caching]: False; litellm.cache: None; kwargs.get('cache')['no-cache']: False
Final returned optional params: {'response_mime_type': 'application/json', 'response_schema': {'type': 'object', 'properties': {'reasoning': {'type': 'string'}, 'answer': {'type': 'string'}}, 'required': ['reasoning', 'answer']}}




RAW RESPONSE:
{
  "candidates": [
    {
      "content": {
        "parts": [
          {
            "text": "{\n\"answer\": \"30\",\n\"reasoning\": \"To find the value of c, we complete the square for the given equation x^2 - 10x + y^2 + 6y + c = 0 to obtain the standard form of a circle (x-a)^2 + (y-b)^2 = r^2, where (a,b) is the center and r is the radius. For the x terms, we have x^2 - 10x. To complete the square, we take half of the coefficient of x (-10/2 = -5), square it (-5)^2 = 25, and add and subtract it to the equation. For the y terms, we have y^2 + 6y. To complete the square, we take half of the coefficient of y (6/2 = 3), square it (3)^2 = 9, and add and subtract it to the equation. So, the equation becomes (x^2 - 10x + 25) + (y^2 + 6y + 9) + c - 25 - 9 = 0. This simplifies to (x - 5)^2 + (y + 3)^2 = 34 - c.  The radius of the circle is √(34 - c). Since the radius is given as 1, we set √(34 - c) = 1.  Squaring both sides gives 34 - c = 1, so c = 33. However, the equation



RAW RESPONSE:
{
  "candidates": [
    {
      "content": {
        "parts": [
          {
            "text": "{\n  \"correct\": false\n}"
          }
        ],
        "role": "model"
      },
      "finishReason": "STOP",
      "avgLogprobs": -5.698025537033876e-05
    }
  ],
  "usageMetadata": {
    "promptTokenCount": 269,
    "candidatesTokenCount": 9,
    "totalTokenCount": 278,
    "promptTokensDetails": [
      {
        "modality": "TEXT",
        "tokenCount": 269
      }
    ],
    "candidatesTokensDetails": [
      {
        "modality": "TEXT",
        "tokenCount": 9
      }
    ]
  },
  "modelVersion": "gemini-2.0-flash"
}



Processed row 93
SYNC kwargs[caching]: False; litellm.cache: None; kwargs.get('cache')['no-cache']: False
Final returned optional params: {'response_mime_type': 'application/json', 'response_schema': {'type': 'object', 'properties': {'reasoning': {'type': 'string'}, 'answer': {'type': 'string'}}, 'required': ['reasoning', 'answer']}}




RAW RESPONSE:
{
  "candidates": [
    {
      "content": {
        "parts": [
          {
            "text": "{\n\"answer\": \"110\",\n\"reasoning\": \"Let ∠BAD = 3x and ∠CDA = 3y. In quadrilateral ABCD, the sum of angles is 360 degrees. Therefore, ∠ABC + ∠BCD + ∠CDA + ∠DAB = 360. Substituting the values, we get 110 + 100 + 3y + 3x = 360. This simplifies to 3x + 3y = 150, so x + y = 50. In triangle AFD, the sum of angles is 180 degrees. Therefore, ∠FAD + ∠FDA + ∠AFD = 180. Substituting ∠FAD = 2x and ∠FDA = 2y, we have 2x + 2y + ∠AFD = 180. Since x + y = 50, we have 2(x + y) + ∠AFD = 180, which simplifies to 100 + ∠AFD = 180. Therefore, ∠AFD = 80. This is incorrect. Let's use the property that the sum of exterior angles of a polygon is 360 degrees. In triangle ABD, the exterior angle at A is 110. In triangle ACD, the exterior angle at A is 100. In triangle ABD, we have 110 = x + x = 2x. So x = 55. In triangle ACD, 100 = y + y = 2y. So y = 50. In triangle AFD, we have 2x + 2y + ∠AFD = 1



RAW RESPONSE:
{
  "candidates": [
    {
      "content": {
        "parts": [
          {
            "text": "{\n\"correct\": false\n}"
          }
        ],
        "role": "model"
      },
      "finishReason": "STOP",
      "avgLogprobs": -0.045722786337137222
    }
  ],
  "usageMetadata": {
    "promptTokenCount": 597,
    "candidatesTokenCount": 8,
    "totalTokenCount": 605,
    "promptTokensDetails": [
      {
        "modality": "TEXT",
        "tokenCount": 597
      }
    ],
    "candidatesTokensDetails": [
      {
        "modality": "TEXT",
        "tokenCount": 8
      }
    ]
  },
  "modelVersion": "gemini-2.0-flash"
}



Processed row 94
SYNC kwargs[caching]: False; litellm.cache: None; kwargs.get('cache')['no-cache']: False
Final returned optional params: {'response_mime_type': 'application/json', 'response_schema': {'type': 'object', 'properties': {'reasoning': {'type': 'string'}, 'answer': {'type': 'string'}}, 'required': ['reasoning', 'answer']}}




RAW RESPONSE:
{
  "candidates": [
    {
      "content": {
        "parts": [
          {
            "text": "{\n  \"answer\": \"y = -1\",\n  \"reasoning\": \"Let \\\\textbf{v} = \\\\begin{pmatrix} 2 \\\\\\\\ y \\\\\\\\ -5 \\\\end{pmatrix} and \\\\textbf{w} = \\\\begin{pmatrix} 1 \\\\\\\\ -2 \\\\\\\\ 1 \\\\end{pmatrix}. The projection of \\\\textbf{v} onto \\\\textbf{w} is given by\\n\\\\begin{align*} \\\\text{proj}_{\\\\textbf{w}} \\\\textbf{v} &= \\\\frac{\\\\textbf{v} \\\\cdot \\\\textbf{w}}{\\\\|\\\\textbf{w}\\\\|^2} \\\\textbf{w} \\\\\\\\\\\\\\\\ &= \\\\frac{\\\\begin{pmatrix} 2 \\\\\\\\ y \\\\\\\\ -5 \\\\end{pmatrix} \\\\cdot \\\\begin{pmatrix} 1 \\\\\\\\ -2 \\\\\\\\ 1 \\\\end{pmatrix}}{\\\\left\\\\| \\\\begin{pmatrix} 1 \\\\\\\\ -2 \\\\\\\\ 1 \\\\end{pmatrix} \\\\right\\\\|^2} \\\\begin{pmatrix} 1 \\\\\\\\ -2 \\\\\\\\ 1 \\\\end{pmatrix} \\\\\\\\\\\\\\\\ &= \\\\frac{2 - 2y - 5}{1^2 + (-2)^2 + 1^2} \\\\begin{pmatrix} 1 \\\\\\\\ -2 \\\\\\\\ 1 \\\\end{pmatrix} \\\\\\\\\\\\\\\\ &= \



RAW RESPONSE:
{
  "candidates": [
    {
      "content": {
        "parts": [
          {
            "text": "{\n\"correct\": true\n}"
          }
        ],
        "role": "model"
      },
      "finishReason": "STOP",
      "avgLogprobs": -0.043251544237136841
    }
  ],
  "usageMetadata": {
    "promptTokenCount": 297,
    "candidatesTokenCount": 8,
    "totalTokenCount": 305,
    "promptTokensDetails": [
      {
        "modality": "TEXT",
        "tokenCount": 297
      }
    ],
    "candidatesTokensDetails": [
      {
        "modality": "TEXT",
        "tokenCount": 8
      }
    ]
  },
  "modelVersion": "gemini-2.0-flash"
}



Processed row 95
SYNC kwargs[caching]: False; litellm.cache: None; kwargs.get('cache')['no-cache']: False
Final returned optional params: {'response_mime_type': 'application/json', 'response_schema': {'type': 'object', 'properties': {'reasoning': {'type': 'string'}, 'answer': {'type': 'string'}}, 'required': ['reasoning', 'answer']}}




RAW RESPONSE:
{
  "candidates": [
    {
      "content": {
        "parts": [
          {
            "text": "{\n  \"answer\": \"1,2,3,4\",\n  \"reasoning\": \"Let the given equation be\\n\\n\\\\frac{(x+1)(x-3)}{5(x+2)(x-4)} + \\\\frac{(x+3)(x-5)}{9(x+4)(x-6)} - \\\\frac{2(x+5)(x-7)}{13(x+6)(x-8)} = \\\\frac{92}{585}\\n\\nLet f(x) = \\\\frac{(x+1)(x-3)}{5(x+2)(x-4)} + \\\\frac{(x+3)(x-5)}{9(x+4)(x-6)} - \\\\frac{2(x+5)(x-7)}{13(x+6)(x-8)}\\n\\nThen f(1) = \\\\frac{2(-2)}{5(3)(-3)} + \\\\frac{4(-4)}{9(5)(-5)} - \\\\frac{2(6)(-6)}{13(7)(-7)} = \\\\frac{4}{45} + \\\\frac{16}{225} + \\\\frac{72}{637} = \\\\frac{4}{45} + \\\\frac{16}{225} + \\\\frac{72}{637} \\\\approx 0.0889 + 0.0711 + 0.113 = 0.273\\n\\nf(2) = \\\\frac{3(-1)}{5(4)(-2)} + \\\\frac{5(-3)}{9(6)(-4)} - \\\\frac{2(7)(-5)}{13(8)(-6)} = \\\\frac{3}{40} + \\\\frac{5}{216} + \\\\frac{70}{624} \\\\approx 0.075 + 0.023 + 0.112 = 0.21\\n\\nf(3) = \\\\frac{4(0)}{5(5)(-1)} + \\\\frac{6(-2)}{9(7)(-3)} - \\\\frac{2(8)(-4)}{13(9)(-5)} = 



RAW RESPONSE:
{
  "candidates": [
    {
      "content": {
        "parts": [
          {
            "text": "{\n  \"correct\": false\n}"
          }
        ],
        "role": "model"
      },
      "finishReason": "STOP",
      "avgLogprobs": -0.16891770892673069
    }
  ],
  "usageMetadata": {
    "promptTokenCount": 344,
    "candidatesTokenCount": 9,
    "totalTokenCount": 353,
    "promptTokensDetails": [
      {
        "modality": "TEXT",
        "tokenCount": 344
      }
    ],
    "candidatesTokensDetails": [
      {
        "modality": "TEXT",
        "tokenCount": 9
      }
    ]
  },
  "modelVersion": "gemini-2.0-flash"
}



Processed row 96
SYNC kwargs[caching]: False; litellm.cache: None; kwargs.get('cache')['no-cache']: False
Final returned optional params: {'response_mime_type': 'application/json', 'response_schema': {'type': 'object', 'properties': {'reasoning': {'type': 'string'}, 'answer': {'type': 'string'}}, 'required': ['reasoning', 'answer']}}




RAW RESPONSE:
{
  "candidates": [
    {
      "content": {
        "parts": [
          {
            "text": "{\n  \"answer\": \"West\",\n  \"reasoning\": \"A full rotation is 360 degrees.  When the figure skater spins 2250 degrees, this is equivalent to 2250/360 = 6.25 rotations. The 0.25 rotation is equivalent to 0.25 * 360 = 90 degrees. Since she spins to her right (clockwise), and she starts facing north, after 90 degrees she faces west.\"\n}"
          }
        ],
        "role": "model"
      },
      "finishReason": "STOP",
      "avgLogprobs": -0.16686957223074778
    }
  ],
  "usageMetadata": {
    "promptTokenCount": 25946,
    "candidatesTokenCount": 112,
    "totalTokenCount": 26058,
    "promptTokensDetails": [
      {
        "modality": "TEXT",
        "tokenCount": 25946
      }
    ],
    "candidatesTokensDetails": [
      {
        "modality": "TEXT",
        "tokenCount": 112
      }
    ]
  },
  "modelVersion": "gemini-1.5-flash"
}



SYNC kwargs[caching]: False; 



RAW RESPONSE:
{
  "candidates": [
    {
      "content": {
        "parts": [
          {
            "text": "{\n  \"correct\": false\n}"
          }
        ],
        "role": "model"
      },
      "finishReason": "STOP",
      "avgLogprobs": -0.00090103337748183147
    }
  ],
  "usageMetadata": {
    "promptTokenCount": 269,
    "candidatesTokenCount": 9,
    "totalTokenCount": 278,
    "promptTokensDetails": [
      {
        "modality": "TEXT",
        "tokenCount": 269
      }
    ],
    "candidatesTokensDetails": [
      {
        "modality": "TEXT",
        "tokenCount": 9
      }
    ]
  },
  "modelVersion": "gemini-2.0-flash"
}



Processed row 97
SYNC kwargs[caching]: False; litellm.cache: None; kwargs.get('cache')['no-cache']: False
Final returned optional params: {'response_mime_type': 'application/json', 'response_schema': {'type': 'object', 'properties': {'reasoning': {'type': 'string'}, 'answer': {'type': 'string'}}, 'required': ['reasoning', 'answer']}}




RAW RESPONSE:
{
  "candidates": [
    {
      "content": {
        "parts": [
          {
            "text": "{\n  \"answer\": \"2k + 2\",\n  \"reasoning\": \"To simplify the expression (-k + 4) + (-2 + 3k), we combine like terms. The terms with 'k' are -k and 3k, and the constant terms are 4 and -2. Combining the 'k' terms, we get -k + 3k = 2k. Combining the constant terms, we get 4 + (-2) = 2. Therefore, the simplified expression is 2k + 2.\"\n}"
          }
        ],
        "role": "model"
      },
      "finishReason": "STOP",
      "avgLogprobs": -0.026433908939361574
    }
  ],
  "usageMetadata": {
    "promptTokenCount": 25917,
    "candidatesTokenCount": 120,
    "totalTokenCount": 26037,
    "promptTokensDetails": [
      {
        "modality": "TEXT",
        "tokenCount": 25917
      }
    ],
    "candidatesTokensDetails": [
      {
        "modality": "TEXT",
        "tokenCount": 120
      }
    ]
  },
  "modelVersion": "gemini-1.5-flash"
}



SYNC kwargs[caching]: False



RAW RESPONSE:
{
  "candidates": [
    {
      "content": {
        "parts": [
          {
            "text": "{\n  \"correct\": true\n}"
          }
        ],
        "role": "model"
      },
      "finishReason": "STOP",
      "avgLogprobs": -0.061208652125464544
    }
  ],
  "usageMetadata": {
    "promptTokenCount": 246,
    "candidatesTokenCount": 9,
    "totalTokenCount": 255,
    "promptTokensDetails": [
      {
        "modality": "TEXT",
        "tokenCount": 246
      }
    ],
    "candidatesTokensDetails": [
      {
        "modality": "TEXT",
        "tokenCount": 9
      }
    ]
  },
  "modelVersion": "gemini-2.0-flash"
}



Processed row 98
SYNC kwargs[caching]: False; litellm.cache: None; kwargs.get('cache')['no-cache']: False
Final returned optional params: {'response_mime_type': 'application/json', 'response_schema': {'type': 'object', 'properties': {'reasoning': {'type': 'string'}, 'answer': {'type': 'string'}}, 'required': ['reasoning', 'answer']}}




RAW RESPONSE:
{
  "candidates": [
    {
      "content": {
        "parts": [
          {
            "text": "{\n  \"answer\": \"[0,1,1]\",\n  \"reasoning\": \"Let \\\\mathbf{v} = \\\\begin{pmatrix} x \\\\ y \\\\ z \\\\end{pmatrix}.  Then \\\\mathbf{a} \\\\cdot \\\\mathbf{v} = x + y + z = 2, and \\\\mathbf{a} \\\\times \\\\mathbf{v} = \\\\begin{pmatrix} y - z \\\\ z - x \\\\ x - y \\\\end{pmatrix} = \\\\begin{pmatrix} 1 \\\\ -2 \\\\ 1 \\\\end{pmatrix}.  Then y - z = 1, z - x = -2, and x - y = 1.  From x - y = 1, x = y + 1.  From z - x = -2, z = x - 2 = y - 1.  From y - z = 1, y - (y - 1) = 1, which is true.  Then x + y + z = (y + 1) + y + (y - 1) = 3y = 2, so y = 2/3.  Then x = 5/3 and z = -1/3.  Then \\\\mathbf{v} = \\\\begin{pmatrix} 5/3 \\\\ 2/3 \\\\ -1/3 \\\\end{pmatrix}.  However, this vector doesn't work.  Let \\\\mathbf{v} = \\\\begin{pmatrix} x \\\\ y \\\\ z \\\\end{pmatrix}.  Then x + y + z = 2, and\\n\\\\begin{pmatrix} y - z \\\\ z - x \\\\ x - y \\\\end{pmatrix} = \\\\begin

In [99]:
# Persist the results without the row index. With the default index=True the index is
# written as an unnamed first column, which pd.read_csv loads back as a spurious
# "Unnamed: 0" column -- and this file is re-read by the notebook, so the extra
# column would pile up on every save/reload cycle.
df.to_csv('gemini with context.csv', index=False)

In [100]:
# Reload the saved results from disk; the cells below work from this frame.
df = pd.read_csv(filepath_or_buffer='gemini with context.csv')

In [101]:
# Positional slice: rows from `context_rows` up to (not including) row 100.
df_test = df.iloc[slice(context_rows, 100)]

In [102]:
# Tally of True/False values in the `is_correct` column over the test slice.
df_test.loc[:, 'is_correct'].value_counts()

is_correct
False    37
True     23
Name: count, dtype: int64

In [103]:
# Same tally for the column whose name is suffixed with the current `context_rows` value.
df_test.loc[:, f'is_correct_test_{context_rows}'].value_counts()

is_correct_test_40
False    34
True     26
Name: count, dtype: int64