# DS-1000

In [1]:
from datasets import load_dataset
import traceback
import os
import cmbagent

# 1. Load top 10 samples from DS-1000
ds = load_dataset("xlangai/DS-1000", split="test")

In [2]:
len(ds)

1000

In [3]:
def extract_from_tags(text: str, start_tag: str, end_tag: str) -> str:
    start_index = len(start_tag) if text.startswith(start_tag) else 0
    end_index = text.find(end_tag, len(start_tag))
    end_index = end_index if end_index != -1 else len(text)
    return text[start_index:end_index]

def postprocess(code: str) -> str:
    
    code = extract_from_tags(code, "```python\n", "\n```")
    code = extract_from_tags(code, "```\n", "\n```")  # new pattern
    code = extract_from_tags(code, "<code>", "</code>")
    code = extract_from_tags(code, "", "</code>")
    code = extract_from_tags(code, "", "\nEND SOLUTION")
    code = extract_from_tags(code, "", "\n### END SOLUTION")
    return code.strip()

def extract_between_closing_and_opening(text: str, closing_tag: str, opening_tag: str) -> str:
    start_index = text.find(closing_tag)
    if start_index == -1:
        return ""
    start_index += len(closing_tag)
    end_index = text.find(opening_tag, start_index)
    if end_index == -1:
        return text[start_index:].strip()
    return text[start_index:end_index].strip()

In [4]:
def get_solution(cmbagent_results):
    chat_history = cmbagent_results['chat_history']
    final_context = cmbagent_results['final_context']
    # Build path to result.txt
    result_path = os.path.join(
        final_context['work_dir'],
        #final_context['database_path'],
        "result.txt"
    )
    result_path = os.path.abspath(result_path)

    # Read result answer
    try:
        with open(result_path, 'r') as f:
            result_answer = f.read().strip()
            return result_answer
    except FileNotFoundError as e:
        return f"Result file not found: {e}"



## One shot

In [5]:
# samples = ds.select(list(range(30, 40)))
samples = ds.select(list(range(0, 20)))

In [6]:
# # enumerate(samples)
# for i, sample in enumerate(samples):
#     print("==="*18)
#     # print(replace_prompt_end(sample['prompt']))
#     print(sample['prompt'])

In [7]:
def my_agent(task, metadata):

    results = cmbagent.one_shot(task,
                                max_rounds=10,
                                agent='researcher',
                                researcher_filename = "result.txt",
                                # researcher_model='gpt-4o',
                                researcher_model="gemini-2.5-pro-preview-03-25",
                                work_dir="/Users/boris/Desktop/cmbagent_evals",
                                )


    # return get_result(results)
    return get_solution(results)


In [8]:
results = []

for i, sample in enumerate(samples):
    print(f"### Sample {i+1}: {sample['metadata']['problem_id']} ###")
    # print(f"Write a short code following the given format and indentation. "
    # f"Place the executable code between <code> and </code> tags, without any other non-executable things \n"
    # f"Also save the code you place between <code> and </code> tags in a result.txt file "
    # f"Only provide the code completion needed. Don't repeat the context code or add any unnecessary lines or comments."
    # f"Prompt:\n {sample['prompt']}")
    

    # Run agent
    raw_output = my_agent( rf"""
Write a short code following the given format and indentation, to complete the provided code.
In the final answer, only provide the code completion needed. Make sure to preserve indentation. 
Place the executable code between <code> and </code> tags.
There must be no other non-executable things in your response. 
Don't repeat the context code or add any unnecessary lines or comments.. 
Prompt:
{sample['prompt']}
"""
    , sample["metadata"]
    )
    
    print(raw_output)  # Check structur
    # import sys; sys.exit()
    
    #solution_code = postprocess(raw_output)
    solution_code = extract_between_closing_and_opening(raw_output,"<code>", "</code>")
    
    print("Agent Solution:")
    print(solution_code)
    # import sys; sys.exit()
    # Build the full test script
    code_context = sample["code_context"]
    full_code = (
        f"{code_context}\n"
        + f"solution = '''{solution_code}'''\n"
        + "test_execution(solution)\n"
    )
    if "test_string(" in code_context:
        full_code += "test_string(solution)\n"

    print("\n--- Running Test ---")
    print(full_code)
    try:
        exec_locals = {}
        #exec(full_code, {}, exec_locals)
        exec(full_code)
        print("✅ Passed")
        results.append(("C", sample["metadata"]["problem_id"]))
    except Exception as e:
        print("❌ Failed")
        traceback.print_exc()
        results.append(("I", sample["metadata"]["problem_id"]))
    
    print("="*80)

### Sample 1: 0 ###

Write a short code following the given format and indentation, to complete the provided code.
In the final answer, only provide the code completion needed. Make sure to preserve indentation. 
Place the executable code between <code> and </code> tags.
There must be no other non-executable things in your response. 
Don't repeat the context code or add any unnecessary lines or comments.. 
Prompt:
Problem:
I have the following DataFrame:
    Col1  Col2  Col3  Type
0      1     2     3     1
1      4     5     6     1
2      7     8     9     2
3    10    11    12     2
4    13    14    15     3
5    16    17    18     3


The DataFrame is read from a CSV file. All rows which have Type 1 are on top, followed by the rows with Type 2, followed by the rows with Type 3, etc.
I would like to shuffle the order of the DataFrame's rows according to a list. \
For example, give a list [2, 4, 0, 3, 1, 5] and desired result should be:
    Col1  Col2  Col3  Type
2      7     8     9

Traceback (most recent call last):
  File "/var/folders/h0/4_tf3pcn1h32ks9grh325v400000gn/T/ipykernel_38748/299325331.py", line 49, in <module>
    exec(full_code)
  File "<string>", line 55, in <module>
  File "<string>", line 52, in test_execution
KeyError: 'result'



Write a short code following the given format and indentation, to complete the provided code.
In the final answer, only provide the code completion needed. Make sure to preserve indentation. 
Place the executable code between <code> and </code> tags.
There must be no other non-executable things in your response. 
Don't repeat the context code or add any unnecessary lines or comments.. 
Prompt:
Problem:
I have the following DataFrame:
    Col1  Col2  Col3  Type
0      1     2     3     1
1      4     5     6     1
2      7     8     9     2
3    10    11    12     2
4    13    14    15     3
5    16    17    18     3


The DataFrame is read from a CSV file. All rows which have Type 1 are on top, followed by the rows with Type 2, followed by the rows with Type 3, etc.
I would like to shuffle the order of the DataFrame's rows according to a list. 
For example, give a list [2, 4, 0, 3, 1, 5] and desired DataFrame should be:
    Col1  Col2  Col3  Type
2      7     8     9     2
4     13   

Traceback (most recent call last):
  File "/var/folders/h0/4_tf3pcn1h32ks9grh325v400000gn/T/ipykernel_38748/299325331.py", line 49, in <module>
    exec(full_code)
  File "<string>", line 56, in <module>
  File "<string>", line 53, in test_execution
KeyError: 'result'



Write a short code following the given format and indentation, to complete the provided code.
In the final answer, only provide the code completion needed. Make sure to preserve indentation. 
Place the executable code between <code> and </code> tags.
There must be no other non-executable things in your response. 
Don't repeat the context code or add any unnecessary lines or comments.. 
Prompt:
Problem:
I have following pandas dataframe :


import pandas as pd 
from pandas import Series, DataFrame
data = DataFrame({'Qu1': ['apple', 'potato', 'cheese', 'banana', 'cheese', 'banana', 'cheese', 'potato', 'egg'],
              'Qu2': ['sausage', 'banana', 'apple', 'apple', 'apple', 'sausage', 'banana', 'banana', 'banana'],
              'Qu3': ['apple', 'potato', 'sausage', 'cheese', 'cheese', 'potato', 'cheese', 'potato', 'egg']})


I'd like to change values in columns Qu1,Qu2,Qu3 according to value_counts() when value count great or equal 2
For example for Qu1 column 
>>> pd.value_counts(

Traceback (most recent call last):
  File "/var/folders/h0/4_tf3pcn1h32ks9grh325v400000gn/T/ipykernel_38748/299325331.py", line 49, in <module>
    exec(full_code)
  File "<string>", line 120, in <module>
  File "<string>", line 117, in test_execution
KeyError: 'result'



Write a short code following the given format and indentation, to complete the provided code.
In the final answer, only provide the code completion needed. Make sure to preserve indentation. 
Place the executable code between <code> and </code> tags.
There must be no other non-executable things in your response. 
Don't repeat the context code or add any unnecessary lines or comments.. 
Prompt:
Problem:
I have following pandas dataframe :


import pandas as pd
from pandas import Series, DataFrame
data = DataFrame({'Qu1': ['apple', 'potato', 'cheese', 'banana', 'cheese', 'banana', 'cheese', 'potato', 'egg'],
              'Qu2': ['sausage', 'banana', 'apple', 'apple', 'apple', 'sausage', 'banana', 'banana', 'banana'],
              'Qu3': ['apple', 'potato', 'sausage', 'cheese', 'cheese', 'potato', 'cheese', 'potato', 'egg']})


I'd like to change values in columns Qu1,Qu2,Qu3 according to value_counts() when value count great or equal 3
For example for Qu1 column
>>> pd.value_counts(da

Traceback (most recent call last):
  File "/var/folders/h0/4_tf3pcn1h32ks9grh325v400000gn/T/ipykernel_38748/299325331.py", line 49, in <module>
    exec(full_code)
  File "<string>", line 120, in <module>
  File "<string>", line 117, in test_execution
KeyError: 'result'



Write a short code following the given format and indentation, to complete the provided code.
In the final answer, only provide the code completion needed. Make sure to preserve indentation. 
Place the executable code between <code> and </code> tags.
There must be no other non-executable things in your response. 
Don't repeat the context code or add any unnecessary lines or comments.. 
Prompt:
Problem:
I have following pandas dataframe :


import pandas as pd 
from pandas import Series, DataFrame
data = DataFrame({'Qu1': ['apple', 'potato', 'cheese', 'banana', 'cheese', 'banana', 'cheese', 'potato', 'egg'],
              'Qu2': ['sausage', 'banana', 'apple', 'apple', 'apple', 'sausage', 'banana', 'banana', 'banana'],
              'Qu3': ['apple', 'potato', 'sausage', 'cheese', 'cheese', 'potato', 'cheese', 'potato', 'egg']})


I'd like to change values in columns Qu1,Qu2,Qu3 according to value_counts() when value count great or equal 2
For example for Qu1 column 
>>> pd.value_counts(

Traceback (most recent call last):
  File "/var/folders/h0/4_tf3pcn1h32ks9grh325v400000gn/T/ipykernel_38748/299325331.py", line 49, in <module>
    exec(full_code)
  File "<string>", line 126, in <module>
  File "<string>", line 118, in test_execution
  File "<string>", line 5
    for col in ['Qu1', 'Qu2', 'Qu3']:
    ^^^
IndentationError: expected an indented block after function definition on line 4



Write a short code following the given format and indentation, to complete the provided code.
In the final answer, only provide the code completion needed. Make sure to preserve indentation. 
Place the executable code between <code> and </code> tags.
There must be no other non-executable things in your response. 
Don't repeat the context code or add any unnecessary lines or comments.. 
Prompt:
Problem:
I have following pandas dataframe :


import pandas as pd
from pandas import Series, DataFrame
data = DataFrame({'Qu1': ['apple', 'potato', 'cheese', 'banana', 'cheese', 'banana', 'cheese', 'potato', 'egg'],
              'Qu2': ['sausage', 'banana', 'apple', 'apple', 'apple', 'sausage', 'banana', 'banana', 'banana'],
              'Qu3': ['apple', 'potato', 'sausage', 'cheese', 'cheese', 'potato', 'cheese', 'potato', 'egg']})


I'd like to change values in columns Qu1 according to value_counts() when value count great or equal 3 and change values in columns Qu2 and Qu3 according to val

Traceback (most recent call last):
  File "/var/folders/h0/4_tf3pcn1h32ks9grh325v400000gn/T/ipykernel_38748/299325331.py", line 49, in <module>
    exec(full_code)
  File "<string>", line 139
    result.loc[condition_for_replacement, column_name] = 'other''''
                                                                  ^
SyntaxError: unterminated string literal (detected at line 139)



Write a short code following the given format and indentation, to complete the provided code.
In the final answer, only provide the code completion needed. Make sure to preserve indentation. 
Place the executable code between <code> and </code> tags.
There must be no other non-executable things in your response. 
Don't repeat the context code or add any unnecessary lines or comments.. 
Prompt:
Problem:
I have a dataset :
id    url     keep_if_dup
1     A.com   Yes
2     A.com   Yes
3     B.com   No
4     B.com   No
5     C.com   No


I want to remove duplicates, i.e. keep first occurence of "url" field, BUT  keep duplicates if the field "keep_if_dup" is YES.
Expected output :
id    url     keep_if_dup
1     A.com   Yes
2     A.com   Yes
3     B.com   No
5     C.com   No


What I tried :
Dataframe=Dataframe.drop_duplicates(subset='url', keep='first')


which of course does not take into account "keep_if_dup" field. Output is :
id    url     keep_if_dup
1     A.com   Yes
3     B.com   N

Traceback (most recent call last):
  File "/var/folders/h0/4_tf3pcn1h32ks9grh325v400000gn/T/ipykernel_38748/299325331.py", line 49, in <module>
    exec(full_code)
  File "<string>", line 65, in <module>
  File "<string>", line 56, in test_execution
KeyError: 'result'



Write a short code following the given format and indentation, to complete the provided code.
In the final answer, only provide the code completion needed. Make sure to preserve indentation. 
Place the executable code between <code> and </code> tags.
There must be no other non-executable things in your response. 
Don't repeat the context code or add any unnecessary lines or comments.. 
Prompt:
Problem:
I have a dataset :
id    url     drop_if_dup
1     A.com   Yes
2     A.com   Yes
3     B.com   No
4     B.com   No
5     C.com   No


I want to remove duplicates, i.e. keep first occurence of "url" field, BUT keep duplicates if the field "drop_if_dup" is No.
Expected output :
id    url     drop_if_dup
1     A.com   Yes
3     B.com   No
4     B.com   No
5     C.com   No


What I tried :
Dataframe=Dataframe.drop_duplicates(subset='url', keep='first')


which of course does not take into account "drop_if_dup" field. Output is :
id    url     drop_if_dup
1     A.com   Yes
3     B.com   No
5

Traceback (most recent call last):
  File "/var/folders/h0/4_tf3pcn1h32ks9grh325v400000gn/T/ipykernel_38748/299325331.py", line 49, in <module>
    exec(full_code)
  File "<string>", line 59, in <module>
  File "<string>", line 56, in test_execution
KeyError: 'result'



Write a short code following the given format and indentation, to complete the provided code.
In the final answer, only provide the code completion needed. Make sure to preserve indentation. 
Place the executable code between <code> and </code> tags.
There must be no other non-executable things in your response. 
Don't repeat the context code or add any unnecessary lines or comments.. 
Prompt:
Problem:
I have a dataset :
id    url     keep_if_dup
1     A.com   Yes
2     A.com   Yes
3     B.com   No
4     B.com   No
5     C.com   No


I want to remove duplicates, i.e. keep last occurence of "url" field, BUT keep duplicates if the field "keep_if_dup" is YES.
Expected output :
id    url     keep_if_dup
1     A.com   Yes
2     A.com   Yes
4     B.com   No
5     C.com   No


What I tried :
Dataframe=Dataframe.drop_duplicates(subset='url', keep='first')


which of course does not take into account "keep_if_dup" field. Output is :
id    url     keep_if_dup
1     A.com   Yes
3     B.com   No


Traceback (most recent call last):
  File "/var/folders/h0/4_tf3pcn1h32ks9grh325v400000gn/T/ipykernel_38748/299325331.py", line 49, in <module>
    exec(full_code)
  File "<string>", line 59, in <module>
  File "<string>", line 56, in test_execution
KeyError: 'result'



Write a short code following the given format and indentation, to complete the provided code.
In the final answer, only provide the code completion needed. Make sure to preserve indentation. 
Place the executable code between <code> and </code> tags.
There must be no other non-executable things in your response. 
Don't repeat the context code or add any unnecessary lines or comments.. 
Prompt:
Problem:
I'm Looking for a generic way of turning a DataFrame to a nested dictionary
This is a sample data frame 
    name    v1  v2  v3
0   A       A1  A11 1
1   A       A2  A12 2
2   B       B1  B12 3
3   C       C1  C11 4
4   B       B2  B21 5
5   A       A2  A21 6


The number of columns may differ and so does the column names.
like this : 
{
'A' : { 
    'A1' : { 'A11' : 1 }
    'A2' : { 'A12' : 2 , 'A21' : 6 }} , 
'B' : { 
    'B1' : { 'B12' : 3 } } , 
'C' : { 
    'C1' : { 'C11' : 4}}
}


What is best way to achieve this ? 
closest I got was with the zip function but haven't managed to ma

Traceback (most recent call last):
  File "/var/folders/h0/4_tf3pcn1h32ks9grh325v400000gn/T/ipykernel_38748/299325331.py", line 49, in <module>
    exec(full_code)
  File "<string>", line 76, in <module>
  File "<string>", line 66, in test_execution
AssertionError



Write a short code following the given format and indentation, to complete the provided code.
In the final answer, only provide the code completion needed. Make sure to preserve indentation. 
Place the executable code between <code> and </code> tags.
There must be no other non-executable things in your response. 
Don't repeat the context code or add any unnecessary lines or comments.. 
Prompt:
Problem:
I have been struggling with removing the time zone info from a column in a pandas dataframe. I have checked the following question, but it does not work for me:


Can I export pandas DataFrame to Excel stripping tzinfo?


I used tz_localize to assign a timezone to a datetime object, because I need to convert to another timezone using tz_convert. This adds an UTC offset, in the way "-06:00". I need to get rid of this offset, because it results in an error when I try to export the dataframe to Excel.


Actual output


2015-12-01 00:00:00-06:00


Desired output
2015-12-01 00:00:00


I have

Traceback (most recent call last):
  File "/var/folders/h0/4_tf3pcn1h32ks9grh325v400000gn/T/ipykernel_38748/299325331.py", line 49, in <module>
    exec(full_code)
  File "<string>", line 78, in <module>
  File "<string>", line 66, in test_execution
  File "<string>", line 5
    df['datetime'] = df['datetime'].dt.tz_localize(None)
    ^^
IndentationError: expected an indented block after function definition on line 4



Write a short code following the given format and indentation, to complete the provided code.
In the final answer, only provide the code completion needed. Make sure to preserve indentation. 
Place the executable code between <code> and </code> tags.
There must be no other non-executable things in your response. 
Don't repeat the context code or add any unnecessary lines or comments.. 
Prompt:
Problem:
I have been struggling with removing the time zone info from a column in a pandas dataframe. I have checked the following question, but it does not work for me:


Can I export pandas DataFrame to Excel stripping tzinfo?


I used tz_localize to assign a timezone to a datetime object, because I need to convert to another timezone using tz_convert. This adds an UTC offset, in the way "-06:00". I need to get rid of this offset, because it results in an error when I try to export the dataframe to Excel.


Actual output


2015-12-01 00:00:00-06:00


Desired output
01-Dec-2015 00:00:00


I hav

Traceback (most recent call last):
  File "/var/folders/h0/4_tf3pcn1h32ks9grh325v400000gn/T/ipykernel_38748/299325331.py", line 49, in <module>
    exec(full_code)
  File "<string>", line 80, in <module>
  File "<string>", line 68, in test_execution
AssertionError



Write a short code following the given format and indentation, to complete the provided code.
In the final answer, only provide the code completion needed. Make sure to preserve indentation. 
Place the executable code between <code> and </code> tags.
There must be no other non-executable things in your response. 
Don't repeat the context code or add any unnecessary lines or comments.. 
Prompt:
Problem:
I have been struggling with removing the time zone info from a column in a pandas dataframe. I have checked the following question, but it does not work for me:


Can I export pandas DataFrame to Excel stripping tzinfo?


I used tz_localize to assign a timezone to a datetime object, because I need to convert to another timezone using tz_convert. This adds an UTC offset, in the way "-06:00". I need to get rid of this offset, because it results in an error when I try to export the dataframe to Excel.


Actual output


2015-12-01 00:00:00-06:00


Desired output
2015-12-01 00:00:00


I have

Traceback (most recent call last):
  File "/var/folders/h0/4_tf3pcn1h32ks9grh325v400000gn/T/ipykernel_38748/299325331.py", line 49, in <module>
    exec(full_code)
  File "<string>", line 94, in <module>
  File "<string>", line 75, in test_execution
AssertionError



Write a short code following the given format and indentation, to complete the provided code.
In the final answer, only provide the code completion needed. Make sure to preserve indentation. 
Place the executable code between <code> and </code> tags.
There must be no other non-executable things in your response. 
Don't repeat the context code or add any unnecessary lines or comments.. 
Prompt:
Problem:
I have a dataframe that looks like this:
     product     score
0    1179160  0.424654
1    1066490  0.424509
2    1148126  0.422207
3    1069104  0.420455
4    1069105  0.414603
..       ...       ...
491  1160330  0.168784
492  1069098  0.168749
493  1077784  0.168738
494  1193369  0.168703
495  1179741  0.168684


what I'm trying to achieve is to multiply certain score values corresponding to specific products by a constant.
I have the products target of this multiplication in a list like this: [1069104, 1069105] (this is just a simplified
example, in reality it would be more than tw

Traceback (most recent call last):
  File "/var/folders/h0/4_tf3pcn1h32ks9grh325v400000gn/T/ipykernel_38748/299325331.py", line 49, in <module>
    exec(full_code)
  File "<string>", line 107, in <module>
  File "<string>", line 104, in test_execution
AssertionError



Write a short code following the given format and indentation, to complete the provided code.
In the final answer, only provide the code completion needed. Make sure to preserve indentation. 
Place the executable code between <code> and </code> tags.
There must be no other non-executable things in your response. 
Don't repeat the context code or add any unnecessary lines or comments.. 
Prompt:
Problem:
I have a dataframe that looks like this:
     product     score
0    1179160  0.424654
1    1066490  0.424509
2    1148126  0.422207
3    1069104  0.420455
4    1069105  0.414603
..       ...       ...
491  1160330  0.168784
492  1069098  0.168749
493  1077784  0.168738
494  1193369  0.168703
495  1179741  0.168684


what I'm trying to achieve is to multiply certain score values corresponding to specific products by a constant.
I have a list like this: [1069104, 1069105] (this is just a simplified
example, in reality it would be more than two products) and my goal is to obtain this:
Mul

Traceback (most recent call last):
  File "/var/folders/h0/4_tf3pcn1h32ks9grh325v400000gn/T/ipykernel_38748/299325331.py", line 49, in <module>
    exec(full_code)
  File "<string>", line 104, in <module>
  File "<string>", line 77, in test_execution
  File "<string>", line 6
    df_operational = current_df.copy()
                   ^
SyntaxError: invalid syntax


## Planning and Control

In [13]:
samples = ds.select(list(range(0, 20)))

In [14]:
# # enumerate(samples)
# for i, sample in enumerate(samples):
#     print("==="*18)
#     # print(replace_prompt_end(sample['prompt']))
#     # print(sample['prompt'])

In [15]:
def my_agent(task, metadata):

    # results = cmbagent.one_shot(task,
    #                             max_rounds=10,
    #                             agent='researcher',
    #                             researcher_filename = "result.txt",
    #                             researcher_model='gpt-4o',
    #                             work_dir="/Users/boris/Desktop/cmbagent_evals",
    #                             )

    results = cmbagent.planning_and_control(task+r"""
=========
Use researcher agent to propose a solution, then engineer to test (using the full code), then researcher to  provide the final code completion as requested in the problem statement. 
The plan must end with researcher.
1. researcher
2. engineer
3. researcher
This order must never be changed.
""",
                                # max_rounds=10,
                                plan_instructions = r"""
Use researcher agent to propose a solution, then engineer to test, then researcher to formulate the final response to the problem of interest. 
The plan must end with researcher.
1. researcher
2. engineer
3. researcher
This order must never be changed.
""",
                                plan_reviewer_model = "claude-3-7-sonnet-20250219",
                                # agent='researcher',
                                researcher_filename = "result.txt",
                                # researcher_model='gpt-4o',
                                researcher_model="gemini-2.5-pro-preview-03-25",
                                engineer_model="gemini-2.5-pro-preview-03-25",
                                work_dir="/Users/boris/Desktop/cmbagent_evals",
                                )


    # return get_result(results)
    return get_solution(results)


In [16]:
results = []

for i, sample in enumerate(samples):
    print(f"### Sample {i+1}: {sample['metadata']['problem_id']} ###")
    # print(f"Write a short code following the given format and indentation. "
    # f"Place the executable code between <code> and </code> tags, without any other non-executable things \n"
    # f"Also save the code you place between <code> and </code> tags in a result.txt file "
    # f"Only provide the code completion needed. Don't repeat the context code or add any unnecessary lines or comments."
    # f"Prompt:\n {sample['prompt']}")
    

    # Run agent
    raw_output = my_agent( rf"""
Write a short code following the given format and indentation, to complete the provided code.
In the final answer, only provide the code completion needed. Make sure to preserve indentation. 
Place the executable code between <code> and </code> tags.
There must be no other non-executable things in your response. 
Don't repeat the context code or add any unnecessary lines or comments.. 
Prompt:
{sample['prompt']}
"""
    , sample["metadata"]
    )
    
    print(raw_output)  # Check structur
    # import sys; sys.exit()
    
    #solution_code = postprocess(raw_output)
    solution_code = extract_between_closing_and_opening(raw_output,"<code>", "</code>")
    
    print("Agent Solution:")
    print(solution_code)
    # import sys; sys.exit()
    # Build the full test script
    code_context = sample["code_context"]
    full_code = (
        f"{code_context}\n"
        + f"solution = '''{solution_code}'''\n"
        + "test_execution(solution)\n"
    )
    if "test_string(" in code_context:
        full_code += "test_string(solution)\n"

    print("\n--- Running Test ---")
    print(full_code)
    try:
        exec_locals = {}
        #exec(full_code, {}, exec_locals)
        exec(full_code)
        print("✅ Passed")
        results.append(("C", sample["metadata"]["problem_id"]))
    except Exception as e:
        print("❌ Failed")
        traceback.print_exc()
        results.append(("I", sample["metadata"]["problem_id"]))
    
    print("="*80)

### Sample 1: 0 ###

Write a short code following the given format and indentation, to complete the provided code.
In the final answer, only provide the code completion needed. Make sure to preserve indentation. 
Place the executable code between <code> and </code> tags.
There must be no other non-executable things in your response. 
Don't repeat the context code or add any unnecessary lines or comments.. 
Prompt:
Problem:
I have the following DataFrame:
    Col1  Col2  Col3  Type
0      1     2     3     1
1      4     5     6     1
2      7     8     9     2
3    10    11    12     2
4    13    14    15     3
5    16    17    18     3


The DataFrame is read from a CSV file. All rows which have Type 1 are on top, followed by the rows with Type 2, followed by the rows with Type 3, etc.
I would like to shuffle the order of the DataFrame's rows according to a list. \
For example, give a list [2, 4, 0, 3, 1, 5] and desired result should be:
    Col1  Col2  Col3  Type
2      7     8     9

Traceback (most recent call last):
  File "/var/folders/h0/4_tf3pcn1h32ks9grh325v400000gn/T/ipykernel_38748/299325331.py", line 49, in <module>
    exec(full_code)
  File "<string>", line 56, in <module>
  File "<string>", line 53, in test_execution
KeyError: 'result'



Write a short code following the given format and indentation, to complete the provided code.
In the final answer, only provide the code completion needed. Make sure to preserve indentation. 
Place the executable code between <code> and </code> tags.
There must be no other non-executable things in your response. 
Don't repeat the context code or add any unnecessary lines or comments.. 
Prompt:
Problem:
I have following pandas dataframe :


import pandas as pd 
from pandas import Series, DataFrame
data = DataFrame({'Qu1': ['apple', 'potato', 'cheese', 'banana', 'cheese', 'banana', 'cheese', 'potato', 'egg'],
              'Qu2': ['sausage', 'banana', 'apple', 'apple', 'apple', 'sausage', 'banana', 'banana', 'banana'],
              'Qu3': ['apple', 'potato', 'sausage', 'cheese', 'cheese', 'potato', 'cheese', 'potato', 'egg']})


I'd like to change values in columns Qu1,Qu2,Qu3 according to value_counts() when value count great or equal 2
For example for Qu1 column 
>>> pd.value_counts(

Traceback (most recent call last):
  File "/var/folders/h0/4_tf3pcn1h32ks9grh325v400000gn/T/ipykernel_38748/299325331.py", line 49, in <module>
    exec(full_code)
  File "<string>", line 120, in <module>
  File "<string>", line 117, in test_execution
KeyError: 'result'



Write a short code following the given format and indentation, to complete the provided code.
In the final answer, only provide the code completion needed. Make sure to preserve indentation. 
Place the executable code between <code> and </code> tags.
There must be no other non-executable things in your response. 
Don't repeat the context code or add any unnecessary lines or comments.. 
Prompt:
Problem:
I have following pandas dataframe :


import pandas as pd
from pandas import Series, DataFrame
data = DataFrame({'Qu1': ['apple', 'potato', 'cheese', 'banana', 'cheese', 'banana', 'cheese', 'potato', 'egg'],
              'Qu2': ['sausage', 'banana', 'apple', 'apple', 'apple', 'sausage', 'banana', 'banana', 'banana'],
              'Qu3': ['apple', 'potato', 'sausage', 'cheese', 'cheese', 'potato', 'cheese', 'potato', 'egg']})


I'd like to change values in columns Qu1,Qu2,Qu3 according to value_counts() when value count great or equal 3
For example for Qu1 column
>>> pd.value_counts(da

Traceback (most recent call last):
  File "/var/folders/h0/4_tf3pcn1h32ks9grh325v400000gn/T/ipykernel_38748/299325331.py", line 49, in <module>
    exec(full_code)
  File "<string>", line 124
    result.loc[mask_for_replacement, col_name] = 'other''''
                                                          ^
SyntaxError: unterminated string literal (detected at line 124)



Write a short code following the given format and indentation, to complete the provided code.
In the final answer, only provide the code completion needed. Make sure to preserve indentation. 
Place the executable code between <code> and </code> tags.
There must be no other non-executable things in your response. 
Don't repeat the context code or add any unnecessary lines or comments.. 
Prompt:
Problem:
I have following pandas dataframe :


import pandas as pd 
from pandas import Series, DataFrame
data = DataFrame({'Qu1': ['apple', 'potato', 'cheese', 'banana', 'cheese', 'banana', 'cheese', 'potato', 'egg'],
              'Qu2': ['sausage', 'banana', 'apple', 'apple', 'apple', 'sausage', 'banana', 'banana', 'banana'],
              'Qu3': ['apple', 'potato', 'sausage', 'cheese', 'cheese', 'potato', 'cheese', 'potato', 'egg']})


I'd like to change values in columns Qu1,Qu2,Qu3 according to value_counts() when value count great or equal 2
For example for Qu1 column 
>>> pd.value_counts(

Traceback (most recent call last):
  File "/var/folders/h0/4_tf3pcn1h32ks9grh325v400000gn/T/ipykernel_38748/299325331.py", line 49, in <module>
    exec(full_code)
  File "<string>", line 133, in <module>
  File "<string>", line 118, in test_execution
  File "<string>", line 5
    result_df = df.copy()
    ^^^^^^^^^
IndentationError: expected an indented block after function definition on line 4



Write a short code following the given format and indentation, to complete the provided code.
In the final answer, only provide the code completion needed. Make sure to preserve indentation. 
Place the executable code between <code> and </code> tags.
There must be no other non-executable things in your response. 
Don't repeat the context code or add any unnecessary lines or comments.. 
Prompt:
Problem:
I have following pandas dataframe :


import pandas as pd
from pandas import Series, DataFrame
data = DataFrame({'Qu1': ['apple', 'potato', 'cheese', 'banana', 'cheese', 'banana', 'cheese', 'potato', 'egg'],
              'Qu2': ['sausage', 'banana', 'apple', 'apple', 'apple', 'sausage', 'banana', 'banana', 'banana'],
              'Qu3': ['apple', 'potato', 'sausage', 'cheese', 'cheese', 'potato', 'cheese', 'potato', 'egg']})


I'd like to change values in columns Qu1 according to value_counts() when value count great or equal 3 and change values in columns Qu2 and Qu3 according to val

KeyboardInterrupt: 