In [None]:
import pandas as pd
import json
import model_eval as me
import spider_utils_py as sp_utils
from spider_utils_py import load_csv_database
import numpy as np
import time
import pickle
import ast
import matplotlib.pyplot as plt
import sys, os
import math

In [None]:
# M: how many full passes (rounds) process_notebooks makes over the notebooks
GLOBAL_ROUND_COUNT = 1
# N: how many generate/execute iterations iterative_llm_prompt runs per intent
GLOBAL_ITERATION_COUNT = 1
# When True every notebook is processed and the per-round notebook cap is ignored
GLOBAL_RUN_ALL = True
# Notebook cap per round; becomes the label 'ALL' (used in output names) when no cap applies
GLOBAL_NOTEBOOK_COUNT = 'ALL' if GLOBAL_RUN_ALL else 1
# Character budget for the history section assembled by build_prompt
GLOBAL_MAX_CHAR_LIMIT = 500_000  # alternative value noted by the author: 512000
# Passed to me.execute_intent_code; presumably caps the lines of captured output
GLOBAL_CODE_OUTPUT_LINES = 10
# Provider name handed to me.call_llm
GLOBAL_LLM_MODEL = 'anthropic'



In [None]:

# # Load the Arcade dataset
# df =  me.load_from_pkl('arcade_dataset_path')
# df['generated_intent_code'] = None
# df['generated_output'] = None
# df['original_index'] = df.index  # Add a column to store the original index


# Load the Spider-2 Lite dataset.
# NOTE(review): unpickling can execute arbitrary code; only load this file from a trusted source.
df_spider2 = pd.read_pickle("spider_two_intents_path")
# Result columns filled in later by process_notebooks
df_spider2['generated_intent_code'] = None
df_spider2['generated_output'] = None
df_spider2['original_index'] = df_spider2.index  # Add a column to store the original index
# Keep only the rows flagged execute_error == False. The explicit .copy() makes the
# filtered frame independent of the loaded one, so the .loc assignments performed by
# process_notebooks write to this frame unambiguously instead of to a slice.
df_spider2 = df_spider2[df_spider2['execute_error'] == False].copy()


In [None]:
# Directory that holds the Spider-2 databases exported as CSV files
CSV_DBS_BASE_PATH = "/kaggle/input/spider-dbs-csv"
# Publish the directory through the environment; per the original note the spider utilities need it
os.environ["DB_CSVS_BASE_PATH"] = CSV_DBS_BASE_PATH
# Loader for a Spider-2 database; process_notebooks places it in the execution state
load_csv_database = sp_utils.load_csv_database

In [None]:
def generate_code_from_llm(prompt):
    """
    Request code for ``prompt`` from the configured LLM provider.

    The call is delegated to ``me.call_llm`` using the provider named by
    ``GLOBAL_LLM_MODEL``, a temperature of 0 and a budget of 8192 tokens.

    Parameters:
    - prompt: String containing the task description and context

    Returns:
    - Whatever ``me.call_llm`` returns (expected to be the generated code as text)
    """
    request = dict(
        provider=GLOBAL_LLM_MODEL,
        prompt=prompt,
        temperature=0,
        max_tokens=8192,
    )
    return me.call_llm(**request)

# Shared preamble for all prompts: process_notebooks passes it to build_prompt, which
# places it at the very start of the prompt. It frames the model's role and lists the
# inputs it will receive (previous code, sample data-frame rows, the next intent).
global_context = """
You are a skilled data engineer tasked with completing existing Python code for the next user intent.  
You are provided the previous code, sample rows of the data frames available after executing the previous code, 
and next user intent to implement. Code generated in previous iterations will be provided. Think step by step.
"""

In [None]:
def build_prompt(global_context, old_rounds, current_round_blocks, base_prompt, current_index=None, current_round_num=None):
    """
    Assemble the prompt: preamble, setup code, a size-capped history of earlier
    intents, and the closing generation instructions.

    The history keeps the most recent intents whose combined length (including the
    history header) stays within GLOBAL_MAX_CHAR_LIMIT characters; older intents are
    dropped first. Kept intents are emitted oldest to newest.

    Parameters:
    - global_context: Text placed at the very start of the prompt
    - old_rounds: List of previous rounds' blocks (rounds are labeled from 1)
    - current_round_blocks: List of blocks from the current round so far
    - base_prompt: Text (the existing/setup code) placed right after the preamble
    - current_index: Not used by this function; accepted so existing calls keep working
    - current_round_num: Round label applied to the blocks of the current round

    Each block is a dict with 'intent' and 'code', and optionally 'output' and
    'iteration_history' (a list of dicts with 'code' and optional 'output').

    Returns:
    - The complete prompt string
    """
    def _render(round_label, position, entry):
        # Text of one intent: header, optional per-iteration trace, final code and output
        parts = [
            f"\n\n# Round {round_label} Intent {position}\n",
            f"Intent: {entry['intent']}\n",
        ]
        for step, attempt in enumerate(entry.get('iteration_history') or [], start=1):
            parts.append(f"# Round {round_label} Iteration {step}\n")
            parts.append(f"Code: {attempt['code']}\n")
            parts.append(f"Output: {attempt.get('output', 'N/A')}\n")
        parts.append(f"Final Code: {entry['code']}\n")
        parts.append(f"Output: {entry.get('output', 'N/A')}\n")
        return "".join(parts)

    # Preamble followed by the existing code
    opening = global_context + "\n" + base_prompt

    # Flatten every known intent, oldest first, tagged with its round and position
    labelled = [
        (round_no, position, entry)
        for round_no, blocks in enumerate(old_rounds, start=1)
        for position, entry in enumerate(blocks)
    ]
    if current_round_blocks:
        labelled.extend(
            (current_round_num, position, entry)
            for position, entry in enumerate(current_round_blocks)
        )

    header = "# History of generated code and intents from previous iterations \n"
    used = len(header)
    kept = []
    # Walk from the newest intent backwards and stop at the first one that no longer fits
    for round_no, position, entry in reversed(labelled):
        rendered = _render(round_no, position, entry)
        if used + len(rendered) > GLOBAL_MAX_CHAR_LIMIT:
            break
        kept.append(rendered)
        used += len(rendered)
    # Restore chronological order (oldest to newest)
    kept.reverse()

    closing = (
        "\n\nOutput: \n"
        "Generate only valid Python code for the next intent only not the entire notebook. Don't include any other explanations other than Python comments. Don't convert output to json."
    )
    return opening + "\n\n" + header + "".join(kept) + closing

In [None]:
def iterative_llm_prompt(prompt, exec_state, n=None, round_num=None):
    """
    Iteratively call the LLM and re-prompt it n times, passing each generated code
    and its execution output back into the prompt for the next iteration.

    Parameters:
      prompt     : The initial prompt text
      exec_state : Execution state to start from; a shallow copy is taken so the
                   caller's mapping itself is not rebound
      n          : Number of iterations; defaults to GLOBAL_ITERATION_COUNT
      round_num  : Round label used for the "# Round R Iteration I" entries appended
                   to the prompt. When omitted, the label is taken from the last
                   "Round <number>" found in the prompt (build_prompt emits these
                   labels oldest first, so the last one is the most recent), or 1
                   when the prompt contains none.

    Returns a 4-tuple:
      final_code   : The last generated code snippet (None when n is 0)
      final_output : The last execution output (None when n is 0)
      full_history : List of {"code", "output"} dicts, one per iteration
      exec_state   : The execution state after the final iteration
    """
    import re

    if n is None:
        n = GLOBAL_ITERATION_COUNT
    if round_num is None:
        # The first label in the prompt belongs to the oldest history entry, so use the last one.
        labels = re.findall(r"Round (\d+)", prompt)
        round_num = int(labels[-1]) if labels else 1

    current_prompt = prompt
    current_state = exec_state.copy()
    full_history = []
    final_code = None
    final_output = None

    for i in range(n):
        print(f"\n--- Iteration {i+1}/{n} ---")
        raw_code = generate_code_from_llm(current_prompt)
        cleaned_code = me.clean_code_markers(raw_code)
        # Execute the generated code. The state handed back by execute_intent_code is
        # kept (process_notebooks consumes the same return value that way), so the
        # state returned below reflects the executed code whether or not the helper
        # mutates its argument in place.
        iteration_output, returned_state = me.execute_intent_code(current_state, cleaned_code, GLOBAL_CODE_OUTPUT_LINES)
        if returned_state is not None:
            current_state = returned_state
        print(f"Iteration {i+1} - Generated code:\n{cleaned_code}\n")
        print(f"Iteration {i+1} - Execution output:\n{iteration_output}\n")
        full_history.append({
            "code": cleaned_code,
            "output": iteration_output
        })
        # Feed this attempt and its result back to the model for the next iteration
        current_prompt += (
            f"\n\n# Round {round_num} Iteration {i+1}\n"
            f"Code: {cleaned_code}\n"
            f"Output: {iteration_output}\n"
        )
        final_code = cleaned_code
        final_output = iteration_output

    return final_code, final_output, full_history, current_state


In [None]:
def process_notebooks(df, max_rounds=None):
    """
    Generate code for every intent of every notebook in ``df``, for ``max_rounds`` rounds.

    Rows are grouped by 'nb_name' in order of first appearance. For each notebook and
    round, the notebook's setup code ('nb_setup_code' of its first row) is executed to
    obtain a fresh execution state and sample output; then each row's 'intent' is turned
    into a prompt (build_prompt plus the intent and the sample output) and handed to
    iterative_llm_prompt. The blocks produced in earlier rounds of the same notebook and
    earlier intents of the current round are passed to build_prompt as history.

    Parameters:
    - df: DataFrame with columns 'nb_name', 'nb_setup_code', 'intent' and
      'original_index' (the latter is used as the row label for writing results)
    - max_rounds: Number of rounds; defaults to GLOBAL_ROUND_COUNT

    Returns:
    - ``df`` itself, updated in place: 'generated_intent_code', 'generated_output'
      and 'evolved_prompt' hold the values of the last round that processed the row

    When GLOBAL_RUN_ALL is False, at most GLOBAL_NOTEBOOK_COUNT notebooks are
    processed per round.
    """
    if max_rounds is None:
        max_rounds = GLOBAL_ROUND_COUNT

    # Make sure the prompt column exists with an object dtype before rows are filled in
    if 'evolved_prompt' not in df.columns:
        df['evolved_prompt'] = None

    memory = {}  # nb_name -> list of rounds, each a list of intent blocks
    notebooks = df.groupby('nb_name', sort=False)

    for round_num in range(1, max_rounds + 1):
        print(f"\nStarting round {round_num}\n{'='*20}")
        notebook_count = 0
        for nb_name, nb_group in notebooks:
            if not GLOBAL_RUN_ALL and notebook_count >= GLOBAL_NOTEBOOK_COUNT:
                print(f"Processed {GLOBAL_NOTEBOOK_COUNT} notebooks in round {round_num}. Moving to next round.")
                break
            previous_rounds = memory.setdefault(nb_name, [])
            new_round_memory = []

            print(f"\nProcessing notebook: {nb_name}\n{'-'*20}")
            nb_source = nb_group['nb_setup_code'].iloc[0]

            # Every notebook starts each round from a minimal state, then runs its setup code
            exec_state = {"pd": pd , "plt": plt, "np": np, "load_csv_database": load_csv_database}
            outputs = ""
            try:
                outputs, exec_state = me.execute_intent_code(exec_state, nb_source)
                print(outputs)
            except (ValueError, json.JSONDecodeError, SyntaxError) as exc:
                # Best effort: keep going with the minimal exec_state, but leave a trace
                print(
                    f"Warning: setup code for notebook '{nb_name}' failed "
                    f"({type(exc).__name__}: {exc}); continuing with minimal exec_state"
                )

            # The setup section of the prompt is identical for every intent of the notebook
            base_prompt = f"Setup Code: {nb_source}\n\n"

            # Process all rows in this notebook group
            for _, row in nb_group.iterrows():
                index = row['original_index']
                user_intent = row['intent']

                evolved_prompt = build_prompt(
                    global_context,
                    previous_rounds,
                    new_round_memory,
                    base_prompt,
                    nb_name,
                    current_round_num=round_num
                )

                # build_prompt ends with a sentence and no newline, so start a new paragraph
                evolved_prompt += f"\n\nNext Intent to implement: {user_intent}\n\nSample Input: {outputs}"

                print(f"\nEvolved Prompt for intent '{user_intent}' (index {index}):\n{'*'*20}\n{evolved_prompt}\n{'*'*20}")

                # exec_state carries over from one intent to the next within the notebook
                generated_code, generated_output, iteration_history, exec_state = iterative_llm_prompt(evolved_prompt, exec_state)
                print(f"Generated code for index {index}: {generated_code}")

                if generated_code is None:
                    print(f"Warning: Generated code is None for intent '{user_intent}' at index {index}")

                print("Executing intent code based on generated code")
                print(f"Generated output for index {index}: {generated_output}")

                new_round_memory.append({
                    "intent": user_intent,
                    "code": generated_code,
                    "output": generated_output,
                    "iteration_history": iteration_history
                })

                if generated_code is None:
                    print("No generated code available")
                else:
                    df.loc[index, 'generated_intent_code'] = generated_code
                if generated_output is None:
                    print("No generated output available")
                else:
                    df.loc[index, 'generated_output'] = str(generated_output)
                df.loc[index, 'evolved_prompt'] = evolved_prompt

            previous_rounds.append(new_round_memory)
            print(f"Finished processing all intents for notebook: {nb_name}")
            notebook_count += 1

    print("Finished processing all notebooks")
    return df

In [None]:
def divide_and_process_part(
    df: pd.DataFrame,
    part: str = "first",                # "first", "second", or "third"
    exclude_ids: tuple = ("local210",),
    output_dir: str = "/kaggle/working/",
) -> pd.DataFrame:
    """
    Process one third of the Spider-2 tasks and save the result as CSV.

    The unique 'spider_task_id' values are taken in order of first appearance and cut
    into three consecutive chunks of ceil(total / 3) ids; the last chunk takes whatever
    remains and can be empty when there are few ids. All rows belonging to the chosen
    chunk, minus the excluded task ids, are run through process_notebooks.

    Parameters:
    - df: DataFrame with a 'spider_task_id' column plus the columns process_notebooks needs
    - part: Which chunk to process: "first", "second" or "third" (case-insensitive)
    - exclude_ids: Task ids to drop from the chunk; a single string, any iterable of
      ids, or an empty value / None to exclude nothing
    - output_dir: Directory the CSV file is written to

    Returns:
    - The DataFrame returned by process_notebooks for the chunk

    Raises:
    - ValueError: if ``part`` is not one of the three names, or ``df`` has no task ids
    """
    part = part.lower()
    if part not in {"first", "second", "third"}:
        raise ValueError('`part` must be "first", "second", or "third".')

    # Step 1: unique task IDs in the order they appear
    unique_ids = df["spider_task_id"].drop_duplicates().tolist()
    total_ids  = len(unique_ids)
    print(f"Size: {total_ids}")
    if total_ids == 0:
        raise ValueError("DataFrame has no rows or no `spider_task_id` values.")

    # Step 2: determine slice bounds
    chunk = math.ceil(total_ids / 3)
    bounds = {
        "first" : (0, chunk),
        "second": (chunk, 2 * chunk),
        "third" : (2 * chunk, total_ids),
    }
    start, stop = bounds[part]

    # Step 3: select the rows of the chosen chunk, leaving out excluded tasks.
    # Both conditions are combined before the single .copy() so that the frame handed
    # to process_notebooks (which assigns into it with .loc) is an independent frame.
    id_slice = unique_ids[start:stop]
    keep = df["spider_task_id"].isin(id_slice)
    if exclude_ids:
        if isinstance(exclude_ids, str):
            # A bare string would otherwise be split into characters
            exclude_ids = (exclude_ids,)
        keep &= ~df["spider_task_id"].isin(list(exclude_ids))
    df_slice = df[keep].copy()

    # Step 4: run the notebook processor on just this slice
    processed_slice = process_notebooks(df_slice)

    # Step 5: save to the required location
    out_path = os.path.join(
        output_dir,
        f"spider2_intents_transformed_generated_"
        f"{GLOBAL_LLM_MODEL}_{part}_notebooks_m_"
        f"{GLOBAL_ROUND_COUNT}_n_{GLOBAL_ITERATION_COUNT}.csv",
    )
    processed_slice.to_csv(out_path, index=False)
    return processed_slice


In [None]:
# # Process the Arcade notebooks and update the DataFrame
# updated_df = process_notebooks(df)


In [None]:
# # Save the updated DataFrame to a new CSV file
# updated_df.to_csv(f'/kaggle/working/arcade_20_new_transformed_generated_{GLOBAL_LLM_MODEL}_{GLOBAL_NOTEBOOK_COUNT}_notebooks_m_{GLOBAL_ROUND_COUNT}_n_{GLOBAL_ITERATION_COUNT}.csv', index=False)
# print(f"Processing complete. Results saved to 'arcade_20_new_transformed_generated_{GLOBAL_LLM_MODEL}_{GLOBAL_NOTEBOOK_COUNT}_notebooks_m_{GLOBAL_ROUND_COUNT}_n_{GLOBAL_ITERATION_COUNT}.csv'")

In [None]:
# %%capture
# Process the Spider2 notebooks and update the DataFrame and Save to CSV file
# updated_df_spider2 = process_notebooks(df_spider2)
# updated_df_spider2.to_csv(f'/kaggle/working/spider2_transformed_generated_{GLOBAL_LLM_MODEL}_one_notebooks_m_{GLOBAL_ROUND_COUNT}_n_{GLOBAL_ITERATION_COUNT}.csv', index=False)

