In [1]:
!pip install openai-agents

Collecting openai-agents
  Downloading openai_agents-0.6.1-py3-none-any.whl.metadata (13 kB)
Collecting griffe<2,>=1.5.6 (from openai-agents)
  Downloading griffe-1.15.0-py3-none-any.whl.metadata (5.2 kB)
Collecting types-requests<3,>=2.0 (from openai-agents)
  Downloading types_requests-2.32.4.20250913-py3-none-any.whl.metadata (2.0 kB)
Collecting colorama>=0.4 (from griffe<2,>=1.5.6->openai-agents)
  Downloading colorama-0.4.6-py2.py3-none-any.whl.metadata (17 kB)
Downloading openai_agents-0.6.1-py3-none-any.whl (237 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m237.6/237.6 kB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading griffe-1.15.0-py3-none-any.whl (150 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m150.7/150.7 kB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading types_requests-2.32.4.20250913-py3-none-any.whl (20 kB)
Downloading colorama-0.4.6-py2.py3-none-any.whl (25 kB)
Installing collected packages: types-reques

In [2]:
import os
from google.colab import userdata
from google.colab import drive

# The mount point is read through google.colab.userdata instead of being
# hard-coded in the notebook.
drive_mount_path = userdata.get('drive_mount_path')
drive.mount(drive_mount_path)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
# Export the key as an environment variable; `OpenAI()` is later constructed
# without an explicit key argument.
os.environ['OPENAI_API_KEY'] = userdata.get('OPENAI_API_KEY')

In [4]:
from agents import Agent, Runner, FileSearchTool, WebSearchTool,ModelSettings
import asyncio
import pandas as pd
import numpy as np
from openai import OpenAI
import json
from pydantic import BaseModel

In [5]:
## EDA Agent
## --- Conduct exploratory data analysis based on available data.
## Decide what variable to use and feature engineering.
## --- Available data are in csv format


# NOTE(review): `client` is not referenced by any other cell visible in this
# notebook; the agent classes call `Runner.run` instead. Confirm before removing.
client = OpenAI()

# class feature_analysis_result(BaseModel):
#   likely_targets:list[str]
#   selected_features:list[str]
#   feature_engineering:list[str]
#   drop_columns:list[str]
#   missing_value_handling:AgentOutputSchema(dict, strict_json_schema=False)

class feature_analysis_Agent:

  '''
  Feature analysis agent for CSV datasets.

  Workflow:
    1. `run` loads the CSV, builds a per-column profile and asks the LLM for
       target / feature / feature-engineering suggestions.
    2. `generate_transformation_code` asks the LLM for Python code that applies
       those suggestions to a DataFrame called `df`.
    3. `execute_code` runs that code and returns the resulting `df_transformed`.
  '''

  def __init__(self, model:str = 'gpt-4o', user_instructions = '', user_defined_target = ''):
    '''
    Create the underlying `Agent`.

    model: model name handed to `Agent`.
    user_instructions: text appended to the built-in system instructions.
    user_defined_target: optional target column; when non-empty a sentence
      naming it is appended to the instructions.
    '''
    self.model = model
    self.name = 'Feature Analysis Agent'
    self.user_defined_target = user_defined_target
    self.user_instructions = 'You are a data scientist specialized in feature analysis.\
    You analyze data, gives helpful insights on what variable to use and feature engineering.\
    Then you write code to apply your feature engineering suggesions and transform the dataset. '+user_instructions
    if self.user_defined_target:
      self.user_instructions += f'The target variable is {self.user_defined_target}'
    self.agent = Agent(
        name = self.name,
        model = self.model,
        instructions = self.user_instructions,
        model_settings = ModelSettings(temperature = 0),
        # output_type = feature_analysis_result
        )

  async def run(self, csv_path: str, varb_info_path:str=False):
    '''
    Profile the CSV at `csv_path` and ask the LLM for feature suggestions.

    varb_info_path: optional path to a text file describing the variables;
      any falsy value means "no description available".

    Returns a dict with keys 'raw_profile' (output of `_profile_data`) and
    'Feature_analysis_suggestions' (the raw LLM reply).
    '''
    df = self._load_csv(csv_path)
    profile = self._profile_data(df)
    suggestions = await self._llm_interpretation(profile, varb_info_path)
    return {
        'raw_profile':profile,
        'Feature_analysis_suggestions':suggestions}

  def _load_csv(self, csv_path:str):
    '''Load the CSV at `csv_path` into a DataFrame.'''
    return pd.read_csv(csv_path)

  def _profile_data(self, df:pd.DataFrame):
    '''
    Build a profile of `df`: row/column counts plus, per column, the
    `describe()` statistics (when available), missing fraction, dtype and
    number of unique values.

    Returns {'n_rows': int, 'n_cols': int, 'col_info': {column: {...}}}.
    '''
    n_rows = len(df)
    profile = {'n_rows': n_rows, 'n_cols': len(df.columns)}

    # describe() keeps only numeric columns when the frame has any, and raises
    # on a frame without columns, so its result cannot be indexed blindly.
    col_info = df.describe().to_dict() if len(df.columns) else {}
    for col in df.columns:
      # Columns skipped by describe() still get an entry instead of a KeyError.
      stats = col_info.setdefault(col, {})
      non_null = int(df[col].count())
      # Guard the zero-row case, which would otherwise divide by zero.
      stats['missing_pct'] = 1 - non_null / n_rows if n_rows else 0.0
      stats['dtype'] = str(df[col].dtype)
      stats['unique_count'] = len(df[col].unique())

    profile['col_info'] = col_info

    return profile

  async def _llm_interpretation(self, profile_dict, varb_info_path = False):
    '''
    Send the data profile (and the variable descriptions, if a path is given)
    to the LLM. Prints and returns the raw reply (`final_output`).
    '''
    if varb_info_path:
      with open(varb_info_path, 'r') as file:
        varb_info = file.read()
    else:
      varb_info = 'None'

    prompt = f"""
    Given the following dataset profile:
    {json.dumps(profile_dict, indent = 2)}
    and the variable information below:
    {varb_info}

    Please :
    1. Identify likely target variable if no target provided in instructions; otherwise, use the provided target
    2. Identify useful predictor features
    3. Identify columns to drop and reasoning
    4. Suggest feature engineering (e.g. log transform, bucketization). \
    Any columns identified as 'drop columns' should not be selected for feature engineering.
    5. Summarize missing variable issues and solutions

    Return JSON structured as:
    {{
      'likely_targets':[],
      'selected_features':[],
      'drop_columns':[{{'Feature','Reason'}}],
      'feature_engineering':[{{'Feature','Method','Reason'}}],
      'missing_value_handling':[{{'Feature','Method','Reason'}}]

    }}

    """

    result = await Runner.run(self.agent, prompt)
    print(result.final_output)
    return result.final_output

  async def generate_transformation_code(self, df_profile, suggestions):
    """
    Ask the LLM for Python code that applies `suggestions` to a DataFrame
    named `df` and stores the result in `df_transformed`.

    Returns the raw reply, which may still be wrapped in a markdown code fence.
    """


    prompt = f"""
    Based on the following:
    Data Profile:\n{json.dumps(df_profile, indent = 2)},
    Data Transformation Suggestions:\n{json.dumps(suggestions, indent = 2)},

    Write python code that:
    1. The dataset is called 'df'. Do not change the name of the dataset. Do not \
    read additional data.
    2. Applies suggested transformations from Suggestions
    3. return the transformed data in a pandas dataframe format as 'df_transformed'

    Only return python code. No explanation.
    """


    result = await Runner.run(self.agent, prompt)
    # print(result.final_output)
    return result.final_output

  @staticmethod
  def _strip_code_fences(code:str):
    '''Return the Python source inside an LLM reply, without markdown fences.'''
    import re  # local import: `re` is not among the notebook's top-level imports

    fenced = re.search(
        r"```[ \t]*(?:python|py)?[ \t]*\r?\n(.*?)```",
        code,
        flags = re.DOTALL | re.IGNORECASE)
    if fenced:
      return fenced.group(1)
    # No complete fenced block (e.g. truncated reply): fall back to plain
    # marker removal, tolerating leading whitespace before the language tag.
    return code.replace("```", "").strip().removeprefix("python")

  def execute_code(self, csv_path: str,  code:str):
    '''
    Execute generated code against a fresh copy of the CSV and return the
    `df_transformed` it defines (None when the code does not define it).

    SECURITY: `code` comes from the LLM and is executed with full interpreter
    privileges; only run it in a disposable / sandboxed runtime.
    '''
    df = pd.read_csv(csv_path)
    # A single namespace serves as both globals and locals. With separate
    # dicts the code runs as if inside a class body, and functions defined by
    # the generated code cannot see its own imports or top-level variables.
    namespace = {'df':df.copy(), 'pd':pd, 'np': np}
    exec(self._strip_code_fences(code), namespace)

    return namespace.get('df_transformed', None)












In [6]:
# ## No target provided
# csv_path = userdata.get('train_data_path')
# varb_info_path=userdata.get('varb_info_path')
# user_instructions = "build a model that predicts excess returns and includes a betting strategy\
#  designed to outperform the S&P 500 while staying within a 120% volatility constraint. We’ll\
#   provide daily data that combines public market information with our proprietary dataset, giving\
#    you the raw material to uncover patterns most miss."
# fea_agent = feature_analysis_Agent(user_instructions = user_instructions)
# result = await fea_agent.run(csv_path = csv_path, varb_info_path=varb_info_path)


In [7]:
## Target provided: the agent is told which column is the target.
csv_path = userdata.get('train_data_path')
varb_info_path=userdata.get('varb_info_path')
# The backslash continuations keep each following line's leading spaces inside
# the string, so the instruction text contains runs of extra spaces.
user_instructions = "build a model that predicts excess returns and includes a betting strategy\
 designed to outperform the S&P 500 while staying within a 120% volatility constraint. We’ll\
  provide daily data that combines public market information with our proprietary dataset, giving\
   you the raw material to uncover patterns most miss."
fea_agent = feature_analysis_Agent(user_instructions = user_instructions, user_defined_target='market_forward_excess_returns')



In [8]:
# Profiles the CSV and queries the LLM; `run` also prints the raw reply, which is
# the output shown under this cell.
fea_eng_result = await fea_agent.run(csv_path = csv_path, varb_info_path=varb_info_path)

```json
{
  "likely_targets": [
    "market_forward_excess_returns"
  ],
  "selected_features": [
    "M*", "E*", "I*", "P*", "V*", "S*", "D*"
  ],
  "drop_columns": [
    {
      "Feature": "date_id",
      "Reason": "Identifier with no predictive value"
    },
    {
      "Feature": "forward_returns",
      "Reason": "Not available in test set"
    },
    {
      "Feature": "risk_free_rate",
      "Reason": "Not available in test set"
    }
  ],
  "feature_engineering": [
    {
      "Feature": "E*",
      "Method": "Standardization",
      "Reason": "Macro Economic features have different scales"
    },
    {
      "Feature": "I*",
      "Method": "Standardization",
      "Reason": "Interest Rate features have different scales"
    },
    {
      "Feature": "P*",
      "Method": "Log Transform",
      "Reason": "Price/Valuation features may have skewed distributions"
    },
    {
      "Feature": "V*",
      "Method": "Standardization",
      "Reason": "Volatility features have diff

In [9]:
# The suggestions are the raw LLM reply: a string still wrapped in a ```json
# fence, not a parsed dict.
fea_eng_result['Feature_analysis_suggestions']

'```json\n{\n  "likely_targets": [\n    "market_forward_excess_returns"\n  ],\n  "selected_features": [\n    "M*", "E*", "I*", "P*", "V*", "S*", "D*"\n  ],\n  "drop_columns": [\n    {\n      "Feature": "date_id",\n      "Reason": "Identifier with no predictive value"\n    },\n    {\n      "Feature": "forward_returns",\n      "Reason": "Not available in test set"\n    },\n    {\n      "Feature": "risk_free_rate",\n      "Reason": "Not available in test set"\n    }\n  ],\n  "feature_engineering": [\n    {\n      "Feature": "E*",\n      "Method": "Standardization",\n      "Reason": "Macro Economic features have different scales"\n    },\n    {\n      "Feature": "I*",\n      "Method": "Standardization",\n      "Reason": "Interest Rate features have different scales"\n    },\n    {\n      "Feature": "P*",\n      "Method": "Log Transform",\n      "Reason": "Price/Valuation features may have skewed distributions"\n    },\n    {\n      "Feature": "V*",\n      "Method": "Standardization",\n    

In [10]:
# Ask the agent for transformation code based on the profile and its own
# suggestions. `code` is the raw reply and may include a markdown code fence.
code = await fea_agent.generate_transformation_code(
    fea_eng_result['raw_profile'],
    fea_eng_result['Feature_analysis_suggestions']
)

print("Generated code:")
print(code)

Generated code:
```python
import pandas as pd
from sklearn.preprocessing import StandardScaler, FunctionTransformer
import numpy as np

# Drop columns
df = df.drop(columns=['date_id', 'forward_returns', 'risk_free_rate'])

# Impute missing values with mean
for col in df.columns:
    if df[col].isnull().any():
        df[col].fillna(df[col].mean(), inplace=True)

# Standardization
scaler = StandardScaler()
for prefix in ['E', 'I', 'V', 'S']:
    cols = [col for col in df.columns if col.startswith(prefix)]
    df[cols] = scaler.fit_transform(df[cols])

# Log Transform for P* features
log_transformer = FunctionTransformer(np.log1p, validate=True)
p_cols = [col for col in df.columns if col.startswith('P')]
df[p_cols] = log_transformer.fit_transform(df[p_cols])

# Return the transformed dataframe
df_transformed = df
```


In [11]:
# Execute the generated code to get the transformed DataFrame.
# NOTE(review): numpy is already imported in the main import cell and `csv_path`
# was already fetched in an earlier cell; both lines repeat earlier state.
import numpy as np
csv_path = userdata.get('train_data_path')
# `execute_code` returns None when the generated code never defines
# `df_transformed`; `.head()` below would then raise AttributeError.
df_transformed = fea_agent.execute_code(csv_path = csv_path, code = code)

display(df_transformed.head())

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  return func(X, **(kw_args if kw_args else {}))


Unnamed: 0,D1,D2,D3,D4,D5,D6,D7,D8,D9,E1,...,V13,V2,V3,V4,V5,V6,V7,V8,V9,market_forward_excess_returns
0,0,0,0,1,1,0,0,0,1,-3.912782e-16,...,1.143207e-17,0.0,1.923667e-16,-3.84326e-16,-5.2779070000000005e-17,0.0,0.0,0.0,0.0,-0.003038
1,0,0,0,1,1,0,0,0,1,-3.912782e-16,...,1.143207e-17,0.0,1.923667e-16,-3.84326e-16,-5.2779070000000005e-17,0.0,0.0,0.0,0.0,-0.009114
2,0,0,0,1,0,0,0,0,1,-3.912782e-16,...,1.143207e-17,0.0,1.923667e-16,-3.84326e-16,-5.2779070000000005e-17,0.0,0.0,0.0,0.0,-0.010243
3,0,0,0,1,0,0,0,0,0,-3.912782e-16,...,1.143207e-17,0.0,1.923667e-16,-3.84326e-16,-5.2779070000000005e-17,0.0,0.0,0.0,0.0,0.004046
4,0,0,0,1,0,0,0,0,0,-3.912782e-16,...,1.143207e-17,0.0,1.923667e-16,-3.84326e-16,-5.2779070000000005e-17,0.0,0.0,0.0,0.0,-0.012301


In [12]:
class ModelingAgent:
  '''
  Modeling agent.

  Asks the LLM for a modeling proposal (`proposed_model`), asks it for training
  and evaluation code (`generate_modeling_code`), and executes that code with
  one LLM-assisted retry on failure (`execute_code`).
  '''

  def __init__(self, model = 'gpt-4o',user_instructions = '', user_defined_target = ''):
    '''
    Create the underlying `Agent`.

    model: model name handed to `Agent`.
    user_instructions: text appended to the built-in system instructions.
    user_defined_target: optional target column; when non-empty a sentence
      naming it is appended to the instructions.
    '''
    self.model = model
    self.name = 'Modeling Agent'
    self.user_defined_target = user_defined_target
    self.user_instructions = 'You are a data scientist specialized in machine learning modeling.\
    You take in problem statement, variable descriptions, transformed data based on the suggestions by the feature analysis agent, \
     and target variable(s). Based on these information, you make suggestions \
     on: \
     1. What kind of model we should build (e.g. Classification or regression)\
     2. What machine learning algorithm to use (e.g. linear regression,time series, XGBoost, or deep learning)\
     3. What hyperparameter we should tune\
      '+user_instructions

    if self.user_defined_target:
      self.user_instructions += f'The target variable is {self.user_defined_target}'

    self.agent = Agent(
        name = self.name,
        model = self.model,
        instructions = self.user_instructions,
        model_settings = ModelSettings(temperature = 0)
        )


  async def proposed_model(self, problem_statement:str, varb_info_path:str,fea_eng_suggestions:str, df_transformed:pd.DataFrame, target_variable:str):
    '''
    Ask the LLM for a modeling proposal.

    Only the column names of `df_transformed` are sent, not the data itself.
    `varb_info_path` may be falsy, in which case 'None' is sent as the
    variable description. Prints and returns the raw LLM reply.
    '''

    if varb_info_path:
      with open(varb_info_path, 'r') as file:
        original_varb_info = file.read()
    else:
      original_varb_info = 'None'

    all_columns = list(df_transformed.columns)
    feature_columns = [x for x in all_columns if x != target_variable]

    prompt = f"""
    You are given:
    * Problem Statement: {problem_statement}
    * Original Variable Descriptions: {original_varb_info}
    * Feature Engineering Suggestions: {fea_eng_suggestions}
    * Transformed Data: {all_columns}
    * Target Variable: {target_variable}

    Tasks:
    1. Determine whetehr problem is regression or classification
    2. Select a machine learning model type
    3. Recommend hyperparameters to tune on
    4. Recommend evaluation protocol (cross validation or train/test split only)
    5. If cross validation, recommend train/validation/test split ratio as 0.7:0.2:0.1. \
    If train/test split only, recommend train/test split ratio as 0.8:0.2.
    6. Suggest metrics to evaluate model performance
    7. Explain reasoning of the above decisions
    8. Return JSON strictly:

    {{
      'target_variable':'{target_variable}',
      'features':{str(feature_columns)}.
      'task_type':'regression'|'classification',
      'model_type':'linear_regression'|'time_series'|'xgboost'|'deep_learning'|...,
      'hyperparameters':'hyperparameter':'value',
      'evaluation_protocol':'cross_validation'|'train_test_split_only',
      'train_test_split_ratio':0.7:0.2:0.1 | 0.8:0.2,
      'metrics':['metric1','metric2','metrics3'...],
      'reasoning':'reasoning'


    }}

    """

    result = await Runner.run(self.agent, prompt)
    print(result.final_output)
    return result.final_output

  async def generate_modeling_code(self, modeling_proposal:str,):
    """
    Ask the LLM for Python code that trains and evaluates the proposed model
    on a DataFrame named `df_transformed`.

    Prints and returns the raw reply, which may still be wrapped in a
    markdown code fence.
    """


    prompt = f"""
    Based on the following:
    Modeling_proposal:\n{json.dumps(modeling_proposal, indent = 2)},

    Write python code that:
    1. The dataset is called 'df_transformed'. Do not change the name of the dataset. Do not \
    read additional data.
    2. Applies modeling proposal from Modeling_proposal
    3. Write code to train model and evaluate model performance. \
        a. If you use 'early_stopping_rounds' as a parameter, pass it to the constructor of model.
        b. Save training history
    4. return model as 'Model',  evaluation result as 'evaluation_result':{{'metric1':float,'metric2':float...}}\
    training history as 'training_history'

    Only return python code. No explanation.
    """


    result = await Runner.run(self.agent, prompt)
    print(result.final_output)
    return result.final_output

  @staticmethod
  def _strip_code_fences(code:str):
    '''Return the Python source inside an LLM reply, without markdown fences.'''
    import re  # local import: `re` is not among the notebook's top-level imports

    fenced = re.search(
        r"```[ \t]*(?:python|py)?[ \t]*\r?\n(.*?)```",
        code,
        flags = re.DOTALL | re.IGNORECASE)
    if fenced:
      return fenced.group(1)
    # No complete fenced block (e.g. truncated reply): fall back to plain
    # marker removal, tolerating leading whitespace before the language tag.
    return code.replace("```", "").strip().removeprefix("python")

  async def execute_code(self, df_transformed: pd.DataFrame,  code:str):
    '''
    Execute generated training code and return
    (Model, evaluation_result, training_history); each item is None when the
    code does not define it.

    If the first run raises, the error and the code are sent back to the LLM
    once, and the corrected code is run. A failure of that second run
    propagates to the caller.

    SECURITY: `code` comes from the LLM and is executed with full interpreter
    privileges; only run it in a disposable / sandboxed runtime.
    '''

    def _run(source):
      # A single namespace serves as both globals and locals. With separate
      # dicts the code runs as if inside a class body, and functions defined
      # by the generated code cannot see its own imports or variables.
      # A fresh namespace per attempt keeps a failed first run from leaking
      # half-built state into the retry.
      namespace = {'df_transformed':df_transformed.copy(), 'pd':pd, 'np': np}
      exec(source, namespace)
      return namespace

    code = self._strip_code_fences(code)
    try:
      namespace = _run(code)
    except Exception as e:
      prompt = f'''Receive this error: {e}. Fix the error in the original code.Original code: {code}. Only return python code. No explanation.'''
      result = await Runner.run(self.agent, prompt)
      print(f"{prompt} \n Error: {e} \n Updated code: {result.final_output}")
      namespace = _run(self._strip_code_fences(result.final_output))

    model = namespace.get('Model',None)
    evaluation_result = namespace.get('evaluation_result',None)
    training_history = namespace.get('training_history',None)

    return model, evaluation_result,training_history



In [None]:
# Built with defaults: no user instructions or target in the system prompt.
# The problem statement and target are passed per call to `proposed_model`.
modeling_agent=ModelingAgent()

In [14]:
# Same text as the `user_instructions` given to the feature analysis agent.
problem_statement = "build a model that predicts excess returns and includes a betting strategy\
 designed to outperform the S&P 500 while staying within a 120% volatility constraint. We’ll\
  provide daily data that combines public market information with our proprietary dataset, giving\
   you the raw material to uncover patterns most miss."

varb_info_path=userdata.get('varb_info_path')


# Raw LLM reply (string) from the feature analysis step.
fea_eng_suggestions = fea_eng_result['Feature_analysis_suggestions']
# Rebinds the name to a copy of itself; depends on the earlier cell that created
# `df_transformed` having been run.
df_transformed = df_transformed.copy()
target_variable = 'market_forward_excess_returns'

In [15]:
# `proposed_model` prints the raw reply (shown below) and returns it as a string.
model_proposal = await modeling_agent.proposed_model(problem_statement, varb_info_path,fea_eng_suggestions, df_transformed, target_variable)

```json
{
  "target_variable": "market_forward_excess_returns",
  "features": [
    "D1", "D2", "D3", "D4", "D5", "D6", "D7", "D8", "D9", "E1", "E10", "E11", "E12", "E13", "E14", "E15", "E16", "E17", "E18", "E19", "E2", "E20", "E3", "E4", "E5", "E6", "E7", "E8", "E9", "I1", "I2", "I3", "I4", "I5", "I6", "I7", "I8", "I9", "M1", "M10", "M11", "M12", "M13", "M14", "M15", "M16", "M17", "M18", "M2", "M3", "M4", "M5", "M6", "M7", "M8", "M9", "P1", "P10", "P11", "P12", "P13", "P2", "P3", "P4", "P5", "P6", "P7", "P8", "P9", "S1", "S10", "S11", "S12", "S2", "S3", "S4", "S5", "S6", "S7", "S8", "S9", "V1", "V10", "V11", "V12", "V13", "V2", "V3", "V4", "V5", "V6", "V7", "V8", "V9"
  ],
  "task_type": "regression",
  "model_type": "xgboost",
  "hyperparameters": {
    "n_estimators": 100,
    "max_depth": 6,
    "learning_rate": 0.1,
    "subsample": 0.8,
    "colsample_bytree": 0.8
  },
  "evaluation_protocol": "cross_validation",
  "train_test_split_ratio": "0.7:0.2:0.1",
  "metrics": ["mean_squa

In [16]:
# model_proposal

In [17]:
# df_transformed

In [18]:
# The generated training code is printed by the method and returned as a string
# that may still include the markdown code fence.
modeling_code = await modeling_agent.generate_modeling_code(model_proposal)


```python
import xgboost as xgb
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np

# Define features and target
X = df_transformed[[
    "D1", "D2", "D3", "D4", "D5", "D6", "D7", "D8", "D9", "E1", "E10", "E11", "E12", "E13", "E14", "E15", "E16", "E17", "E18", "E19", "E2", "E20", "E3", "E4", "E5", "E6", "E7", "E8", "E9", "I1", "I2", "I3", "I4", "I5", "I6", "I7", "I8", "I9", "M1", "M10", "M11", "M12", "M13", "M14", "M15", "M16", "M17", "M18", "M2", "M3", "M4", "M5", "M6", "M7", "M8", "M9", "P1", "P10", "P11", "P12", "P13", "P2", "P3", "P4", "P5", "P6", "P7", "P8", "P9", "S1", "S10", "S11", "S12", "S2", "S3", "S4", "S5", "S6", "S7", "S8", "S9", "V1", "V10", "V11", "V12", "V13", "V2", "V3", "V4", "V5", "V6", "V7", "V8", "V9"
]]
y = df_transformed["market_forward_excess_returns"]

# Split the data
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42)
X_val, X_t

In [19]:
# Runs the generated code; on an exception the agent is asked once for a fix.
# Any of the three results is None when the generated code did not define it.
Model, evaluation_result,training_history = await modeling_agent.execute_code(df_transformed,  modeling_code)

In [20]:
# Metrics produced by the generated training code.
evaluation_result

{'mean_squared_error': 0.00012322073669942247,
 'r2_score': 0.0012726051484519552}

In [21]:
# Training history saved by the generated training code.
training_history

{'validation_0': OrderedDict([('rmse',
               [0.01048529397134944,
                0.0103840285095624,
                0.01028961823970748,
                0.01022318456637951,
                0.01014926914552499,
                0.01008554739715958,
                0.01000718742916796,
                0.00995404975029369,
                0.00988796954583577,
                0.00983949159071531,
                0.00973796863132236])]),
 'validation_1': OrderedDict([('rmse',
               [0.01028114538201944,
                0.01028993741031988,
                0.01030893996021545,
                0.01031392154295,
                0.01030915006405261,
                0.0103250057622733,
                0.0103157590215059,
                0.01031127311783293,
                0.01031158683060553,
                0.01030652924031971,
                0.01029361956119773])])}

In [22]:
## Build an evaluation agent


class EvaluationAgent:
  '''
  Evaluation agent: reviews a modeling proposal together with its training
  result and training history, and asks the LLM how to improve the model.
  '''

  def __init__(self, model = 'gpt-4o',user_instructions = '', user_defined_target = ''):
    '''
    Assemble the system instructions (built-in role text, then
    `user_instructions`, then an optional sentence naming the target) and
    create the underlying `Agent`.
    '''
    role_text = 'You are a data scientist specialized in evaluating machine learning model.\
    You take in modeling proposal, training result, and training history.\
    Based on these information, you make suggestions on how to improve the model\
      '
    instructions = role_text + user_instructions
    if user_defined_target:
      instructions = instructions + f'The target variable is {user_defined_target}'

    self.model = model
    self.name = 'Evaluation Agent'
    self.user_defined_target = user_defined_target
    self.user_instructions = instructions
    self.agent = Agent(
        name = self.name,
        model = self.model,
        instructions = self.user_instructions,
        model_settings = ModelSettings(temperature = 0)
        )


  async def analyze_model(self, modeling_proposal:str, training_result:str,training_history:str):
    '''
    Send the proposal, training result and training history to the LLM and
    ask for improvement suggestions. Prints and returns the raw reply.
    '''
    target = self.user_defined_target

    prompt = f"""
    You are given:
    * Modeling Proposal: {modeling_proposal}
    * Training Result: {training_result}
    * Training History: {training_history}
    * Target Variable: {target}

    Tasks:
    1. Read Modeling Proposal
    2. Examinate the training history and training result
    3. Suggest method to improve the model
    4. Explain reasoning of the above decisions
    5. Return JSON strictly:

    {{
      'target_variable':'{target}',
      'modeling_proposal':{modeling_proposal}.
      'training_result':{training_result},
      'reasoning':'reasoning'


    }}

    """

    reply = await Runner.run(self.agent, prompt)
    answer = reply.final_output
    print(answer)
    return answer

  # async def generate_modeling_code(self, modeling_proposal:str,):
  #   """
  #   Based on modeling_proposal, write code to train model and evaluate model performance.
  #   """


  #   prompt = f"""
  #   Based on the following:
  #   Modeling_proposal:\n{json.dumps(modeling_proposal, indent = 2)},

  #   Write python code that:
  #   1. The dataset is called 'df_transformed'. Do not change the name of the dataset. Do not \
  #   read additional data.
  #   2. Applies modeling proposal from Modeling_proposal
  #   3. Write code to train model and evaluate model performance. \
  #   If you use 'early_stopping_rounds' as a parameter, pass it to the constructor of model.
  #   4. return model as 'Model' and evaluation result as 'evaluation_result':{{'metric1':float,'metric2':float...}}

  #   Only return python code. No explanation.
  #   """


  #   result = await Runner.run(self.agent, prompt)
  #   print(result.final_output)
  #   return result.final_output

  # def execute_code(self, df_transformed: pd.DataFrame,  code:str):
  #   '''Executes generated code and return model and result'''
  #   local_varbs = {'df_transformed':df_transformed.copy(), 'pd':pd, 'np': np}
  #   code = code.replace("```", "")
  #   code = code.removeprefix("python")
  #   exec(code, {}, local_varbs)
  #   model = local_varbs.get('Model',None)
  #   evaluation_result = local_varbs.get('evaluation_result',None)

  #   return model, evaluation_result





In [23]:
# Here the problem statement and the target are added to the agent's system instructions.
evaluation_agent = EvaluationAgent(user_instructions = problem_statement,user_defined_target =target_variable)

In [24]:
# `model_proposal` is the raw LLM string; `evaluation_result` and
# `training_history` come from the executed training code. All three are
# formatted into the prompt as text.
evaluator_suggestion = await evaluation_agent.analyze_model(model_proposal, evaluation_result,training_history)

```json
{
  "target_variable": "market_forward_excess_returns",
  "modeling_proposal": {
    "target_variable": "market_forward_excess_returns",
    "features": [
      "D1", "D2", "D3", "D4", "D5", "D6", "D7", "D8", "D9", "E1", "E10", "E11", "E12", "E13", "E14", "E15", "E16", "E17", "E18", "E19", "E2", "E20", "E3", "E4", "E5", "E6", "E7", "E8", "E9", "I1", "I2", "I3", "I4", "I5", "I6", "I7", "I8", "I9", "M1", "M10", "M11", "M12", "M13", "M14", "M15", "M16", "M17", "M18", "M2", "M3", "M4", "M5", "M6", "M7", "M8", "M9", "P1", "P10", "P11", "P12", "P13", "P2", "P3", "P4", "P5", "P6", "P7", "P8", "P9", "S1", "S10", "S11", "S12", "S2", "S3", "S4", "S5", "S6", "S7", "S8", "S9", "V1", "V10", "V11", "V12", "V13", "V2", "V3", "V4", "V5", "V6", "V7", "V8", "V9"
    ],
    "task_type": "regression",
    "model_type": "xgboost",
    "hyperparameters": {
      "n_estimators": 100,
      "max_depth": 6,
      "learning_rate": 0.1,
      "subsample": 0.8,
      "colsample_bytree": 0.8
    },
    "ev

In [37]:
# Parse the reply as JSON data rather than eval()-ing model output as Python:
# eval would execute arbitrary expressions and fails on JSON true/false/null.
json.loads(evaluator_suggestion.replace("```", "").strip().removeprefix("json"))['reasoning']

"The training results indicate a very low r2_score, suggesting that the model is not capturing the variance in the target variable effectively. The training history shows that the validation RMSE does not improve significantly, indicating potential overfitting or that the model is not learning effectively. To improve the model, consider the following: 1) Feature engineering to create more informative features. 2) Hyperparameter tuning, especially increasing 'n_estimators' and adjusting 'max_depth' and 'learning_rate'. 3) Try different model types like Random Forest or Neural Networks. 4) Use more advanced techniques like feature selection or dimensionality reduction to improve model performance."

In [39]:
## Report agent

class ReportAgent:
  '''
  Report agent: turns the problem statement, feature analysis, modeling
  proposal, training result and optimization suggestions into a markdown
  report written by the LLM.
  '''

  def __init__(self, model = 'gpt-4o',user_instructions = '', user_defined_target = ''):
    '''
    Assemble the system instructions (built-in role text, then
    `user_instructions`, then an optional sentence naming the target) and
    create the underlying `Agent`.
    '''
    role_text = 'You are a data scientist specialized in summarizing the given information and generate a report.\
    You take in promblem statement, modeling proposal, training result, and model optimization suggestions.\
    Based on these information, you summarize the information and generate a report\
      '
    instructions = role_text + user_instructions
    if user_defined_target:
      instructions = instructions + f'The target variable is {user_defined_target}'

    self.model = model
    self.name = 'Report Agent'
    self.user_defined_target = user_defined_target
    self.user_instructions = instructions
    self.agent = Agent(
        name = self.name,
        model = self.model,
        instructions = self.user_instructions,
        model_settings = ModelSettings(temperature = 0)
        )

  async def generate_report(
        self,
        problem_statement: str,
        fea_eng_result: str,
        model_proposal: str,
        training_result: str,
        optimization_suggestion: str = ""
    ) -> str:
        """
        Ask the LLM for the final analysis report.

        Every argument is formatted into the prompt as text. Prints and
        returns the raw reply.
        """
        prompt = f"""
        Generate a detailed professional analytical report based on the following inputs.

        ### **Problem Statement**
        {problem_statement}

        ### **Variable Selection & Feature Engineering and rationale**
        {fea_eng_result}

        ### **Modeling Summary**
        {model_proposal}

        ### **Model Training Result**
        {training_result}

        ### **Model Optimization Suggestions**
        {optimization_suggestion}

        Format the report in structured markdown with:
        - Executive summary
        - Problem statement
        - Variable selection and feature engineering and rationale
        - Model selection and training methodology
        - Model training and evaluation results
        - Model Optimization Sggestions
        """

        reply = await Runner.run(self.agent, prompt)
        report_text = reply.final_output
        print(report_text)
        return report_text



In [40]:
# The problem statement and the target are added to the agent's system instructions.
report_agent = ReportAgent(user_instructions = problem_statement,user_defined_target =target_variable)

In [41]:
# Parse the reply as JSON data rather than eval()-ing model output as Python:
# eval would execute arbitrary expressions and fails on JSON true/false/null.
optimization_suggestion = json.loads(evaluator_suggestion.replace("```", "").strip().removeprefix("json"))['reasoning']

In [42]:
# `fea_eng_result` is passed whole (raw profile plus suggestions) and
# `evaluation_result` is the metrics object; both are formatted into the prompt
# as text even though the parameters are annotated `str`.
report = await report_agent.generate_report(problem_statement, fea_eng_result, model_proposal, evaluation_result, optimization_suggestion)

# Analytical Report

## Executive Summary

This report outlines the development of a predictive model aimed at forecasting excess returns and implementing a betting strategy to outperform the S&P 500, while maintaining a volatility constraint of 120%. The model leverages a combination of public market data and proprietary datasets to identify patterns often overlooked. Despite the initial model's low performance, several optimization strategies are proposed to enhance its predictive capabilities.

## Problem Statement

The objective is to build a model that predicts excess returns and includes a betting strategy designed to outperform the S&P 500, while adhering to a 120% volatility constraint. The model utilizes daily data that combines public market information with proprietary datasets to uncover hidden patterns.

## Variable Selection & Feature Engineering and Rationale

### Data Overview
- **Dataset Size**: 8990 rows, 98 columns
- **Target Variable**: `market_forward_excess_return