In [9]:
!pip install openai-agents



In [23]:
import os
from google.colab import userdata
from google.colab import drive

# Mount Google Drive at the path stored under the 'drive_mount_path' key in
# Colab `userdata`, so no local path is hard-coded in the notebook.
drive_mount_path = userdata.get('drive_mount_path')
drive.mount(drive_mount_path)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [12]:
# Copy the API key from Colab `userdata` into the process environment instead of
# hard-coding it. Presumably this is where the OpenAI client / agents SDK look
# for it -- confirm against the SDK docs.
os.environ['OPENAI_API_KEY'] = userdata.get('OPENAI_API_KEY')

In [13]:
from agents import Agent, Runner, FileSearchTool, WebSearchTool,ModelSettings
import asyncio
import pandas as pd
import numpy as np
from openai import OpenAI
import json
from pydantic import BaseModel

In [15]:
## EDA Agent
## --- Conduct exploratory data analysis based on available data.
## Decide what variable to use and feature engineering.
## --- Available data are in csv format


# NOTE(review): `client` is not referenced by any cell visible in this notebook
# (the agents below are driven through `Runner`). Confirm it is unused before removing.
client = OpenAI()

# class feature_analysis_result(BaseModel):
#   likely_targets:list[str]
#   selected_features:list[str]
#   feature_engineering:list[str]
#   drop_columns:list[str]
#   missing_value_handling:AgentOutputSchema(dict, strict_json_schema=False)

class feature_analysis_Agent:

  '''
  Feature Analysis Agent.

  Conducts exploratory data analysis on a dataset stored in csv format:
    1. profiles the data (row/column counts, per-column statistics),
    2. asks the LLM which variables to use / drop and which feature
       engineering to apply,
    3. asks the LLM for python code implementing those suggestions and
       executes it to obtain the transformed dataframe.
  '''

  def __init__(self, model:str = 'gpt-4o', user_instructions = '', user_defined_target = ''):
    '''
    Build the underlying `Agent`.

    Args:
      model: model name handed to `Agent`.
      user_instructions: free text appended to the built-in system prompt.
      user_defined_target: optional target column name. When non-empty it is
        appended to the instructions so the LLM uses it instead of guessing.
    '''
    self.model = model
    self.name = 'Feature Analysis Agent'
    self.user_defined_target = user_defined_target
    self.user_instructions = 'You are a data scientist specialized in feature analysis.\
    You analyze data, gives helpful insights on what variable to use and feature engineering.\
    Then you write code to apply your feature engineering suggesions and transform the dataset. '+user_instructions
    if self.user_defined_target:
      # Leading space: without it the sentence is glued onto the last word of
      # the caller's instructions ("...most miss.The target variable is ...").
      self.user_instructions += f' The target variable is {self.user_defined_target}'
    self.agent = Agent(
        name = self.name,
        model = self.model,
        instructions = self.user_instructions,
        model_settings = ModelSettings(temperature = 0),
        # output_type = feature_analysis_result
        )

  async def run(self, csv_path: str, varb_info_path:str=False):
    '''
    Profile the csv at `csv_path` and ask the LLM for feature suggestions.

    Args:
      csv_path: path of the csv file to analyse.
      varb_info_path: optional path of a text file describing the variables;
        any falsy value means "no description available".

    Returns:
      dict with keys 'raw_profile' (output of `_profile_data`) and
      'Feature_analysis_suggestions' (the agent's final output).
    '''
    df = self._load_csv(csv_path)
    profile = self._profile_data(df)
    suggestions = await self._llm_interpretation(profile, varb_info_path)
    return {
        'raw_profile':profile,
        'Feature_analysis_suggestions':suggestions}

  def _load_csv(self, csv_path:str):
    '''Load the csv file at `csv_path` into a pandas DataFrame.'''
    df = pd.read_csv(csv_path)
    return df

  def _profile_data(self, df:pd.DataFrame):
    '''
    Build a dataset profile: row/column counts plus, for every column, the
    `describe()` statistics (when available), missing fraction, dtype and
    number of distinct values.

    Returns:
      dict with keys 'n_rows', 'n_cols' and 'col_info' (column -> stats dict).
    '''
    n_rows = len(df)
    profile = {}
    profile['n_rows'] = n_rows
    profile['n_cols'] = len(df.columns)

    # describe() summarises only numeric columns when the frame has any, and
    # raises on a frame without columns -- so its dict cannot be indexed by
    # every column blindly.
    col_info = df.describe().to_dict() if len(df.columns) else {}
    for col in df.columns:
      info = col_info.setdefault(col, {})
      non_null = int(df[col].count())
      # Columns skipped by describe() still get a 'count' entry.
      info.setdefault('count', float(non_null))
      # Guard the zero-row case instead of dividing by zero.
      info['missing_pct'] = 1 - non_null/n_rows if n_rows else 0.0
      info['dtype'] = str(df[col].dtype)
      info['unique_count'] = len(df[col].unique())

    profile['col_info'] = col_info

    return profile

  async def _llm_interpretation(self, profile_dict, varb_info_path = False):
    '''
    Send the data profile (and the variable description file, if any) to the
    LLM and return its final output.

    Args:
      profile_dict: profile produced by `_profile_data`.
      varb_info_path: optional path of a text file with variable descriptions.
    '''
    if varb_info_path:
      with open(varb_info_path, 'r') as file:
        varb_info = file.read()
    else:
      varb_info = 'None'

    # default=str keeps the dump from failing on values json cannot encode
    # natively (e.g. numpy integers or timestamps coming out of describe()).
    prompt = f"""
    Given the following dataset profile:
    {json.dumps(profile_dict, indent = 2, default = str)}
    and the variable information below:
    {varb_info}

    Please :
    1. Identify likely target variable if no target provided in instructions; otherwise, use the provided target
    2. Identify useful predictor features
    3. Identify columns to drop and reasoning
    4. Suggest feature engineering (e.g. log transform, bucketization). \
    Any columns identified as 'drop columns' should not be selected for feature engineering.
    5. Summarize missing variable issues and solutions

    Return JSON structured as:
    {{
      'likely_targets':[],
      'selected_features':[],
      'drop_columns':[{{'Feature','Reason'}}],
      'feature_engineering':[{{'Feature','Method','Reason'}}],
      'missing_value_handling':[{{'Feature','Method','Reason'}}]

    }}

    """

    result = await Runner.run(self.agent, prompt)
    print(result.final_output)
    return result.final_output

  async def generate_transformation_code(self, df_profile, suggestions):
    """
    Ask the LLM for python code that applies `suggestions` to a dataframe
    named 'df' and stores the result in 'df_transformed'.

    Args:
      df_profile: profile produced by `_profile_data`.
      suggestions: suggestions returned by `run` / `_llm_interpretation`.

    Returns:
      The agent's final output (expected to be python source, possibly
      wrapped in a markdown code fence).
    """
    prompt = f"""
    Based on the following:
    Data Profile:\n{json.dumps(df_profile, indent = 2, default = str)},
    Data Transformation Suggestions:\n{json.dumps(suggestions, indent = 2, default = str)},

    Write python code that:
    1. The dataset is called 'df'. Do not change the name of the dataset. Do not \
    read additional data.
    2. Applies suggested transformations from Suggestions
    3. return the transformed data in a pandas dataframe format as 'df_transformed'

    Only return python code. No explanation.
    """


    result = await Runner.run(self.agent, prompt)
    # print(result.final_output)
    return result.final_output

  @staticmethod
  def _extract_code(code:str):
    '''
    Return the python source contained in an LLM reply.

    Fenced blocks (``` or ```python ...) are extracted and concatenated, so
    prose or whitespace around the fence is ignored. Without a complete fence
    the text is used as-is, minus stray backticks and a leading language tag.
    '''
    import re  # local import: `re` is not among the notebook-level imports

    fenced = re.findall(r"```[ \t]*[\w+-]*[ \t]*\r?\n(.*?)```", code, flags = re.DOTALL)
    if fenced:
      return "\n".join(fenced)

    text = code.replace("```", "").strip()
    first_line, _, rest = text.partition("\n")
    # Only drop the first line when it is exactly a language tag, so code such
    # as `python_var = 1` is not mangled.
    if first_line.strip().lower() in ('python', 'py'):
      text = rest
    return text

  def execute_code(self, csv_path: str,  code:str):
    '''
    Execute generated code against the csv at `csv_path`.

    The code runs with 'df' (the loaded data), 'pd' and 'np' predefined.

    SECURITY: this exec()s LLM-generated code with full interpreter
    privileges. Review `code` before running it on anything sensitive.

    Returns:
      The 'df_transformed' variable defined by the code, or None if the code
      did not define it.
    '''
    df = self._load_csv(csv_path)
    # One dict serves as both globals and locals. With separate dicts the code
    # runs like a class body: functions, lambdas and comprehensions defined in
    # it cannot see 'df'/'pd'/'np' or its own imports and fail with NameError.
    namespace = {'df':df, 'pd':pd, 'np': np}
    exec(self._extract_code(code), namespace)
    df_transformed = namespace.get('df_transformed',None)

    return df_transformed












In [16]:
# ## No target provided
# csv_path = userdata.get('train_data_path')
# varb_info_path=userdata.get('varb_info_path')
# user_instructions = "build a model that predicts excess returns and includes a betting strategy\
#  designed to outperform the S&P 500 while staying within a 120% volatility constraint. We’ll\
#   provide daily data that combines public market information with our proprietary dataset, giving\
#    you the raw material to uncover patterns most miss."
# fea_agent = feature_analysis_Agent(user_instructions = user_instructions)
# result = await fea_agent.run(csv_path = csv_path, varb_info_path=varb_info_path)


In [24]:
## Target provided: the target column is passed explicitly instead of being inferred by the LLM.
# Data and variable-description paths come from Colab `userdata`.
csv_path = userdata.get('train_data_path')
varb_info_path=userdata.get('varb_info_path')
# NOTE: each backslash continuation keeps the following line's leading spaces inside the string.
user_instructions = "build a model that predicts excess returns and includes a betting strategy\
 designed to outperform the S&P 500 while staying within a 120% volatility constraint. We’ll\
  provide daily data that combines public market information with our proprietary dataset, giving\
   you the raw material to uncover patterns most miss."
fea_agent = feature_analysis_Agent(user_instructions = user_instructions, user_defined_target='market_forward_excess_returns')



In [26]:
# Profile the csv and ask the agent for suggestions; the result is a dict with
# keys 'raw_profile' and 'Feature_analysis_suggestions'. The reply is also printed by the agent.
fea_eng_result = await fea_agent.run(csv_path = csv_path, varb_info_path=varb_info_path)

```json
{
  "likely_targets": [
    "market_forward_excess_returns"
  ],
  "selected_features": [
    "M*", "E*", "I*", "P*", "V*", "S*"
  ],
  "drop_columns": [
    {
      "Feature": "date_id",
      "Reason": "Identifier, not predictive"
    },
    {
      "Feature": "forward_returns",
      "Reason": "Not available in test set"
    },
    {
      "Feature": "risk_free_rate",
      "Reason": "Not available in test set"
    },
    {
      "Feature": "D*",
      "Reason": "Binary features with low variance"
    }
  ],
  "feature_engineering": [
    {
      "Feature": "E*",
      "Method": "Standardization",
      "Reason": "Different scales and units"
    },
    {
      "Feature": "I*",
      "Method": "Standardization",
      "Reason": "Different scales and units"
    },
    {
      "Feature": "P*",
      "Method": "Log Transform",
      "Reason": "Potential skewness"
    },
    {
      "Feature": "V*",
      "Method": "Bucketization",
      "Reason": "Capture volatility regimes"
   

In [27]:
# Inspect the raw suggestions value returned by the agent.
fea_eng_result['Feature_analysis_suggestions']

'```json\n{\n  "likely_targets": [\n    "market_forward_excess_returns"\n  ],\n  "selected_features": [\n    "M*", "E*", "I*", "P*", "V*", "S*"\n  ],\n  "drop_columns": [\n    {\n      "Feature": "date_id",\n      "Reason": "Identifier, not predictive"\n    },\n    {\n      "Feature": "forward_returns",\n      "Reason": "Not available in test set"\n    },\n    {\n      "Feature": "risk_free_rate",\n      "Reason": "Not available in test set"\n    },\n    {\n      "Feature": "D*",\n      "Reason": "Binary features with low variance"\n    }\n  ],\n  "feature_engineering": [\n    {\n      "Feature": "E*",\n      "Method": "Standardization",\n      "Reason": "Different scales and units"\n    },\n    {\n      "Feature": "I*",\n      "Method": "Standardization",\n      "Reason": "Different scales and units"\n    },\n    {\n      "Feature": "P*",\n      "Method": "Log Transform",\n      "Reason": "Potential skewness"\n    },\n    {\n      "Feature": "V*",\n      "Method": "Bucketization",\n  

In [28]:
# Ask the agent to turn its own suggestions into python code operating on `df`.
code = await fea_agent.generate_transformation_code(
    fea_eng_result['raw_profile'],
    fea_eng_result['Feature_analysis_suggestions']
)

# Print the generated source so it can be reviewed before it is executed.
print("Generated code:")
print(code)

Generated code:
```python
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, FunctionTransformer
from sklearn.impute import SimpleImputer

# Drop specified columns
df.drop(columns=['date_id', 'forward_returns', 'risk_free_rate'] + [col for col in df.columns if col.startswith('D')], inplace=True)

# Handle missing values
imputer_mean = SimpleImputer(strategy='mean')
df.loc[:, df.columns.str.startswith('E')] = imputer_mean.fit_transform(df.loc[:, df.columns.str.startswith('E')])
df.loc[:, df.columns.str.startswith('I')] = imputer_mean.fit_transform(df.loc[:, df.columns.str.startswith('I')])
df.loc[:, df.columns.str.startswith('P')] = imputer_mean.fit_transform(df.loc[:, df.columns.str.startswith('P')])
df.loc[:, df.columns.str.startswith('V')] = imputer_mean.fit_transform(df.loc[:, df.columns.str.startswith('V')])
df.loc[:, df.columns.str.startswith('S')] = imputer_mean.fit_transform(df.loc[:, df.columns.str.startswith('S')])

# Forward fill for M* f

In [29]:
# Execute the generated code to get the transformed dataframe.
# NOTE(review): `np` is already imported in the imports cell and `csv_path` is
# already set in an earlier cell; the repeats below look redundant -- confirm.
import numpy as np
csv_path = userdata.get('train_data_path')
# Returns whatever the generated code bound to 'df_transformed' (None if it did not).
df_transformed = fea_agent.execute_code(csv_path = csv_path, code = code)

display(df_transformed.head())

  return func(X, **(kw_args if kw_args else {}))


Unnamed: 0,E1,E10,E11,E12,E13,E14,E15,E16,E17,E18,...,V13,V2,V3,V4,V5,V6,V7,V8,V9,market_forward_excess_returns
0,-3.912782e-16,0.0,1.198695e-16,0.0,9.556425e-17,0.0,0.0,0.0,-7.939110000000001e-18,-1.284109e-17,...,1.0,2.0,2.0,2.0,2.0,2.0,1.0,2.0,1.0,-0.003038
1,-3.912782e-16,0.0,1.198695e-16,0.0,9.556425e-17,0.0,0.0,0.0,-7.939110000000001e-18,-1.284109e-17,...,1.0,2.0,2.0,2.0,2.0,2.0,1.0,2.0,1.0,-0.009114
2,-3.912782e-16,0.0,1.198695e-16,0.0,9.556425e-17,0.0,0.0,0.0,-7.939110000000001e-18,-1.284109e-17,...,1.0,2.0,2.0,2.0,2.0,2.0,1.0,2.0,1.0,-0.010243
3,-3.912782e-16,0.0,1.198695e-16,0.0,9.556425e-17,0.0,0.0,0.0,-7.939110000000001e-18,-1.284109e-17,...,1.0,2.0,2.0,2.0,2.0,2.0,1.0,2.0,1.0,0.004046
4,-3.912782e-16,0.0,1.198695e-16,0.0,9.556425e-17,0.0,0.0,0.0,-7.939110000000001e-18,-1.284109e-17,...,1.0,2.0,2.0,2.0,2.0,2.0,1.0,2.0,1.0,-0.012301


In [18]:
# ## Build a coding agent to write code according to feature analysis agent's suggestions
# ## to-do: make it more generic and embed it into other agents
# class CodingAgent:

#   def __init__(self, model:str = 'gpt-4o'):
#     self.model = model
#     self.name = 'Coding Agent'
#     self.user_instructions = 'You are a coding engineer specialized in data science.\
#     You take suggestions as instructions from the feature analysis agent and write \
#     code to transform the given data. In the end, you return the transformed dataset \
#     in a pandas dataframe format. '
#     self.agent = Agent(
#         name = self.name,
#         model = self.model,
#         instructions = self.user_instructions,
#         model_settings = ModelSettings(temperature = 0)
#         )


#   async def generate_transformation_code(self, df_profile, context):
#     """
#     Take contexts from other agents and generate code to complete their missions.
#     Contexts from other agents are in JSON format
#     """


#     prompt = f"""
#     Based on the following:
#     Data Profile:\n{json.dumps(df_profile, indent = 2)},
#     Agent Contexts:\n{json.dumps(context, indent = 2)},

#     Write python code that:
#     1. The dataset is called 'df'. Do not change the name of the dataset. Do not \
#     read additional data.
#     2. Applies suggested transformations from the Feature Analysis Agent Suggestions
#     3. return the transformed data in a pandas dataframe format as 'df_transformed'

#     Only return python code. No explanation.
#     """


#     result = await Runner.run(self.agent, prompt)
#     # print(result.final_output)
#     return result.final_output

#   def execute_code(self, csv_path: str,  code:str):
#     '''Executes generated code and return df_transformed'''
#     df = pd.read_csv(csv_path)
#     local_varbs = {'df':df.copy(), 'pd':pd, 'np': np}
#     code = code.replace("```", "")
#     code = code.removeprefix("python")
#     exec(code, {}, local_varbs)
#     df_transformed = local_varbs.get('df_transformed',None)

#     return df_transformed


# coding_agent = CodingAgent()


# code = await coding_agent.generate_transformation_code(
#     fea_eng_result['raw_profile'],
#     fea_eng_result['Feature_analysis_suggestions']
# )

# print("Generated code:")
# print(code)

# # Execute to get transformed df
# import numpy as np
# csv_path = '/content/drive/MyDrive/Colab Notebooks/MLAgent/train.csv'

# df_transformed = coding_agent.execute_code(csv_path = csv_path, code = code)

# display(df_transformed.head())

In [32]:
# Summary statistics of the transformed columns whose name starts with 'E'.
df_transformed.loc[:, df_transformed.columns.str.startswith('E')].describe()

Unnamed: 0,E1,E10,E11,E12,E13,E14,E15,E16,E17,E18,E19,E2,E20,E3,E4,E5,E6,E7,E8,E9
count,8990.0,8990.0,8990.0,8990.0,8990.0,8990.0,8990.0,8990.0,8990.0,8990.0,8990.0,8990.0,8990.0,8990.0,8990.0,8990.0,8990.0,8990.0,8990.0,8990.0
mean,-2.402725e-16,1.770429e-16,1.011674e-16,-2.5291840000000002e-17,4.426073e-17,-6.955257000000001e-17,-3.793777e-17,-2.5291840000000002e-17,1.2645920000000001e-17,1.896888e-17,1.2645920000000001e-17,0.0,-1.896888e-17,4.1099250000000004e-17,-3.003406e-17,1.707199e-16,-1.770429e-16,1.0472400000000001e-17,-1.699296e-17,-8.852145e-17
std,1.000056,1.000056,1.000056,1.000056,1.000056,1.000056,1.000056,1.000056,1.000056,1.000056,1.000056,1.000056,1.000056,1.000056,1.000056,1.000056,1.000056,1.000056,1.000056,1.000056
min,-2.184065,-1.588325,-0.5407408,-0.4973147,-0.6419216,-0.5564466,-1.473911,-4.741402,-3.931795,-3.913051,-1.654293,-3.855771,-3.654835,-4.02567,-0.4089059,-1.883214,-0.5817415,-21.78338,-1.530875,-0.9327806
25%,-0.5923248,-0.9631261,-0.5207465,-0.4833869,-0.4961834,-0.3823926,-0.9617627,-0.5831173,-0.6400797,-0.6095827,-0.6088367,-0.739723,-0.09491172,-0.6302393,-0.3502366,-0.7873061,-0.510624,7.60232e-18,-0.5841859,-0.8787232
50%,-3.912782e-16,0.0,-0.4093502,-0.4332469,-0.3504452,-0.1793296,0.0,0.0,-7.939110000000001e-18,-1.284109e-17,-0.1554694,0.0,0.1166283,3.909884e-17,-0.2831859,0.0,-0.4078987,7.60232e-18,-0.05177295,-0.1742142
75%,0.330959,0.9280943,1.198695e-16,0.0,0.06855208,0.0,0.9550186,0.6623763,0.6298628,0.6687293,0.2223655,0.772201,0.5900256,0.7352738,-8.793365e-17,1.00836,-6.632328e-17,7.60232e-18,0.1028797,0.5867025
max,5.778718,1.559282,3.775155,3.711657,8.685323,9.770755,1.566779,2.783118,2.742721,2.707673,12.41416,2.162051,2.175447,2.309588,9.288295,1.258943,4.194194,3.885622,25.31221,2.618556


In [33]:
# Reload the untransformed csv and summarise the same 'E'-prefixed columns,
# for comparison with the transformed statistics in the previous cell.
df = pd.read_csv(csv_path)
df.loc[:, df.columns.str.startswith('E')].describe()

Unnamed: 0,E1,E10,E11,E12,E13,E14,E15,E16,E17,E18,E19,E2,E20,E3,E4,E5,E6,E7,E8,E9
count,7206.0,7984.0,7984.0,7984.0,7984.0,7984.0,7984.0,7984.0,7984.0,7984.0,7984.0,7984.0,7374.0,7984.0,7984.0,7984.0,7984.0,2021.0,7984.0,7984.0
mean,1.564574,0.504941,0.125869,0.118739,0.012314,0.007005,0.48507,-0.047127,-0.00733,0.097865,0.118793,0.500651,0.902917,0.340552,0.016795,0.599603,0.122388,-0.036545,-0.272055,0.263144
std,0.633895,0.336922,0.245719,0.251961,0.019263,0.012097,0.348769,1.137741,1.159378,1.146872,1.247622,1.424679,1.272464,1.506653,0.04187,0.337506,0.22205,1.925521,1.514817,0.298619
min,0.325149,0.000661,0.000661,0.000661,0.000661,0.000661,0.000661,-5.130519,-4.302885,-4.131097,-1.826114,-4.675791,-3.308764,-5.374951,0.000661,0.000661,0.000661,-19.918972,-2.457316,0.000661
25%,1.114633,0.165675,0.00496,0.003638,0.002976,0.002315,0.127976,-0.766907,-0.83032,-0.659848,-0.642287,-0.56037,0.440873,-0.661601,0.002646,0.305556,0.013228,-0.124696,-1.185839,0.007606
50%,1.516651,0.505787,0.019511,0.006944,0.005291,0.004299,0.441303,0.001765,0.06271,0.003939,-0.203393,0.693642,1.220291,0.308699,0.00496,0.650132,0.030754,0.016171,-0.51461,0.104167
75%,1.876977,0.822751,0.087384,0.074818,0.015212,0.006283,0.833333,0.763389,0.808831,0.909992,0.507321,1.638542,1.711411,1.482888,0.007275,0.953125,0.098545,0.425972,0.022738,0.471974
max,4.843911,1.0,1.0,1.0,0.169974,0.118386,1.0,2.936733,2.98914,3.024136,14.713768,3.403242,3.409812,3.619623,0.383267,1.0,1.0,3.509994,35.860072,1.0


In [72]:
class ModelingAgent:
  '''
  Modeling Agent.

  Given a problem statement, variable descriptions, the feature-engineering
  suggestions and the transformed data, asks the LLM for a modeling proposal,
  then for python code implementing the proposal, and executes that code.
  '''

  def __init__(self, model = 'gpt-4o',user_instructions = '', user_defined_target = ''):
    '''
    Build the underlying `Agent`.

    Args:
      model: model name handed to `Agent`.
      user_instructions: free text appended to the built-in system prompt.
      user_defined_target: optional target column name, appended to the
        instructions when non-empty.
    '''
    self.model = model
    self.name = 'Modeling Agent'
    self.user_defined_target = user_defined_target
    self.user_instructions = 'You are a data scientist specialized in machine learning modeling.\
    You take in problem statement, variable descriptions, transformed data based on the suggestions by the feature analysis agent, \
     and target variable(s). Based on these information, you make suggestions \
     on: \
     1. What kind of model we should build (e.g. Classification or regression)\
     2. What machine learning algorithm to use (e.g. linear regression,time series, XGBoost, or deep learning)\
     3. What hyperparameter we should tune\
      '+user_instructions

    if self.user_defined_target:
      # Leading space keeps the sentence from being glued onto the last word
      # of the caller-supplied instructions.
      self.user_instructions += f' The target variable is {self.user_defined_target}'

    self.agent = Agent(
        name = self.name,
        model = self.model,
        instructions = self.user_instructions,
        model_settings = ModelSettings(temperature = 0)
        )


  async def proposed_model(self, problem_statement:str, varb_info_path:str,fea_eng_suggestions:str, df_transformed:pd.DataFrame, target_variable:str):
    '''
    Ask the LLM for a modeling proposal.

    Args:
      problem_statement: description of the modeling task.
      varb_info_path: optional path of a text file with the original variable
        descriptions; any falsy value means "none available".
      fea_eng_suggestions: suggestions produced by the feature analysis agent.
      df_transformed: transformed data; only its column names are sent.
      target_variable: name of the target column.

    Returns:
      The agent's final output (also printed).
    '''
    if varb_info_path:
      with open(varb_info_path, 'r') as file:
        original_varb_info = file.read()
    else:
      original_varb_info = 'None'

    # Only column names go into the prompt, never the data itself.
    columns = list(df_transformed.columns)
    feature_names = [x for x in columns if x != target_variable]

    prompt = f"""
    You are given:
    * Problem Statement: {problem_statement}
    * Original Variable Descriptions: {original_varb_info}
    * Feature Engineering Suggestions: {fea_eng_suggestions}
    * Transformed Data: {columns}
    * Target Variable: {target_variable}

    Tasks:
    1. Determine whetehr problem is regression or classification
    2. Select a machine learning model type
    3. Recommend hyperparameters to tune on
    4. Recommend evaluation protocol (cross validation or train/test split only)
    5. If cross validation, recommend train/validation/test split ratio as 0.7:0.2:0.1. \
    If train/test split only, recommend train/test split ratio as 0.8:0.2.
    6. Suggest metrics to evaluate model performance
    7. Explain reasoning of the above decisions
    8. Return JSON strictly:

    {{
      'target_variable':'{target_variable}',
      'features':{feature_names}.
      'task_type':'regression'|'classification',
      'model_type':'linear_regression'|'time_series'|'xgboost'|'deep_learning'|...,
      'hyperparameters':'hyperparameter':'value',
      'evaluation_protocol':'cross_validation'|'train_test_split_only',
      'train_test_split_ratio':0.7:0.2:0.1 | 0.8:0.2,
      'metrics':['metric1','metric2','metrics3'...],
      'reasoning':'reasoning'


    }}

    """

    result = await Runner.run(self.agent, prompt)
    print(result.final_output)
    return result.final_output

  async def generate_modeling_code(self, modeling_proposal:str,):
    """
    Ask the LLM for python code that trains and evaluates the model described
    in `modeling_proposal` on a dataframe named 'df_transformed'.

    Returns:
      The agent's final output (also printed); expected to be python source,
      possibly wrapped in a markdown code fence.
    """
    prompt = f"""
    Based on the following:
    Modeling_proposal:\n{json.dumps(modeling_proposal, indent = 2)},

    Write python code that:
    1. The dataset is called 'df_transformed'. Do not change the name of the dataset. Do not \
    read additional data.
    2. Applies modeling proposal from Modeling_proposal
    3. Write code to train model and evaluate model performance. \
        a. If you use 'early_stopping_rounds' as a parameter, pass it to the constructor of model.
        b. Save training history
    4. return model as 'Model',  evaluation result as 'evaluation_result':{{'metric1':float,'metric2':float...}}\
    training history as 'training_history'

    Only return python code. No explanation.
    """


    result = await Runner.run(self.agent, prompt)
    print(result.final_output)
    return result.final_output

  @staticmethod
  def _extract_code(code:str):
    '''
    Return the python source contained in an LLM reply.

    Fenced blocks (``` or ```python ...) are extracted and concatenated, so
    prose or whitespace around the fence is ignored. Without a complete fence
    the text is used as-is, minus stray backticks and a leading language tag.
    '''
    import re  # local import: `re` is not among the notebook-level imports

    fenced = re.findall(r"```[ \t]*[\w+-]*[ \t]*\r?\n(.*?)```", code, flags = re.DOTALL)
    if fenced:
      return "\n".join(fenced)

    text = code.replace("```", "").strip()
    first_line, _, rest = text.partition("\n")
    # Only drop the first line when it is exactly a language tag.
    if first_line.strip().lower() in ('python', 'py'):
      text = rest
    return text

  def execute_code(self, df_transformed: pd.DataFrame,  code:str):
    '''
    Execute generated modeling code.

    The code runs with 'df_transformed' (a copy, so the caller's frame is
    never mutated), 'pd' and 'np' predefined.

    SECURITY: this exec()s LLM-generated code with full interpreter
    privileges. Review `code` before running it on anything sensitive.

    Returns:
      Tuple (model, evaluation_result, training_history) taken from the
      variables 'Model', 'evaluation_result' and 'training_history' defined
      by the code; each is None when the code did not define it.
    '''
    # One dict serves as both globals and locals. With separate dicts the code
    # runs like a class body: functions, lambdas and comprehensions defined in
    # it cannot see 'df_transformed'/'pd'/'np' or its own imports (NameError).
    namespace = {'df_transformed':df_transformed.copy(), 'pd':pd, 'np': np}
    exec(self._extract_code(code), namespace)
    model = namespace.get('Model',None)
    evaluation_result = namespace.get('evaluation_result',None)
    training_history = namespace.get('training_history',None)

    return model, evaluation_result,training_history





In [73]:
# Instantiate with defaults: 'gpt-4o', no extra instructions and no target baked into the system prompt.
modeling_agent=ModelingAgent()


In [62]:
# Problem statement handed to the modeling agent (same text as `user_instructions` above).
# NOTE: each backslash continuation keeps the following line's leading spaces inside the string.
problem_statement = "build a model that predicts excess returns and includes a betting strategy\
 designed to outperform the S&P 500 while staying within a 120% volatility constraint. We’ll\
  provide daily data that combines public market information with our proprietary dataset, giving\
   you the raw material to uncover patterns most miss."

varb_info_path=userdata.get('varb_info_path')


# Reuse the outputs of the feature-analysis step.
fea_eng_suggestions = fea_eng_result['Feature_analysis_suggestions']
# Rebinds the name to a copy of the existing transformed frame.
df_transformed = df_transformed.copy()
target_variable = 'market_forward_excess_returns'

In [63]:
# Ask the modeling agent for a proposal; only the column names of df_transformed are sent in the prompt.
model_proposal = await modeling_agent.proposed_model(problem_statement, varb_info_path,fea_eng_suggestions, df_transformed, target_variable)

```json
{
  "target_variable": "market_forward_excess_returns",
  "features": [
    "E1", "E10", "E11", "E12", "E13", "E14", "E15", "E16", "E17", "E18", "E19", "E2", "E20", "E3", "E4", "E5", "E6", "E7", "E8", "E9",
    "I1", "I2", "I3", "I4", "I5", "I6", "I7", "I8", "I9",
    "M1", "M10", "M11", "M12", "M13", "M14", "M15", "M16", "M17", "M18", "M2", "M3", "M4", "M5", "M6", "M7", "M8", "M9",
    "P1", "P10", "P11", "P12", "P13", "P2", "P3", "P4", "P5", "P6", "P7", "P8", "P9",
    "S1", "S10", "S11", "S12", "S2", "S3", "S4", "S5", "S6", "S7", "S8", "S9",
    "V1", "V10", "V11", "V12", "V13", "V2", "V3", "V4", "V5", "V6", "V7", "V8", "V9"
  ],
  "task_type": "regression",
  "model_type": "xgboost",
  "hyperparameters": {
    "n_estimators": 100,
    "max_depth": 6,
    "learning_rate": 0.1,
    "subsample": 0.8,
    "colsample_bytree": 0.8
  },
  "evaluation_protocol": "cross_validation",
  "train_test_split_ratio": "0.7:0.2:0.1",
  "metrics": ["mean_squared_error", "r2_score"],
  "reason

In [64]:
# model_proposal

In [65]:
# Turn the proposal into python training/evaluation code (the agent also prints it).
modeling_code = await modeling_agent.generate_modeling_code(model_proposal)


```python
import xgboost as xgb
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np

# Define features and target
features = [
    "E1", "E10", "E11", "E12", "E13", "E14", "E15", "E16", "E17", "E18", "E19", "E2", "E20", "E3", "E4", "E5", "E6", "E7", "E8", "E9",
    "I1", "I2", "I3", "I4", "I5", "I6", "I7", "I8", "I9",
    "M1", "M10", "M11", "M12", "M13", "M14", "M15", "M16", "M17", "M18", "M2", "M3", "M4", "M5", "M6", "M7", "M8", "M9",
    "P1", "P10", "P11", "P12", "P13", "P2", "P3", "P4", "P5", "P6", "P7", "P8", "P9",
    "S1", "S10", "S11", "S12", "S2", "S3", "S4", "S5", "S6", "S7", "S8", "S9",
    "V1", "V10", "V11", "V12", "V13", "V2", "V3", "V4", "V5", "V6", "V7", "V8", "V9"
]
target = "market_forward_excess_returns"

# Split the data
train_data, temp_data = train_test_split(df_transformed, test_size=0.3, random_state=42)
val_data, test_data = train_test_split(temp_data, test_size=1/3, 

In [66]:
# modeling_code

In [74]:
# Run the generated code on a copy of df_transformed and collect the 'Model',
# 'evaluation_result' and 'training_history' variables it defines (None when missing).
Model, evaluation_result,training_history = modeling_agent.execute_code(df_transformed,  modeling_code)

Parameters: { "n_estimators" } are not used.

  self.starting_round = model.num_boosted_rounds()


In [75]:
# Display the evaluation metrics produced by the generated modeling code.
evaluation_result

{'mean_squared_error': 0.00012616092519736596,
 'r2_score': -0.022558179162524672}

In [76]:
# Display the training history produced by the generated modeling code.
training_history

{'train': OrderedDict([('rmse',
               [0.01048210790108843,
                0.01041397585188847,
                0.0103250956847731,
                0.01024958200210367,
                0.01017249797868847,
                0.01009467933604893,
                0.01002933622804429,
                0.00998057441192418,
                0.0099224304977216,
                0.00987254126910561,
                0.00979311562672036])]),
 'eval': OrderedDict([('rmse',
               [0.01029564606825463,
                0.0103006373860316,
                0.01031320860965398,
                0.01031873862249828,
                0.01033437780490372,
                0.01033729360242271,
                0.0103278794016274,
                0.01032343791700244,
                0.01033179782979834,
                0.01034031621837822,
                0.01033699198055439])])}

In [None]:
# ## Build an evaluation agent

## to-do


# class EvaluationAgent:

#   def __init__(self, model = 'gpt-4o',user_instructions = '', user_defined_target = ''):
#     self.model = model
#     self.name = 'Evaluation Agent'
#     self.user_defined_target = user_defined_target
#     self.user_instructions = 'You are a data scientist specialized in evaluating machine learning model.\
#     You take in modeling proposal by the modeling agent and the test data.\
#     Based on these information, you make suggestions on: \
#      1.\
#       '+user_instructions

#     if self.user_defined_target:
#       self.user_instructions += f'The target variable is {self.user_defined_target}'

#     self.agent = Agent(
#         name = self.name,
#         model = self.model,
#         instructions = self.user_instructions,
#         model_settings = ModelSettings(temperature = 0)
#         )


#   async def proposed_model(self, problem_statement:str, varb_info_path:str,fea_eng_suggestions:str, df_transformed:pd.DataFrame, target_variable:str):

#     if varb_info_path:
#       with open(varb_info_path, 'r') as file:
#         original_varb_info = file.read()
#     else:
#       original_varb_info = 'None'


#     schema = {
#         'columns':list(df_transformed.columns),
#         'problem_statement':problem_statement,
#         'original_varb_info':original_varb_info,
#         'fea_eng_suggestions':fea_eng_suggestions,
#         'target_variable':target_variable
#     }

#     prompt = f"""
#     You are given:
#     * Problem Statement: {schema.get('problem_statement')}
#     * Original Variable Descriptions: {schema.get('original_varb_info')}
#     * Feature Engineering Suggestions: {schema.get('fea_eng_suggestions')}
#     * Transformed Data: {schema.get('columns')}
#     * Target Variable: {schema.get('target_variable')}

#     Tasks:
#     1. Determine whetehr problem is regression or classification
#     2. Select a machine learning model type
#     3. Recommend hyperparameters to tune on
#     4. Recommend evaluation protocol (cross validation or train/test split only)
#     5. If cross validation, recommend train/validation/test split ratio as 0.7:0.2:0.1. \
#     If train/test split only, recommend train/test split ratio as 0.8:0.2.
#     6. Suggest metrics to evaluate model performance
#     7. Explain reasoning of the above decisions
#     8. Return JSON strictly:

#     {{
#       'target_variable':'{target_variable}',
#       'features':{str([x for x in list(df_transformed.columns) if x != target_variable])}.
#       'task_type':'regression'|'classification',
#       'model_type':'linear_regression'|'time_series'|'xgboost'|'deep_learning'|...,
#       'hyperparameters':'hyperparameter':'value',
#       'evaluation_protocol':'cross_validation'|'train_test_split_only',
#       'train_test_split_ratio':0.7:0.2:0.1 | 0.8:0.2,
#       'metrics':['metric1','metric2','metrics3'...],
#       'reasoning':'reasoning'


#     }}

#     """

#     result = await Runner.run(self.agent, prompt)
#     print(result.final_output)
#     return result.final_output

#   async def generate_modeling_code(self, modeling_proposal:str,):
#     """
#     Based on modeling_proposal, write code to train model and evaluate model performance.
#     """


#     prompt = f"""
#     Based on the following:
#     Modeling_proposal:\n{json.dumps(modeling_proposal, indent = 2)},

#     Write python code that:
#     1. The dataset is called 'df_transformed'. Do not change the name of the dataset. Do not \
#     read additional data.
#     2. Applies modeling proposal from Modeling_proposal
#     3. Write code to train model and evaluate model performance. \
#     If you use 'early_stopping_rounds' as a parameter, pass it to the constructor of model.
#     4. return model as 'Model' and evaluation result as 'evaluation_result':{{'metric1':float,'metric2':float...}}

#     Only return python code. No explanation.
#     """


#     result = await Runner.run(self.agent, prompt)
#     print(result.final_output)
#     return result.final_output

#   def execute_code(self, df_transformed: pd.DataFrame,  code:str):
#     '''Executes generated code and return model and result'''
#     local_varbs = {'df_transformed':df_transformed.copy(), 'pd':pd, 'np': np}
#     code = code.replace("```", "")
#     code = code.removeprefix("python")
#     exec(code, {}, local_varbs)
#     model = local_varbs.get('Model',None)
#     evaluation_result = local_varbs.get('evaluation_result',None)

#     return model, evaluation_result



