In [1]:
from classes.openai_wrapper import OpenAIWrapper
from classes.process_data import PrepData
from classes.rf_regression_optimized import RandomForestRegression
from classes.tune_model import TuneModel
from classes.iterate_model import IterateModel

from dotenv import load_dotenv
import pandas as pd
import logging
import os

load_dotenv()

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


# Initiate OpenAI Wrapper Class
openai_key = os.getenv("OPENAI_API_KEY")

openai_wrapper = OpenAIWrapper(openai_key)


# Preprocess the dataset based on AI evaluation
dataset = pd.read_csv('../data/Advertising Budget and Sales.csv')

prompt_docs = "../docs/RandomForestRegressorDocs.txt"
performance_tracking_file = "../docs/PerformanceTracking.json"

data_prepper = PrepData(dataset, openai_wrapper, performance_tracking_file)
X, y, label_encoders = data_prepper.preprocess_dataset()


# Perform Random Forest Regression
rf_regressor = RandomForestRegression(
    X=X, 
    y=y, 
    label_encoders=label_encoders
    )

# First round model results:
mse, r2, accuracy, test_size, model_params, feature_importance = rf_regressor.rf_regression()

# Instantiate class for AI-driven model tuning
model_iterator = IterateModel(label_encoders=label_encoders, desired_accuracy=94.5, run_threshold=10, tuner=TuneModel(openai_wrapper, prompt_docs), performance_tracking_file=performance_tracking_file)

model_iterator.iterate_model(X, y, mse, r2, accuracy, test_size, model_params, feature_importance)

  """
INFO:classes.process_data:Performance tracking file reset. Proceeding...
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:classes.process_data:
AI Evaluation of the dataset:
{'selected_features': ['Unnamed: 0', 'TV Ad Budget ($)', 'Radio Ad Budget ($)', 'Newspaper Ad Budget ($)'], 'target_column': 'Sales ($)'}

INFO:classes.process_data:
Selected features: ['Unnamed: 0', 'TV Ad Budget ($)', 'Radio Ad Budget ($)', 'Newspaper Ad Budget ($)']

INFO:classes.process_data:
Target column: Sales ($)

INFO:classes.rf_regression_optimized:
(0.5682254999999994, 0.9791893920226531, 94.18482296354863, 0.3, {'bootstrap': True, 'ccp_alpha': 0.0, 'criterion': 'squared_error', 'max_depth': None, 'max_features': 1.0, 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'monotonic_cst': None, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'ra