In [None]:
# Import standard-library and third-party libraries
import sys
import os
import pandas as pd
from datetime import datetime

In [None]:
# Add the '../scripts' directory to the Python module search path so that
# modules stored there can be imported by name from this notebook.
# The relative path is resolved against the current working directory at the
# moment this cell runs.
scripts_dir = os.path.abspath('../scripts')
# Guard against duplicates: without this check, every re-run of the cell
# would append the same entry to sys.path again.
if scripts_dir not in sys.path:
    sys.path.append(scripts_dir)

In [None]:
# Import the project-local helper modules (expected in '../scripts')
import data_processing as dp
import model as m


In [None]:


# End-to-end run: load -> encode -> scale -> split -> train -> evaluate -> plot.
# Every step is delegated to the project modules `dp` and `m`.

# --- Load -------------------------------------------------------------
filepath = '../data/insurance.csv'
data = dp.load_and_clean_data(filepath)

# --- Encode -----------------------------------------------------------
# Both column lists are handed to dp.encoder along with the encoder name.
# NOTE(review): which list a given encoder name actually consumes is decided
# inside dp.encoder -- confirm there.
columns_label = ['sex', 'smoker', 'region']
columns_onehot = ['sex', 'smoker', 'region', 'children']
df_label = dp.encoder('labelEncoder', data, columns_label, columns_onehot)

# --- Scale ------------------------------------------------------------
# Only 'charges' (used below as the target column) is passed to the scaler.
columns_scaler = ['charges']
df_scaled = dp.scaler('minMaxScaler', df_label, columns_scaler)

# --- Split ------------------------------------------------------------
# Target is the 'charges' column; features are everything else.
y = df_scaled['charges']
X = df_scaled.drop('charges', axis=1)
X_train, X_test, y_train, y_test = m.split_data(X, y)

# --- Train ------------------------------------------------------------
lr_model, dt_model, rfr_model, xgb_model = m.train_models(X_train, y_train)

# --- Evaluate ---------------------------------------------------------
# `models` holds display names in the same order as the fitted models
# iterated below, so the score lists line up with it index by index.
models = ['Linear Regression', 'Decision Tree', 'Random Forest', 'XGBoost']
mae_scores = []
mse_scores = []
r2_scores = []
for model in (lr_model, dt_model, rfr_model, xgb_model):
    # evaluate_model yields four values; the fourth is not needed here.
    mae, mse, r2, _ = m.evaluate_model(model, X_test, y_test)
    mae_scores.append(mae)
    mse_scores.append(mse)
    r2_scores.append(r2)

# --- Plot -------------------------------------------------------------
m.plot_metrics(models, mae_scores, mse_scores, r2_scores)
