In [2]:
from script.configuration import FINAL_PATH
from modeling.model import Agent

In [3]:
import pandas as pd
from sklearn.svm import SVR
from xgboost import XGBRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import (
    RandomForestRegressor,
    AdaBoostRegressor,
    GradientBoostingRegressor,
    ExtraTreesRegressor,
)

In [4]:
# Read the CSV located at FINAL_PATH into a DataFrame.
df = pd.read_csv(filepath_or_buffer=FINAL_PATH)

In [5]:
# Preview the first two rows to sanity-check the loaded columns.
df.head(n=2)

Unnamed: 0,experience_level,employment_type,employee_residence,remote_ratio,company_location,company_size,new_job_title,sqrt_salary_in_usd,Cluster_Label
0,3,2,5,0,5,0,5,292.997,1
1,2,0,7,0,7,2,3,173.205,0


In [6]:
# Regression target: the "sqrt_salary_in_usd" column.
y = df["sqrt_salary_in_usd"]
# Features: every remaining column of the frame.
# NOTE(review): "Cluster_Label" stays in the feature set; if the clustering that
# produced it used salary, this leaks the target — confirm upstream.
X = df.drop("sqrt_salary_in_usd", axis=1)

In [7]:
# Fixed seed shared by every estimator that accepts `random_state`, so that a
# Restart & Run All refits the same models instead of drawing new randomness.
SEED = 42

# Candidate regressors keyed by display name; the keys are reused as the row
# labels of the comparison table built from `models.keys()`.
# SVR is left unseeded because it exposes no `random_state` parameter.
# NOTE(review): Agent may introduce its own randomness (e.g. a data split) that
# this seed does not control — confirm in modeling.model.
models = {
    "Random Forest": RandomForestRegressor(random_state=SEED),
    "Ada Boost": AdaBoostRegressor(random_state=SEED),
    "SVR": SVR(),
    "xgboost": XGBRegressor(random_state=SEED),
    "Gradient Boosting": GradientBoostingRegressor(random_state=SEED),
    "Decision Tree": DecisionTreeRegressor(random_state=SEED),
    "Extra Trees": ExtraTreesRegressor(random_state=SEED),
}

In [8]:
# Accumulator for one metrics record per evaluated model.
metrics_list = list()

In [9]:
# Reset the accumulator inside this cell so the cell is idempotent: re-running
# it would otherwise append a second set of records, and the row count would no
# longer match the `models.keys()` index used to build the comparison table.
metrics_list = []

for name, model in models.items():
    # Agent comes from modeling.model. prediction() takes no arguments, so the
    # data it predicts on is presumably held inside the agent after train()
    # — TODO confirm against the Agent implementation.
    agent = Agent(model)
    agent.train(X, y)
    y_pred = agent.prediction()
    metrics = agent.evaluate(y_pred)

    # One record per model, rounded to 3 decimals for display. Records are
    # appended in `models` iteration order, which the table index relies on.
    metrics_dict = {
        "mean_squared_error": round(metrics.mse, 3),
        "mean_absolute_error": round(metrics.mae, 3),
        "root_mean_squared_error": round(metrics.rmse, 3),
        "r2_score": round(metrics.r2, 3),
    }
    metrics_list.append(metrics_dict)

In [10]:
# Assemble the per-model metric records into one table, labelling each row with
# the corresponding model name (same order as the `models` dict).
models_df = pd.DataFrame(data=metrics_list, index=list(models))

In [11]:
# Display the comparison table: one row per model, one column per metric.
models_df

Unnamed: 0,mean_squared_error,mean_absolute_error,root_mean_squared_error,r2_score
Random Forest,4278.746,51.892,65.412,0.466
Ada Boost,5529.607,60.284,74.361,0.31
SVR,5485.887,58.029,74.067,0.316
xgboost,4499.771,52.521,67.08,0.439
Gradient Boosting,4258.685,51.591,65.259,0.469
Decision Tree,4595.057,53.081,67.787,0.427
Extra Trees,4430.134,52.6,66.559,0.447
