In [5]:
import pandas as pd
from tabulate import tabulate
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

import sys
sys.path.append("../ml-model/")
sys.path.append("..")

from SpotifyPreProcessing import DataPreProcessing

import pickle

# Read the raw Spotify extract, then run it through the project's
# preprocessing step before any modelling code touches it.
df = pd.read_csv(
    "../data/SpotifySongPolularityAPIExtract.csv",
    low_memory=False,
)
df = DataPreProcessing(df)

# "popularity" is the regression target; every remaining column is a feature.
y = df["popularity"]
X = df.drop(columns=["popularity"])

# Hold out 20% of the rows for evaluation. The fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

# Score the saved baseline models (files without the "_with_Feat" suffix) on the
# held-out test split. Each (model, metric) pair has its own pickled estimator,
# "../data/<model>_(<metric>).sav", and that estimator is scored with the metric
# named in its filename. Results land in df_no_feat (rows: metrics, columns: models).
metric_scorers = {
    "MAE": mean_absolute_error,
    "MSE": mean_squared_error,
    "R2": r2_score,
}
model_names = [
    "Linear_Regression",
    "Ridge_Regression",
    "Lasso_Regressor",
    "SGD_Regressor",
    "Linear_SVR",
]

df_no_feat = pd.DataFrame(index=list(metric_scorers), columns=model_names)

for model in model_names:
    for metrics, scorer in metric_scorers.items():
        # NOTE: unpickling can execute arbitrary code; only load .sav files
        # that come from a trusted source.
        # The context manager closes the file handle as soon as the model is
        # loaded instead of leaving it open until garbage collection.
        with open(f'../data/{model}_({metrics}).sav', 'rb') as model_file:
            loaded_model = pickle.load(model_file)
        y_pred = loaded_model.predict(X_test)
        df_no_feat.loc[metrics, model] = scorer(y_test, y_pred)

print(tabulate(df_no_feat, headers='keys', tablefmt="psql"))

# Score the saved "_with_Feat" models on the held-out test split. Each
# (model, metric) pair has its own pickled estimator,
# "../data/<model>_with_Feat_(<metric>).sav", and that estimator is scored with
# the metric named in its filename. Results land in df_feat (rows: metrics,
# columns: models).
# NOTE(review): these models are scored on the same X_test as the models without
# the "_with_Feat" suffix; confirm they were trained on the same set of columns.
metric_scorers = {
    "MAE": mean_absolute_error,
    "MSE": mean_squared_error,
    "R2": r2_score,
}
model_names = [
    "Linear_Regression",
    "Ridge_Regression",
    "Lasso_Regressor",
    "SGD_Regressor",
    "Linear_SVR",
]

df_feat = pd.DataFrame(index=list(metric_scorers), columns=model_names)

for model in model_names:
    for metrics, scorer in metric_scorers.items():
        # NOTE: unpickling can execute arbitrary code; only load .sav files
        # that come from a trusted source.
        # The context manager closes the file handle as soon as the model is
        # loaded instead of leaving it open until garbage collection.
        with open(f'../data/{model}_with_Feat_({metrics}).sav', 'rb') as model_file:
            loaded_model = pickle.load(model_file)
        y_pred = loaded_model.predict(X_test)
        df_feat.loc[metrics, model] = scorer(y_test, y_pred)

print(tabulate(df_feat, headers='keys', tablefmt="psql"))

    Linear_Regression Ridge_Regression SGD_Regressor  Linear_SVR
MAE          8.381164        14.943837     15.002551   14.984248
MSE         339.08526       339.064173    338.790602  336.185725
R2          -0.005163          -0.0051     -0.023284   -0.022197
