In [1]:
import numpy as np                          
import pandas as pd
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LinearRegression, Lasso, Ridge 
from sklearn.linear_model import SGDRegressor
from sklearn.preprocessing import PolynomialFeatures
from sklearn.svm import SVR
from sklearn.neural_network import MLPRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import ElasticNet
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.ensemble import BaggingRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import StackingRegressor
import xgboost as xgb
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn import metrics
import warnings
import pickle
warnings.filterwarnings(action='ignore')
class Regression:

    
    def Linear_Regression(self):
        linear = LinearRegression()
        linear.fit(self.x_train, self.y_train)
        
        y_pred_test = linear.predict(self.x_test)
        
        mse = metrics.mean_squared_error(self.y_test, y_pred_test)
        mae = metrics.mean_absolute_error(self.y_test, y_pred_test)
        r2_score = metrics.r2_score(self.y_test, y_pred_test)
        return linear, mse, mae, r2_score * 100
    
    
    def LassoRegression(self):
        
        lasso_model = Lasso(random_state=42)
        lasso_model.fit(self.x_train, self.y_train)
        
        y_pred = lasso_model.predict(self.x_test)
        
        mse = metrics.mean_squared_error(self.y_test, y_pred)
        mae = metrics.mean_absolute_error(self.y_test, y_pred)
        r2 = metrics.r2_score(self.y_test, y_pred)
        
        return lasso_model, mse, mae, r2 * 100
    
       
    def RidgeRegression(self):
        
        ridge_model = Ridge(random_state=42)
        ridge_model.fit(self.x_train, self.y_train)
        
        y_pred = ridge_model.predict(self.x_test)
        
        mse = metrics.mean_squared_error(self.y_test, y_pred)
        mae = metrics.mean_absolute_error(self.y_test, y_pred)
        r2 = metrics.r2_score(self.y_test, y_pred)
        
        return ridge_model, mse, mae, r2 * 100
    
    def SGDRegression(self):
        sgd_model = SGDRegressor(random_state=42)
        sgd_model.fit(self.x_train, self.y_train)
        
        y_pred = sgd_model.predict(self.x_test)
        
        mse = metrics.mean_squared_error(self.y_test, y_pred)
        mae = metrics.mean_absolute_error(self.y_test, y_pred)
        r2 = metrics.r2_score(self.y_test, y_pred)
        
        return sgd_model, mse, mae, r2 * 100
    
    
    def Polynomial_Regression(self):
        
        best_model = None
        best_mse = 1e18; best_mae = 1e18; best_r2_score = -1; best_degree = 2

        for deg in range(1,1):
            poly = PolynomialFeatures(degree = deg)
            x_train_poly = poly.fit_transform(self.x_train)
            
            linear = LinearRegression()
            linear.fit(x_train_poly,self.y_train)
            
            y_pred_test = linear.predict(poly.fit_transform(self.x_test))
            
            mse = metrics.mean_squared_error(self.y_test, y_pred_test)
            mae = metrics.mean_absolute_error(self.y_test, y_pred_test)
            r2_score = metrics.r2_score(self.y_test, y_pred_test)
            
            if(r2_score > best_r2_score):
                best_model = linear; best_mse = mse; best_mae = mae; best_r2_score = r2_score; best_degree = deg
                
        return best_model, best_mse, best_mae, best_r2_score * 100, best_degree
    
    
    def SVR(self):
        
        svr_model = SVR(kernel='rbf')
        svr_model.fit(self.x_train, self.y_train)

        y_pred = svr_model.predict(self.x_test)

        mse = metrics.mean_squared_error(self.y_test, y_pred)
        mae = metrics.mean_absolute_error(self.y_test, y_pred)
        r2 = metrics.r2_score(self.y_test, y_pred)

        return svr_model, mse, mae, r2 * 100
    
    
    def NeuralNetworkRegression(self):
        
        nn_model = MLPRegressor(random_state=42, max_iter=10000)  
        nn_model.fit(self.x_train, self.y_train)

        y_pred = nn_model.predict(self.x_test)

        mse = metrics.mean_squared_error(self.y_test, y_pred)
        mae = metrics.mean_absolute_error(self.y_test, y_pred)
        r2 = metrics.r2_score(self.y_test, y_pred)
        
        train_mse=metrics.mean_squared_error(self.y_train, nn_model.predict(self.x_train))
        return nn_model, mse, mae, r2 * 100
    
    
    def GradientBoostingRegression(self):
        
        gb_model = GradientBoostingRegressor(random_state=42) 
        gb_model.fit(self.x_train, self.y_train)

        y_pred = gb_model.predict(self.x_test)

        mse = metrics.mean_squared_error(self.y_test, y_pred)
        mae = metrics.mean_absolute_error(self.y_test, y_pred)
        r2 = metrics.r2_score(self.y_test, y_pred)
        train_mse=metrics.mean_squared_error(self.y_train, gb_model.predict(self.x_train))
            
        return gb_model, mse, mae, r2 * 100,train_mse
    
    
    def DecisionTreeRegression(self):
        
        dt_model = DecisionTreeRegressor(random_state=42) 
        dt_model.fit(self.x_train, self.y_train)

        y_pred = dt_model.predict(self.x_test)

        mse = metrics.mean_squared_error(self.y_test, y_pred)
        mae = metrics.mean_absolute_error(self.y_test, y_pred)
        r2 = metrics.r2_score(self.y_test, y_pred)
        train_mse=metrics.mean_squared_error(self.y_train, dt_model.predict(self.x_train))
        
        return dt_model, mse, mae, r2 * 100,train_mse
    
    
    def ElasticNetRegression(self):
        
        en_model = ElasticNet(random_state=42)  
        en_model.fit(self.x_train, self.y_train)

        y_pred = en_model.predict(self.x_test)

        mse = metrics.mean_squared_error(self.y_test, y_pred)
        mae = metrics.mean_absolute_error(self.y_test, y_pred)
        r2 = metrics.r2_score(self.y_test, y_pred)
        train_mse=metrics.mean_squared_error(self.y_train, en_model.predict(self.x_train))
        
        return en_model, mse, mae, r2 * 100,train_mse

    
    def RandomForestRegression(self):
        
        rf_model = RandomForestRegressor(n_estimators=100,random_state=42)
        rf_model.fit(self.x_train, self.y_train)
        
        y_pred = rf_model.predict(self.x_test)
        
        mse = metrics.mean_squared_error(self.y_test, y_pred)
        mae = metrics.mean_absolute_error(self.y_test, y_pred)
        r2 = metrics.r2_score(self.y_test, y_pred)
        train_mse=metrics.mean_squared_error(self.y_train, rf_model.predict(self.x_train))
        
        return rf_model, mse, mae, r2 * 100,train_mse
    
    
    def AdaBoostRegression(self):
        
        ab_model = AdaBoostRegressor(random_state=42)
        ab_model.fit(self.x_train, self.y_train)
        
        y_pred = ab_model.predict(self.x_test)
        
        mse = metrics.mean_squared_error(self.y_test, y_pred)
        mae = metrics.mean_absolute_error(self.y_test, y_pred)
        r2 = metrics.r2_score(self.y_test, y_pred)
        train_mse=metrics.mean_squared_error(self.y_train, ab_model.predict(self.x_train))
        return ab_model, mse, mae, r2 * 100,train_mse
    
    
    def BaggingRegression(self):
        
        bag_model = BaggingRegressor(n_estimators=100,random_state=42)
        bag_model.fit(self.x_train, self.y_train)
        
        y_pred = bag_model.predict(self.x_test)
        
        mse = metrics.mean_squared_error(self.y_test, y_pred)
        mae = metrics.mean_absolute_error(self.y_test, y_pred)
        r2 = metrics.r2_score(self.y_test, y_pred)
        train_mse=metrics.mean_squared_error(self.y_train, bag_model.predict(self.x_train))
        return bag_model, mse, mae, r2 * 100,train_mse
    
    def KNNLinearRegression(self):
        
        estimators = [
            ('knn', KNeighborsRegressor()),
            ('linear', LinearRegression())
        ]
        stack_model = StackingRegressor(estimators=estimators, final_estimator=LinearRegression())

        stack_model.fit(self.x_train, self.y_train)
        
        y_pred = stack_model.predict(self.x_test)
        
        mse = metrics.mean_squared_error(self.y_test, y_pred)
        mae = metrics.mean_absolute_error(self.y_test, y_pred)
        r2 = metrics.r2_score(self.y_test, y_pred)
        train_mse=metrics.mean_squared_error(self.y_train, stack_model.predict(self.x_train))
        
        return stack_model, mse, mae, r2 * 100,train_mse
    
    
    def XGBoostRegression(self):
        
        xgb_model = xgb.XGBRegressor(n_estimators=100,random_state=42)
        xgb_model.fit(self.x_train, self.y_train)
        
        y_pred = xgb_model.predict(self.x_test)
        
        mse = metrics.mean_squared_error(self.y_test, y_pred)
        mae = metrics.mean_absolute_error(self.y_test, y_pred)
        r2 = metrics.r2_score(self.y_test, y_pred)
        train_mse=metrics.mean_squared_error(self.y_train, xgb_model.predict(self.x_train))
        
        return xgb_model, mse, mae, r2 * 100,train_mse
        
    
   
    
    def FindBestModel(self):
        best = self.list_of_models[0] 
        
        for row in self.list_of_models:
            if(row[4] > best[4]):
                best = row
                
        return best
    
    def GetTable(self):
        
        table_of_models = pd.DataFrame(columns=['name of model','Model','MSE', 'MAE', 'r2_score', 'Polynomial Degree',"MSE TRAIN"])
        for row in self.list_of_models:
            if row[0] != "Polynomial_Regression": row.append(None) # degree
            table_of_models.loc[len(table_of_models)] = row
        table_of_models.sort_values(by='r2_score')
        table_of_models.drop(columns=['Model',],inplace=True)
        return table_of_models
    
    
    def __init__(self, X, Y):
        self.x_train, self.x_test, self.y_train, self.y_test = train_test_split(X, Y, test_size=0.20, shuffle=True, random_state=42)
        # index: 0             1     2   3   4  (5 if exist)
        # value: name_of_model model MSE MAE r2 poly_degree
        self.list_of_models = [
            ["Linear_Regression", *self.Linear_Regression()],
            ["LassoRegression", *self.LassoRegression()],
            ["RidgeRegression", *self.RidgeRegression()],
            ["SGDRegression", *self.SGDRegression()],
            ["Polynomial_Regression", *self.Polynomial_Regression()],
            ["SVR", *self.SVR()],
            ["NeuralNetworkRegression", *self.NeuralNetworkRegression()],
            ["GradientBoostingRegression", *self.GradientBoostingRegression()],
            ["DecisionTreeRegression", *self.DecisionTreeRegression()],
            ["ElasticNetRegression", *self.ElasticNetRegression()],
            ["RandomForestRegression", *self.RandomForestRegression()],
            ["AdaBoostRegression", *self.AdaBoostRegression()],
            ["BaggingRegression", *self.BaggingRegression()],
            ["KNNLinearRegression", *self.KNNLinearRegression()],
            ["XGBoostRegression", *self.XGBoostRegression()]]
            
        self.best_model = self.FindBestModel()
        
#         with open(f"{self.best_model[0]}{self.best_model[4]}.pkl", "wb") as file:
#             pickle.dump(self.best_model[1], file)
            
        self.table_of_models = self.GetTable()

In [2]:
class hyperparameter_tuning:
    def __init__(self,X,Y):
        self.x_train, self.x_test, self.y_train, self.y_test = train_test_split(X, Y, test_size=0.20, shuffle=True, random_state=42)
        self.bestParam=[]
        regression_methods = [
            self.Linear_Regression,
            self.LassoRegression,
            self.RidgeRegression,
            self.SGDRegression,
            #self.Polynomial_Regression,
            self.SVR,
            self.NeuralNetworkRegression,
            self.GradientBoostingRegression,
            self.DecisionTreeRegression,
            self.ElasticNetRegression,
            self.RandomForestRegression,
            self.AdaBoostRegression,
            self.BaggingRegression,
            self.KNNLinearRegression,
            self.XGBoostRegression
        ]
        for method in regression_methods:
            print("Calling:", method.__name__)
            method()
            print(self.bestParam[-1])
            print("<----------------------------------------------------------->")

    def Linear_Regression(self):
        param_grid = {
        'fit_intercept': [True, False],
        'normalize': [True, False]}
        linear_regression = LinearRegression()
        grid_search = GridSearchCV(estimator=linear_regression, param_grid=param_grid, cv=5)
        grid_search.fit(self.x_train, self.y_train)
        self.bestParam.append(["Linear_Regression",grid_search.best_params_])
        
        y_train_pred = grid_search.best_estimator_.predict(self.x_train)
        r2_train = r2_score(self.y_train, y_train_pred)
        print("R2 score on training set:", r2_train)

        y_test_pred = grid_search.best_estimator_.predict(self.x_test)
        r2_test = r2_score(self.y_test, y_test_pred)
        print("R2 score on test set:", r2_test)
        
    def LassoRegression(self):
        lasso = Lasso()
        param_grid = {
            'alpha': [0.01, 0.1, 1, 10, 100],
            'max_iter': [1000, 2000, 3000]  # Example values for max_iter
        }
        grid_search = GridSearchCV(lasso, param_grid, cv=5)
        grid_search.fit(self.x_train, self.y_train)
        self.bestParam.append(["LassoRegression",grid_search.best_params_])
        
        y_train_pred = grid_search.best_estimator_.predict(self.x_train)
        r2_train = r2_score(self.y_train, y_train_pred)
        print("R2 score on training set:", r2_train)

        y_test_pred = grid_search.best_estimator_.predict(self.x_test)
        r2_test = r2_score(self.y_test, y_test_pred)
        print("R2 score on test set:", r2_test)
    def RidgeRegression(self):
        ridge = Ridge()
        param_grid = {
            'alpha': [0.01, 0.1, 1, 10, 100],
            'max_iter': [1000, 2000, 3000]  # Example values for max_iter
        }
        grid_search = GridSearchCV(ridge, param_grid, cv=5)
        grid_search.fit(self.x_train, self.y_train)
        self.bestParam.append(["RidgeRegression",grid_search.best_params_])
        
        y_train_pred = grid_search.best_estimator_.predict(self.x_train)
        r2_train = r2_score(self.y_train, y_train_pred)
        print("R2 score on training set:", r2_train)

        y_test_pred = grid_search.best_estimator_.predict(self.x_test)
        r2_test = r2_score(self.y_test, y_test_pred)
        print("R2 score on test set:", r2_test)
    def SGDRegression(self):
        sgd = SGDRegressor()
        param_grid = {
            'alpha': [0.0001, 0.001, 0.01],
            'max_iter': [5000, 6000, 7000],
            'learning_rate': ['constant', 'optimal']
        }
        grid_search = GridSearchCV(sgd, param_grid, cv=5)
        grid_search.fit(self.x_train, self.y_train)
        self.bestParam.append(["SGDRegression",grid_search.best_params_])
        
        y_train_pred = grid_search.best_estimator_.predict(self.x_train)
        r2_train = r2_score(self.y_train, y_train_pred)
        print("R2 score on training set:", r2_train)

        y_test_pred = grid_search.best_estimator_.predict(self.x_test)
        r2_test = r2_score(self.y_test, y_test_pred)
        print("R2 score on test set:", r2_test)
    def Polynomial_Regression(self):
        param_grid = {'polynomialfeatures__degree': [2,3,5]}

        pipeline = make_pipeline(PolynomialFeatures(), LinearRegression())
        
        grid_search = GridSearchCV(pipeline, param_grid, cv=5)
        grid_search.fit(self.x_train, self.y_train)
        self.bestParam.append(["Polynomial_Regression",grid_search.best_params_])
        
        y_train_pred = grid_search.best_estimator_.predict(self.x_train)
        r2_train = r2_score(self.y_train, y_train_pred)
        print("R2 score on training set:", r2_train)

        y_test_pred = grid_search.best_estimator_.predict(self.x_test)
        r2_test = r2_score(self.y_test, y_test_pred)
        print("R2 score on test set:", r2_test)
    def SVR(self):
        param_grid = {
            'kernel': ['rbf', 'linear', 'poly'],
            'C': [0.1, 1, 10],
            'gamma': [0.01, 0.1, 1],
            'epsilon': [0.1, 0.2, 0.5]
        }
        svr = SVR()
        grid_search = GridSearchCV(svr, param_grid, cv=5)
        grid_search.fit(self.x_train, self.y_train)
        self.bestParam.append(["SVR",grid_search.best_params_])
        
        y_train_pred = grid_search.best_estimator_.predict(self.x_train)
        r2_train = r2_score(self.y_train, y_train_pred)
        print("R2 score on training set:", r2_train)

        y_test_pred = grid_search.best_estimator_.predict(self.x_test)
        r2_test = r2_score(self.y_test, y_test_pred)
        print("R2 score on test set:", r2_test)
    def NeuralNetworkRegression(self):
        param_grid = {
        'hidden_layer_sizes': [(100,), (50, 50), (100, 50, 25)],
        'activation': ['relu', 'tanh'],
        'solver': ['adam', 'sgd'],
        'alpha': [0.0001, 0.001, 0.01],
        'learning_rate': ['constant', 'adaptive'],
        'max_iter': [100, 200, 300]
        }

        mlp = MLPRegressor()
        grid_search = GridSearchCV(mlp, param_grid, cv=5)
        grid_search.fit(self.x_train, self.y_train)
        self.bestParam.append(["NeuralNetworkRegression",grid_search.best_params_])
        
        y_train_pred = grid_search.best_estimator_.predict(self.x_train)
        r2_train = r2_score(self.y_train, y_train_pred)
        print("R2 score on training set:", r2_train)

        y_test_pred = grid_search.best_estimator_.predict(self.x_test)
        r2_test = r2_score(self.y_test, y_test_pred)
        print("R2 score on test set:", r2_test)
    def GradientBoostingRegression(self):
        param_grid = {
        'n_estimators': [50, 100, 200],
        'learning_rate': [0.05, 0.1, 0.2],
        'max_depth': [3, 4, 5],
        }
        gbr = GradientBoostingRegressor()
        grid_search = GridSearchCV(gbr, param_grid, cv=5)
        grid_search.fit(self.x_train, self.y_train)
        self.bestParam.append(["GradientBoostingRegression",grid_search.best_params_])
        
        y_train_pred = grid_search.best_estimator_.predict(self.x_train)
        r2_train = r2_score(self.y_train, y_train_pred)
        print("R2 score on training set:", r2_train)

        y_test_pred = grid_search.best_estimator_.predict(self.x_test)
        r2_test = r2_score(self.y_test, y_test_pred)
        print("R2 score on test set:", r2_test)
    def DecisionTreeRegression(self):
        param_grid = {
        'max_depth': [None, 5, 10, 15, 20],
        'min_samples_split': [2, 5, 10],
        'min_samples_leaf': [1, 2, 4],
        }
        dt = DecisionTreeRegressor()
        grid_search = GridSearchCV(dt, param_grid, cv=5)
        grid_search.fit(self.x_train, self.y_train)
        self.bestParam.append(["DecisionTreeRegression",grid_search.best_params_])
        
        y_train_pred = grid_search.best_estimator_.predict(self.x_train)
        r2_train = r2_score(self.y_train, y_train_pred)
        print("R2 score on training set:", r2_train)

        y_test_pred = grid_search.best_estimator_.predict(self.x_test)
        r2_test = r2_score(self.y_test, y_test_pred)
        print("R2 score on test set:", r2_test)
    def ElasticNetRegression(self):
        param_grid = {
            'alpha': [0.1, 0.5, 1.0],
            'l1_ratio': [0.1, 0.3, 0.5, 0.7, 0.9],
            'max_iter': [1000, 2000, 3000]
        }
        elastic_net = ElasticNet()
        grid_search = GridSearchCV(elastic_net, param_grid, cv=5)
        grid_search.fit(self.x_train, self.y_train)
        self.bestParam.append(["ElasticNetRegression", grid_search.best_params_])
        
        y_train_pred = grid_search.best_estimator_.predict(self.x_train)
        r2_train = r2_score(self.y_train, y_train_pred)
        print("R2 score on training set:", r2_train)

        y_test_pred = grid_search.best_estimator_.predict(self.x_test)
        r2_test = r2_score(self.y_test, y_test_pred)
        print("R2 score on test set:", r2_test)
    def RandomForestRegression(self):
        param_grid = {
        'n_estimators': [50, 100, 200],
        'max_depth': [1, 5, 10, 15, 20],
        'min_samples_split': [2, 5, 10],
        }
        rf = RandomForestRegressor()
        grid_search = GridSearchCV(rf, param_grid, cv=5)
        grid_search.fit(self.x_train, self.y_train)
        self.bestParam.append(["RandomForestRegression", grid_search.best_params_])
        
        y_train_pred = grid_search.best_estimator_.predict(self.x_train)
        r2_train = r2_score(self.y_train, y_train_pred)
        print("R2 score on training set:", r2_train)

        y_test_pred = grid_search.best_estimator_.predict(self.x_test)
        r2_test = r2_score(self.y_test, y_test_pred)
        print("R2 score on test set:", r2_test)
    def AdaBoostRegression(self):
        param_grid = {
            'n_estimators': [50, 100, 200],
            'learning_rate': [0.01, 0.1, 1.0]
        }
        ada_boost = AdaBoostRegressor()
        grid_search = GridSearchCV(ada_boost, param_grid, cv=5)
        grid_search.fit(self.x_train, self.y_train)
        self.bestParam.append(["AdaBoostRegression", grid_search.best_params_])
        
        y_train_pred = grid_search.best_estimator_.predict(self.x_train)
        r2_train = r2_score(self.y_train, y_train_pred)
        print("R2 score on training set:", r2_train)

        y_test_pred = grid_search.best_estimator_.predict(self.x_test)
        r2_test = r2_score(self.y_test, y_test_pred)
        print("R2 score on test set:", r2_test)
    def BaggingRegression(self):
        param_grid = {
            'n_estimators': [10, 50, 100],
            'max_samples': [0.5, 1.0],
            'max_features': [0.5, 1.0],
        }
        bagging = BaggingRegressor()
        grid_search = GridSearchCV(bagging, param_grid, cv=5)
        grid_search.fit(self.x_train, self.y_train)
        self.bestParam.append(["BaggingRegression", grid_search.best_params_])
        
        y_train_pred = grid_search.best_estimator_.predict(self.x_train)
        r2_train = r2_score(self.y_train, y_train_pred)
        print("R2 score on training set:", r2_train)

        y_test_pred = grid_search.best_estimator_.predict(self.x_test)
        r2_test = r2_score(self.y_test, y_test_pred)
        print("R2 score on test set:", r2_test)
    def KNNLinearRegression(self):
        param_grid = {
            'n_neighbors': [3, 5, 7, 9, 11],
            'weights': ['uniform', 'distance'],
            'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],
            'p': [1, 2]
        }
        knn = KNeighborsRegressor()
        grid_search = GridSearchCV(knn, param_grid, cv=5)
        grid_search.fit(self.x_train, self.y_train)
        self.bestParam.append(["KNNLinearRegression", grid_search.best_params_])
        
        y_train_pred = grid_search.best_estimator_.predict(self.x_train)
        r2_train = r2_score(self.y_train, y_train_pred)
        print("R2 score on training set:", r2_train)

        y_test_pred = grid_search.best_estimator_.predict(self.x_test)
        r2_test = r2_score(self.y_test, y_test_pred)
        print("R2 score on test set:", r2_test)
    def XGBoostRegression(self):
        param_grid = {
            'n_estimators': [100, 200, 300],
            'learning_rate': [0.01, 0.1, 0.3],
            'max_depth': [3, 5, 7],
            'subsample': [0.5, 0.8, 1.0],
        }
        xgb = XGBRegressor()
        grid_search = GridSearchCV(xgb, param_grid, cv=5)
        grid_search.fit(self.x_train, self.y_train)
        self.bestParam.append(["XGBoostRegression", grid_search.best_params_])
        
        y_train_pred = grid_search.best_estimator_.predict(self.x_train)
        r2_train = r2_score(self.y_train, y_train_pred)
        print("R2 score on training set:", r2_train)

        y_test_pred = grid_search.best_estimator_.predict(self.x_test)
        r2_test = r2_score(self.y_test, y_test_pred)
        print("R2 score on test set:", r2_test)

In [3]:
df=pd.read_csv("PreprocessedDF.csv")

In [4]:
df=df.drop(["Unnamed: 0","index","Song","Album","Artist Names","Spotify Link","Song Image","Spotify URI","image_description","color_values"],axis=1)

In [5]:
df

Unnamed: 0,Hot100 Rank,safe_log Song Length(ms),Popularity,sqrt Acousticness,Danceability,Energy,safe_log Instrumentalness,safe_log Liveness,Loudness,safe_log Speechiness,...,deep dance pop,nursery,hel,nashville indie,alabama rap,boston rock,pop romantico,romanian house,romanian pop,moldovan pop
0,44,12.345078,62,0.141421,0.478,0.736,-9.253143,-2.137071,-7.124,-3.304978,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,43,12.579703,63,0.482701,0.588,0.522,-18.420681,-1.427116,-6.254,-3.262305,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,86,12.193256,49,0.689928,0.313,0.600,-10.572528,-0.648174,-7.913,-3.332604,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,20,12.038044,43,0.986408,0.503,0.059,-18.420681,-2.273026,-16.131,-3.001750,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,50,12.433260,0,0.122066,0.843,0.348,-6.660887,-1.398367,-10.669,-2.909554,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6186,88,12.607165,56,0.107703,0.875,0.862,-8.177120,-2.721135,-7.694,-2.780621,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6187,8,12.621663,71,0.434741,0.787,0.799,-18.420681,-1.203973,-4.680,-2.309610,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6188,43,12.353915,60,0.511859,0.684,0.819,-6.229717,-2.180367,-3.309,-1.435485,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6189,7,12.463911,75,0.144222,0.583,0.786,-18.420681,-1.671313,-3.142,-3.503230,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [6]:
y=np.array(df["Popularity"])
x=np.array(df.drop(["Popularity"],axis=1))

In [7]:
scaler=StandardScaler()
x=scaler.fit_transform(x)

In [None]:
cls=hyperparameter_tuning(x,y)

Calling: Linear_Regression
R2 score on training set: 0.6391773362774602
R2 score on test set: -2.7028155999583746e+27
['Linear_Regression', {'fit_intercept': False, 'normalize': True}]
<----------------------------------------------------------->
Calling: LassoRegression
R2 score on training set: 0.6273937085573732
R2 score on test set: 0.5643734944097024
['LassoRegression', {'alpha': 0.1, 'max_iter': 1000}]
<----------------------------------------------------------->
Calling: RidgeRegression
R2 score on training set: 0.6454968383811319
R2 score on test set: 0.5311113684674005
['RidgeRegression', {'alpha': 100, 'max_iter': 1000}]
<----------------------------------------------------------->
Calling: SGDRegression
R2 score on training set: -3.1370674160045195e+19
R2 score on test set: -1.8561404321121628e+18
['SGDRegression', {'alpha': 0.01, 'learning_rate': 'optimal', 'max_iter': 5000}]
<----------------------------------------------------------->
Calling: SVR


In [None]:
# cls=Regression(x,y)

In [None]:
#cls.table_of_models["MSE"][10]