# In [11]:  (Jupyter notebook cell marker)
import numpy as np                          
import pandas as pd

from sklearn.model_selection import train_test_split

from sklearn.linear_model import LinearRegression, Lasso, Ridge 
from sklearn.linear_model import SGDRegressor
from sklearn.preprocessing import PolynomialFeatures
from sklearn.svm import SVR
from sklearn.neural_network import MLPRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import ElasticNet
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.ensemble import BaggingRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import StackingRegressor
import xgboost as xgb

from sklearn import metrics

import pickle

class Regression:
    """Fit a fixed suite of regressors on one train/test split and rank them.

    Constructing an instance splits ``X``/``Y`` 80/20 (shuffled,
    ``random_state=42``), fits every model listed in ``__init__``, scores each
    one on the held-out split, pickles the model with the highest R² and
    builds a summary table.

    Attributes set by ``__init__``:
        x_train, x_test, y_train, y_test: the split returned by
            ``train_test_split``.
        list_of_models: one list per model, laid out as
            ``[name, model, MSE, MAE, r2_percent]``; the polynomial row has a
            sixth element, the selected degree.
        best_model: the row of ``list_of_models`` with the highest R².
        table_of_models: the ``DataFrame`` returned by ``GetTable``.
    """

    def __init__(self, X, Y):
        """Split the data, train and score every model, persist the best one.

        Side effect: writes ``"<name><r2_percent>.pkl"`` (relative path, so it
        lands in the current working directory) containing the pickled best
        model.
        """
        self.x_train, self.x_test, self.y_train, self.y_test = train_test_split(
            X, Y, test_size=0.20, shuffle=True, random_state=42
        )
        # index: 0             1     2   3   4  (5 if exist)
        # value: name_of_model model MSE MAE r2 poly_degree
        self.list_of_models = [
            ["Linear_Regression", *self.Linear_Regression()],
            ["LassoRegression", *self.LassoRegression()],
            ["RidgeRegression", *self.RidgeRegression()],
            ["SGDRegression", *self.SGDRegression()],
            ["Polynomial_Regression", *self.Polynomial_Regression()],
            ["SVR", *self.SVR()],
            ["NeuralNetworkRegression", *self.NeuralNetworkRegression()],
            ["GradientBoostingRegression", *self.GradientBoostingRegression()],
            ["DecisionTreeRegression", *self.DecisionTreeRegression()],
            ["ElasticNetRegression", *self.ElasticNetRegression()],
            ["RandomForestRegression", *self.RandomForestRegression()],
            ["AdaBoostRegression", *self.AdaBoostRegression()],
            ["BaggingRegression", *self.BaggingRegression()],
            ["KNNLinearRegression", *self.KNNLinearRegression()],
            ["XGBoostRegression", *self.XGBoostRegression()],
        ]

        self.best_model = self.FindBestModel()

        with open(f"{self.best_model[0]}{self.best_model[4]}.pkl", "wb") as file:
            pickle.dump(self.best_model[1], file)

        self.table_of_models = self.GetTable()

    def _fit_and_score(self, model):
        """Fit ``model`` on the training split and score it on the test split.

        Returns:
            ``(model, mse, mae, r2_percent)`` where ``r2_percent`` is the R²
            score multiplied by 100.
        """
        model.fit(self.x_train, self.y_train)
        y_pred = model.predict(self.x_test)

        mse = metrics.mean_squared_error(self.y_test, y_pred)
        mae = metrics.mean_absolute_error(self.y_test, y_pred)
        r2 = metrics.r2_score(self.y_test, y_pred)
        return model, mse, mae, r2 * 100

    def Linear_Regression(self):
        """Fit and score ``LinearRegression``; see ``_fit_and_score``."""
        return self._fit_and_score(LinearRegression())

    def LassoRegression(self):
        """Fit and score ``Lasso(random_state=42)``; see ``_fit_and_score``."""
        return self._fit_and_score(Lasso(random_state=42))

    def RidgeRegression(self):
        """Fit and score ``Ridge(random_state=42)``; see ``_fit_and_score``."""
        return self._fit_and_score(Ridge(random_state=42))

    def SGDRegression(self):
        """Fit and score ``SGDRegressor(random_state=42)``."""
        return self._fit_and_score(SGDRegressor(random_state=42))

    def Polynomial_Regression(self):
        """Fit linear models on degree-2 and degree-3 polynomial features.

        Returns:
            ``(model, mse, mae, r2_percent, degree)`` for the degree with the
            highest test R². ``model`` is the bare ``LinearRegression`` fitted
            on the expanded features, so it must be fed inputs transformed
            with ``PolynomialFeatures(degree=degree)``.
        """
        best = None  # (r2, model, mse, mae, degree) of the best degree so far

        for deg in range(2, 4):
            poly = PolynomialFeatures(degree=deg)
            x_train_poly = poly.fit_transform(self.x_train)
            # Reuse the transformer fitted on the training split instead of
            # re-fitting it on the test split.
            x_test_poly = poly.transform(self.x_test)

            linear = LinearRegression()
            linear.fit(x_train_poly, self.y_train)
            y_pred_test = linear.predict(x_test_poly)

            mse = metrics.mean_squared_error(self.y_test, y_pred_test)
            mae = metrics.mean_absolute_error(self.y_test, y_pred_test)
            r2 = metrics.r2_score(self.y_test, y_pred_test)

            # R² has no lower bound, so always accept the first candidate
            # rather than comparing against a fixed sentinel such as -1.
            if best is None or r2 > best[0]:
                best = (r2, linear, mse, mae, deg)

        best_r2, best_model, best_mse, best_mae, best_degree = best
        return best_model, best_mse, best_mae, best_r2 * 100, best_degree

    def SVR(self):
        """Fit and score an RBF-kernel support vector regressor."""
        # Inside a method body the bare name ``SVR`` resolves to the
        # module-level sklearn class, not to this method.
        return self._fit_and_score(SVR(kernel='rbf'))

    def NeuralNetworkRegression(self):
        """Fit and score ``MLPRegressor(random_state=42, max_iter=10000)``."""
        return self._fit_and_score(MLPRegressor(random_state=42, max_iter=10000))

    def GradientBoostingRegression(self):
        """Fit and score ``GradientBoostingRegressor(random_state=42)``."""
        return self._fit_and_score(GradientBoostingRegressor(random_state=42))

    def DecisionTreeRegression(self):
        """Fit and score ``DecisionTreeRegressor(random_state=42)``."""
        return self._fit_and_score(DecisionTreeRegressor(random_state=42))

    def ElasticNetRegression(self):
        """Fit and score ``ElasticNet(random_state=42)``."""
        return self._fit_and_score(ElasticNet(random_state=42))

    def RandomForestRegression(self):
        """Fit and score ``RandomForestRegressor(random_state=42)``."""
        return self._fit_and_score(RandomForestRegressor(random_state=42))

    def AdaBoostRegression(self):
        """Fit and score ``AdaBoostRegressor(random_state=42)``."""
        return self._fit_and_score(AdaBoostRegressor(random_state=42))

    def BaggingRegression(self):
        """Fit and score ``BaggingRegressor(random_state=42)``."""
        return self._fit_and_score(BaggingRegressor(random_state=42))

    def KNNLinearRegression(self):
        """Fit and score a stack of KNN and linear regressors.

        The two base estimators feed a ``LinearRegression`` final estimator.
        """
        estimators = [
            ('knn', KNeighborsRegressor()),
            ('linear', LinearRegression()),
        ]
        stack_model = StackingRegressor(
            estimators=estimators, final_estimator=LinearRegression()
        )
        return self._fit_and_score(stack_model)

    def XGBoostRegression(self):
        """Fit and score ``xgb.XGBRegressor(random_state=42)``."""
        return self._fit_and_score(xgb.XGBRegressor(random_state=42))

    def FindBestModel(self):
        """Return the row of ``list_of_models`` with the highest R² (index 4).

        On ties the earliest row wins.
        """
        return max(self.list_of_models, key=lambda row: row[4])

    def GetTable(self):
        """Build a summary ``DataFrame`` of all models, sorted by R² ascending.

        The fitted model objects are left out of the table. Rows without a
        polynomial degree get ``None`` in the ``'Polynomial Degree'`` column.
        ``list_of_models`` itself is not modified, so this method can be
        called repeatedly.
        """
        columns = ['name of model', 'MSE', 'MAE', 'r2_score', 'Polynomial Degree']
        records = []
        for row in self.list_of_models:
            name, _model, mse, mae, r2, *extra = row
            degree = extra[0] if extra else None
            records.append([name, mse, mae, r2, degree])

        table_of_models = pd.DataFrame(records, columns=columns)
        # sort_values returns a new frame; the result must be kept.
        return table_of_models.sort_values(by='r2_score')