# In [None]:
# importing libraries
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# splitting data
from sklearn.model_selection import train_test_split

# models
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR

# pipeline
from sklearn.pipeline import Pipeline

# scaler
from sklearn.preprocessing import StandardScaler

# feature expansion
from sklearn.preprocessing import PolynomialFeatures

# metrics
from sklearn import metrics

class RegressionModels:
    def __init__(self, df: pd.DataFrame, target):
        """Store the dataset and register the available model builders.

        Args:
            df: Full dataset, including the target column.
            target: Label of the column in ``df`` that should be predicted.
        """
        self.df = df
        self.target = target
        # Holds the train/test split; populated by split_train_test().
        self.tts = {}

        # Scalers are defined before the model registry because every scaled
        # model builder looks its preprocessor up in this dict.
        self.scaler = {
            'ss': StandardScaler(),
        }

        # Register the builder methods themselves rather than calling them:
        # each builder fits on the train/test split, which does not exist yet
        # at construction time.  Use e.g. ``self.models['LinR']()`` once the
        # split has been made.
        self.models = {
            'LinR': self.linearRegressionModel,
            'LogR': self.logisticRegressionModel,
            'DecT': self.decisionTreeRegressionModel,
            'PolR': self.polynomialRegressionModel,
        }
        print('Setting Up the Object...')
    
    def select_x(self, columns=None):
        """Select the feature columns and store them in ``self.data``.

        Args:
            columns: A column label or an iterable of labels to keep.
                ``None`` (the default) keeps every column of ``self.df``.

        Returns:
            A new DataFrame holding the requested columns without the target
            column.  ``self.df`` itself is not modified.
        """
        # The "all columns" default has to be resolved here: a default of
        # ``list(self.df.columns)`` in the signature is evaluated when the
        # class body runs, where ``self`` does not exist.
        if columns is None:
            columns = self.df.columns
        elif isinstance(columns, str):
            # A bare label would select a Series; keep the result a DataFrame.
            columns = [columns]

        selected = self.df[list(columns)]

        # The target must never be among the features.  A non-inplace drop
        # returns a fresh frame instead of mutating a slice of ``self.df``;
        # errors='ignore' covers the case where the target was not selected.
        self.data = selected.drop(columns=self.target, errors='ignore')
        return self.data
    
    def split_train_test(self, test_size=0.2, random_state=0):
        """Split features and target into training and testing sets.

        The four parts are stored in ``self.tts`` under the keys 'xtrain',
        'xtest', 'ytrain' and 'ytest'.

        Args:
            test_size: Passed through to ``train_test_split``.
            random_state: Passed through to ``train_test_split``.
        """
        features = getattr(self, 'data', None)
        if features is None:
            # No explicit feature selection was made yet: fall back to every
            # column except the target.
            features = self.df.drop(columns=self.target)

        # ``self.target`` is only the column label; the values to predict
        # live in ``self.df``.
        labels = self.df[self.target]

        xtrain, xtest, ytrain, ytest = train_test_split(
            features, labels, test_size=test_size, random_state=random_state
        )
        self.tts.update(xtrain=xtrain, xtest=xtest, ytrain=ytrain, ytest=ytest)
        
    @staticmethod
    def metricsResults(ytrue, ypred):
        """Compute a set of regression metrics for one batch of predictions.

        Declared static because it takes only ``(ytrue, ypred)`` and reads no
        instance state.

        Args:
            ytrue: Ground-truth target values.
            ypred: Predicted target values.

        Returns:
            A dict mapping metric name to its value rounded to 4 decimals,
            with the keys 'explained_variance', 'mean_squared_log_error',
            'r2', 'MAE', 'MSE' and 'RMSE'.  'mean_squared_log_error' is NaN
            when scikit-learn rejects the inputs for that metric.
        """
        explained_variance = metrics.explained_variance_score(ytrue, ypred)
        mean_absolute_error = metrics.mean_absolute_error(ytrue, ypred)
        mse = metrics.mean_squared_error(ytrue, ypred)
        r2 = metrics.r2_score(ytrue, ypred)

        # The log-based error is not defined for every input (scikit-learn
        # raises ValueError, e.g. for negative values).  The metrics above
        # have already validated the inputs, so a ValueError here means
        # "not applicable" rather than malformed data; report NaN instead of
        # discarding all the other metrics.
        try:
            mean_squared_log_error = metrics.mean_squared_log_error(ytrue, ypred)
        except ValueError:
            mean_squared_log_error = np.nan

        # Keys are plain metric names; the ':' belongs to the dict syntax,
        # not inside the key strings.
        results = {
            'explained_variance': round(explained_variance, 4),
            'mean_squared_log_error': round(mean_squared_log_error, 4),
            'r2': round(r2, 4),
            'MAE': round(mean_absolute_error, 4),
            'MSE': round(mse, 4),
            'RMSE': round(np.sqrt(mse), 4),
        }
        return results
        
        
    def regressionResults(self, model, prediction_training, prediction_testing):
        """Score training and testing predictions against the true targets.

        Args:
            model: The estimator that produced the predictions.  Not used by
                the computation; kept so existing callers keep working.
            prediction_training: Predictions for ``self.tts['xtrain']``.
            prediction_testing: Predictions for ``self.tts['xtest']``.

        Returns:
            A DataFrame with one row per split ('Training', 'Testing') and
            one column per metric returned by ``metricsResults``.
        """
        # Look the helper up on the class: it takes only (ytrue, ypred), so
        # it must not receive ``self`` as an implicit first argument.
        compute_metrics = type(self).metricsResults

        # Predictions are compared with the target values, not with the
        # feature matrices they were computed from.
        results_training = compute_metrics(self.tts['ytrain'], prediction_training)
        results_testing = compute_metrics(self.tts['ytest'], prediction_testing)

        # A list of dicts yields one row per dict; a single dict of scalars
        # cannot be turned into a DataFrame without an explicit index.
        results = pd.DataFrame(
            [results_training, results_testing],
            index=['Training', 'Testing'],
        )
        return results
        
    def linearRegressionModel(self, scaler='ss'):
        """Fit a scaled linear regression and evaluate it on both splits.

        Args:
            scaler: Key into ``self.scaler`` selecting the preprocessing
                step.  Unknown keys fall back to the 'ss' entry.

        Returns:
            A tuple ``(pipeline, results, prediction_train, prediction_test)``
            where ``results`` is whatever ``regressionResults`` returns.
        """
        # dict.has_key() no longer exists in Python 3; .get() with a default
        # expresses the same "use 'ss' if the key is unknown" fallback.
        preprocessor = self.scaler.get(scaler, self.scaler['ss'])
        pipeline = Pipeline(steps=[
            ('preprocessor', preprocessor),
            ('regressor', LinearRegression()),
        ])
        pipeline.fit(self.tts['xtrain'], self.tts['ytrain'])
        prediction_train = pipeline.predict(self.tts['xtrain'])
        prediction_test = pipeline.predict(self.tts['xtest'])

        results = self.regressionResults(pipeline, prediction_train, prediction_test)
        return pipeline, results, prediction_train, prediction_test
    
    def logisticRegressionModel(self, scaler='ss'):
        """Fit a scaled logistic regression and evaluate it on both splits.

        Args:
            scaler: Key into ``self.scaler`` selecting the preprocessing
                step.  Unknown keys fall back to the 'ss' entry.

        Returns:
            A tuple ``(pipeline, results, prediction_train, prediction_test)``
            where ``results`` is whatever ``regressionResults`` returns.
        """
        # NOTE(review): LogisticRegression is a classifier, so this builder
        # presumably only makes sense when the target holds discrete class
        # labels -- confirm the intended use.

        # dict.has_key() no longer exists in Python 3; .get() with a default
        # expresses the same "use 'ss' if the key is unknown" fallback.
        preprocessor = self.scaler.get(scaler, self.scaler['ss'])
        pipeline = Pipeline(steps=[
            ('preprocessor', preprocessor),
            ('regressor', LogisticRegression()),
        ])
        pipeline.fit(self.tts['xtrain'], self.tts['ytrain'])
        prediction_train = pipeline.predict(self.tts['xtrain'])
        prediction_test = pipeline.predict(self.tts['xtest'])

        results = self.regressionResults(pipeline, prediction_train, prediction_test)
        return pipeline, results, prediction_train, prediction_test
    
    def decisionTreeRegressionModel(self):
        """Train an unscaled decision-tree regressor and score both splits.

        Returns:
            A tuple of the fitted pipeline, the output of
            ``regressionResults``, the training predictions and the testing
            predictions, in that order.
        """
        tree_model = Pipeline(steps=[('regressor', DecisionTreeRegressor())])
        tree_model.fit(self.tts['xtrain'], self.tts['ytrain'])

        # Predict on the data the tree has seen, then on the held-out part.
        fitted_on_train = tree_model.predict(self.tts['xtrain'])
        fitted_on_test = tree_model.predict(self.tts['xtest'])

        report = self.regressionResults(tree_model, fitted_on_train, fitted_on_test)
        return tree_model, report, fitted_on_train, fitted_on_test
        
    def polynomialRegressionModel(self, scaler='ss', degree=2):
        """Fit a scaled polynomial regression and evaluate it on both splits.

        The pipeline scales the features, expands them into polynomial terms
        and fits a linear model on the expanded features.

        Args:
            scaler: Key into ``self.scaler`` selecting the preprocessing
                step.  Unknown keys fall back to the 'ss' entry.
            degree: Degree passed to ``PolynomialFeatures``.

        Returns:
            A tuple ``(pipeline, results, prediction_train, prediction_test)``
            where ``results`` is whatever ``regressionResults`` returns.
        """
        # dict.has_key() no longer exists in Python 3; .get() with a default
        # expresses the same "use 'ss' if the key is unknown" fallback.
        preprocessor = self.scaler.get(scaler, self.scaler['ss'])
        pipeline = Pipeline(steps=[
            ('preprocessor', preprocessor),
            ('polynomial', PolynomialFeatures(degree=degree)),
            # Polynomial regression is a linear fit on the expanded features;
            # a decision tree here would just be a second tree model.
            ('regressor', LinearRegression()),
        ])
        pipeline.fit(self.tts['xtrain'], self.tts['ytrain'])
        prediction_train = pipeline.predict(self.tts['xtrain'])
        prediction_test = pipeline.predict(self.tts['xtest'])

        results = self.regressionResults(pipeline, prediction_train, prediction_test)
        return pipeline, results, prediction_train, prediction_test
    
    def