# Importing Necessary Libraries

In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import Ridge, Lasso, LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold, cross_val_predict
from sklearn.metrics import r2_score,mean_squared_error
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
import matplotlib.pyplot as plt

# Defining Regression Class

In [2]:
class regression:
    
    def load_data(self,target_variable):
        self.df = pd.read_csv("D:\Study\Masters in Automation and IT\OOP for Data Science\Dataset\RegressionPredictionData.csv")
        self.y = self.df[target_variable]
        self.x = self.df.drop(target_variable,axis=1)
    
    def split(self,t_size,r_state):
        x_tr,x_ts,y_tr,y_ts = train_test_split(self.x,self.y,test_size = t_size,random_state = r_state)
        self.x_train = x_tr
        self.x_test = x_ts
        self.y_train = y_tr
        self.y_test = y_ts
        
    def linear_regression(self):
        self.model = LinearRegression()
        self.model.fit(self.x_train,self.y_train)
        self.y_predict_linear = self.model.predict(self.x_test)
        
    def polynomial_regression(self,k_fold,degree):

        estimator = Pipeline([("polynomial_features", PolynomialFeatures()),("linear_regression", LinearRegression())])
                    
        params = {'polynomial_features__degree': degree}

        grid = GridSearchCV(estimator, params, cv=k_fold)

        grid.fit(self.x_train,self.y_train)
        self.best_score_poly = grid.best_score_
        self.best_params_poly = grid.best_params_ 
                    
        self.y_predict_poly = grid.predict(self.x_test)

    def ridge_regression(self, alpha_values, k_fold):
        ridge_model = Ridge()
    
        param = {'alpha': alpha_values}
        
        grid = GridSearchCV(estimator=ridge_model, param_grid=param, cv=k_fold)
        
        grid.fit(self.x_train, self.y_train)
        
        self.best_alpha = grid.best_params_['alpha']
        self.best_score_ridge = grid.best_score_
        self.best_ridge_model = grid.best_estimator_
        
        self.y_predict_ridge = self.best_ridge_model.predict(self.x_test)
   
    def lasso_regression(self, alpha_values, k_fold):
        lasso_model = Lasso()
    
        param = {'alpha': alpha_values}
        
        grid = GridSearchCV(estimator=lasso_model, param_grid=param, cv=k_fold)
        
        grid.fit(self.x_train, self.y_train)
        
        self.best_alpha = grid.best_params_['alpha']
        self.best_score_lasso = grid.best_score_
        self.best_lasso_model = grid.best_estimator_
        
        self.y_predict_lasso = self.best_lasso_model.predict(self.x_test)
   
    def error_metric(self,pred_val,actual_val):
        r2score = r2_score(pred_val,actual_val)
        print(r2score)

In [3]:
data = regression()
data.load_data('pH-Wert')
print(data.df)
data.split(0.3,101)

            Datum  BHKW1_Biogas  BHKW2_Biogas  Methangehalt CH4   TS-Wert  \
0     734124.9993   1262.000000   4790.000000         53.530090  7.442129   
1     734125.0827   1222.583790   4802.499855         53.530090  7.959729   
2     734125.1660   1183.167579   4814.999711         53.530090  7.453511   
3     734125.2493   1143.751369   4827.499566         53.530090  8.026361   
4     734125.3327   1104.335158   4839.999421         53.530090  7.560807   
...           ...           ...           ...               ...       ...   
1616  734259.6660   2510.665494   5129.999190         52.710275  8.032442   
1617  734259.7493   2535.999120   5147.499392         52.408863  8.414378   
1618  734259.8327   2561.332747   5164.999595         52.107451  8.796314   
1619  734259.9160   2586.666373   5182.499797         51.806039  9.178250   
1620  734259.9993   2612.000000   5200.000000         51.504627  9.560185   

       pH-Wert  
0     7.684027  
1     7.706535  
2     7.691780  
3     7

# Linear Regression input and output

In [4]:
data.linear_regression()
print("Linear Regression: ",end=" ")
data.error_metric(data.y_predict_linear,data.y_test)

Linear Regression:  -0.7114070225510272


# Polynomial Regression input and output

In [5]:
data.polynomial_regression(k_fold= 5,degree=[2,3,4,5 ])               # k_fold=5,degree=[2,3,4,5]     
print("Polynomial Regression: ",end=" ")
print(data.best_params_poly)
data.error_metric(data.y_predict_poly,data.y_test)

Polynomial Regression:  {'polynomial_features__degree': 3}
0.2049853917873754


# Lasso Regression input and output

In [6]:
data.lasso_regression(alpha_values = [ 0.01,0.1,1],k_fold= 5)     # alpha_values = [0.01,0.1,1],k_fold=5
print("Lasso Regression: ",end=" ")
print(data.best_alpha)
data.error_metric(data.y_predict_lasso,data.y_test)

Lasso Regression:  0.01
-0.8569582218271403


# Ridge Regression input and output

In [17]:
data.ridge_regression(alpha_values = [ 0.01,0.1,1],k_fold= 5)     # alpha_values = [0.01,0.1,1],k_fold=5
print("Ridge Regression: ",end=" ")
print(data.best_alpha)
data.error_metric(data.y_predict_ridge,data.y_test)

Ridge Regression:  1
-0.7116092583182245
