In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LinearRegression, Ridge,Lasso,ElasticNet, RidgeCV,LassoCV,ElasticNetCV
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import BaggingRegressor,RandomForestRegressor
import sklearn.metrics as mt
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import PolynomialFeatures,StandardScaler

In [5]:
data = pd.read_csv("advertising.csv")
df = data.copy()

y = df["Sales"]
X = df.drop("Sales", axis=1)

X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.1,random_state=42)

scy = StandardScaler()
scx = StandardScaler()

X_train = scx.fit_transform(X_train)
X_test = scy.fit_transform(X_test)


class Regression():
    def __init__(self,X_train,X_test,y_train,y_test,X,y):
        self.X_train = X_train
        self.X_test = X_test
        self.y_train = y_train
        self.y_test = y_test
        self.X = X
        self.y = y
    def basla(self):
        self.all_regression()
        pd.options.display.float_format = '{:,.4f}'.format
        self.sonuclar = pd.DataFrame(self.sonuclar,
                                     columns=["Model","R2","RMSE","MSE","Doğrulama","Polinom Derecesi"]
                                    ).sort_values(by='R2', ascending=False)
        return self.sonuclar
    
    def all_regression(self):
        self.hyperparameter()
        #LINEAR REGRESSION
        self.lin_model = LinearRegression()
        self.lin_model.fit(self.X_train,self.y_train)
        self.lin_tahmin = self.lin_model.predict(self.X_test)
        
        #POLYNOMIAL FEATURES
        self.polynomial()
        self.poly = PolynomialFeatures(degree=self.optimal_degree)
        self.X_poly_train = self.poly.fit_transform(self.X_train)
        self.X_poly_test = self.poly.transform(self.X_test)
        self.poly_model = LinearRegression()
        self.poly_model.fit(self.X_poly_train, self.y_train)
        self.poly_tahmin = self.poly_model.predict(self.X_poly_test)
        
        #RIDGE
        self.ridge_model = Ridge(alpha=self.ridge_lamb)
        self.ridge_model.fit(self.X_train,self.y_train)
        self.ridge_tahmin = self.ridge_model.predict(self.X_test)
        
        #LASSO
        self.lasso_model = Lasso(alpha=self.lasso_lamb)
        self.lasso_model.fit(self.X_train,self.y_train)
        self.lasso_tahmin = self.lasso_model.predict(self.X_test)
        
        #ELASTIC NET
        self.elas_model = ElasticNet(alpha=self.elastic_lamb)
        self.elas_model.fit(self.X_train,self.y_train)
        self.elas_tahmin = self.elas_model.predict(self.X_test)
        
        #DECISION TREE REGRESSOR
        self.decisiontree()
        self.dec_model = DecisionTreeRegressor(random_state=0,
                                               max_leaf_nodes=self.max_leaf_nodes_value,
                                               min_samples_split=self.min_samples_split_value
                                              )
        self.dec_model.fit(self.X_train,self.y_train)
        self.dec_tahmin = self.dec_model.predict(self.X_test)
        
        #BAGGING REGRESSOR
        self.bagging()
        self.bg_model = BaggingRegressor(random_state=0,n_estimators=self.n_estimators_value)
        self.bg_model.fit(self.X_train,self.y_train)
        self.bg_tahmin = self.bg_model.predict(self.X_test)
        
        #RANDOM FOREST REGRESSOR
        self.randomforest()
        self.rf_model = RandomForestRegressor(random_state=0,
                                              max_depth=self.max_depth_value,
                                              max_features=self.max_features_value,
                                              n_estimators= self.n_estimators_rf
                                             )
        self.rf_model.fit(self.X_train,self.y_train)
        self.rf_tahmin = self.rf_model.predict(self.X_test)
        
        #SVR
        self.svr()
        self.svr_model = SVR(C=self.c_value,gamma=self.gamma_value,epsilon=self.epsilon_value,kernel=self.kernel_value)
        self.svr_model.fit(self.X_train,self.y_train)
        self.svr_tahmin = self.svr_model.predict(self.X_test)
        
        self.sonuclar = [("LinearRegression",
                          self.hata_test(self.lin_tahmin)[2],
                          self.hata_test(self.lin_tahmin)[1],
                          self.hata_test(self.lin_tahmin)[0],
                          self.caprazdog(self.lin_model),
                          "0",
                         ),
                         ("PolynomialFeatures",
                          self.hata_test(self.poly_tahmin)[2],
                          self.hata_test(self.poly_tahmin)[1],
                          self.hata_test(self.poly_tahmin)[0],
                          self.caprazdog(self.poly_model),
                          str(self.optimal_degree),
                         ),
                         ("RidgeRegression",
                          self.hata_test(self.ridge_tahmin)[2],
                          self.hata_test(self.ridge_tahmin)[1],
                          self.hata_test(self.ridge_tahmin)[0],
                          self.caprazdog(self.ridge_model),
                          "0",
                         ),
                         ("LassoRegression",
                          self.hata_test(self.lasso_tahmin)[2],
                          self.hata_test(self.lasso_tahmin)[1],
                          self.hata_test(self.lasso_tahmin)[0],
                          self.caprazdog(self.lasso_model),
                          "0",
                         ),
                         ("ElasticNetRegression",
                          self.hata_test(self.elas_tahmin)[2],
                          self.hata_test(self.elas_tahmin)[1],
                          self.hata_test(self.elas_tahmin)[0],
                          self.caprazdog(self.elas_model),
                          "0",
                         ),
                         ("DecisionTreeRegressor",
                          self.hata_test(self.dec_tahmin)[2],
                          self.hata_test(self.dec_tahmin)[1],
                          self.hata_test(self.dec_tahmin)[0],
                          self.caprazdog(self.dec_model),
                          "0",
                         ),
                         ("BaggingRegressor",
                          self.hata_test(self.bg_tahmin)[2],
                          self.hata_test(self.bg_tahmin)[1],
                          self.hata_test(self.bg_tahmin)[0],
                          self.caprazdog(self.bg_model),
                          "0",
                         ),
                         ("RandomForestRegressor",
                          self.hata_test(self.rf_tahmin)[2],
                          self.hata_test(self.rf_tahmin)[1],
                          self.hata_test(self.rf_tahmin)[0],
                          self.caprazdog(self.rf_model),
                          "0",
                         ),
                         ("SVR",
                          self.hata_test(self.svr_tahmin)[2],
                          self.hata_test(self.svr_tahmin)[1],
                          self.hata_test(self.svr_tahmin)[0],
                          self.caprazdog(self.svr_model),
                          "0",
                         )
           ]
    def hata_test(self,tahmin):
        self.mse = mt.mean_squared_error(self.y_test,tahmin,squared=True)
        self.rmse = mt.mean_squared_error(self.y_test,tahmin,squared=False)
        self.r2 = mt.r2_score(self.y_test,tahmin)
        return [self.mse,self.rmse,self.r2]
    
    def caprazdog(self,model):
        self.dogruluk = cross_val_score(model,self.X,self.y,cv=10)
        return self.dogruluk.mean()
    
    def hyperparameter(self):
        def ridge():
            self.lambdalar = 10**np.linspace(10,-2,100)*0.5
            self.ridge_lamb = RidgeCV(alphas=self.lambdalar, scoring="r2").fit(self.X_train,self.y_train).alpha_
        def lasso():
            self.lasso_lamb = LassoCV(cv=10,max_iter=10000).fit(self.X_train,self.y_train).alpha_
        def elasticnet():
            self.elastic_lamb = ElasticNetCV(cv=10,max_iter=10000).fit(self.X_train,self.y_train).alpha_
        return ridge(),lasso(),elasticnet()
    def decisiontree(self):
        self.dec_model = DecisionTreeRegressor(random_state=0)
        self.dec_model.fit(self.X_train,self.y_train)
        self.dec_tahmin = self.dec_model.predict(self.X_test)

        self.dtparametreler = {
            "min_samples_split":range(2,25),
            "max_leaf_nodes":range(2,25)
        }
        self.dtgrid = GridSearchCV(estimator=self.dec_model,param_grid=self.dtparametreler,cv=10)
        self.dtgrid.fit(self.X_train,self.y_train)
        self.max_leaf_nodes_value = self.dtgrid.best_params_['max_leaf_nodes']
        self.min_samples_split_value = self.dtgrid.best_params_['min_samples_split']
        return self.max_leaf_nodes_value,self.min_samples_split_value
    def bagging(self):
        self.bg_model = BaggingRegressor(random_state=0)
        self.bg_model.fit(self.X_train,self.y_train)
        self.bg_tahmin = self.bg_model.predict(self.X_test)
        
        self.bgparametreler = {
            "n_estimators":range(2,25)
        }
        self.bggrid = GridSearchCV(estimator=self.bg_model,param_grid=self.bgparametreler,cv=10)
        self.bggrid.fit(self.X_train,self.y_train)
        self.n_estimators_value = self.bggrid.best_params_['n_estimators']
        return self.n_estimators_value
    def randomforest(self):
        self.rf_model = RandomForestRegressor(random_state=0)
        self.rf_model.fit(self.X_train,self.y_train)
        self.rf_tahmin = self.rf_model.predict(self.X_test)
        
        self.rfparametreler = {
            "max_depth":range(2,25),
            "max_features":range(2,25),
            "n_estimators":range(2,25),
        }
        self.rfgrid = GridSearchCV(estimator=self.rf_model,param_grid=self.rfparametreler,cv=10)
        self.rfgrid.fit(self.X_train,self.y_train)
        self.max_depth_value = self.rfgrid.best_params_['max_depth']
        self.max_features_value = self.rfgrid.best_params_['max_features']
        self.n_estimators_rf = self.rfgrid.best_params_['n_estimators']
        return self.max_depth_value,self.max_features_value,self.n_estimators_rf
    def polynomial(self):
        self.degree_range = range(2,25)
        self.r2_list = []
        for self.degree in self.degree_range:
            self.poly = PolynomialFeatures(degree=self.degree)
            self.X_poly_train = self.poly.fit_transform(self.X_train)
            self.X_poly_test = self.poly.transform(self.X_test)

            self.poly_model = LinearRegression()
            self.poly_model.fit(self.X_poly_train, self.y_train)
            self.poly_tahmin = self.poly_model.predict(self.X_poly_test)
            self.r2 = mt.r2_score(self.y_test,self.poly_tahmin)
            self.r2_list.append(self.r2)
        self.optimal_degree = self.degree_range[np.argmax(self.r2_list)]
    def svr(self):
        self.param_grid = {
            'C': [1, 10, 100],
            'kernel': ['linear', 'rbf'],
            'gamma': [1, 0.1, 0.01],
            'epsilon': [0.1, 0.01, 0.001]
        }
        self.svr = SVR()
        self.grid_search = GridSearchCV(self.svr, self.param_grid, cv=10)
        self.grid_search.fit(self.X_train, self.y_train)
        
        self.c_value = self.grid_search.best_params_['C']
        self.kernel_value = self.grid_search.best_params_['kernel']
        self.gamma_value = self.grid_search.best_params_['gamma']
        self.epsilon_value = self.grid_search.best_params_['epsilon']
        return self.c_value,self.kernel_value,self.gamma_value,self.epsilon_value

In [6]:
start = Regression(X_train,X_test,y_train,y_test,X,y)
start.basla()

Unnamed: 0,Model,R2,RMSE,MSE,Doğrulama,Polinom Derecesi
1,PolynomialFeatures,0.9405,1.3945,1.9448,0.8915,2
6,BaggingRegressor,0.9403,1.3974,1.9527,0.9426,0
7,RandomForestRegressor,0.9308,1.5041,2.2624,0.9432,0
8,SVR,0.9269,1.546,2.3901,0.5489,0
0,LinearRegression,0.8973,1.8322,3.3569,0.8915,0
2,RidgeRegression,0.897,1.8345,3.3653,0.8915,0
4,ElasticNetRegression,0.8962,1.8416,3.3914,0.8915,0
3,LassoRegression,0.8953,1.8496,3.4211,0.8916,0
5,DecisionTreeRegressor,0.8462,2.242,5.0267,0.9019,0
