### Preparing the dataset

**Importing some libraries and packages**

In [41]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedKFold

**Importing the dataset**

In [51]:
import os

# Path to the ENB2012 workbook. It defaults to the original author's location
# but can be overridden with the ENB2012_DATA_PATH environment variable, so the
# notebook runs on other machines without editing this cell.
DATA_PATH = os.environ.get(
    'ENB2012_DATA_PATH',
    '/Users/dakshbhuva/Desktop/ENB2012_data.xlsx',
)
dataset = pd.read_excel(DATA_PATH)
# Preview the first rows to check that the sheet was parsed as expected.
dataset.head()

Unnamed: 0,X1,X2,X3,X4,X5,X6,X7,X8,Y2,Unnamed: 9,Unnamed: 10
0,0.98,514.5,294.0,110.25,7.0,2,0.0,0,21.33,,
1,0.98,514.5,294.0,110.25,7.0,3,0.0,0,21.33,,
2,0.98,514.5,294.0,110.25,7.0,4,0.0,0,21.33,,
3,0.98,514.5,294.0,110.25,7.0,5,0.0,0,21.33,,
4,0.9,563.5,318.5,122.5,7.0,2,0.0,0,28.28,,


**Extracting the required columns**

In [53]:
# Keep the eight predictors X1..X8 plus the Y2 response. This also discards the
# empty 'Unnamed: 9' / 'Unnamed: 10' columns that were read from the sheet.
cols_to_use = ['X%d' % i for i in range(1, 9)] + ['Y2']
dataset = dataset.loc[:, cols_to_use]
dataset.head()

Unnamed: 0,X1,X2,X3,X4,X5,X6,X7,X8,Y2
0,0.98,514.5,294.0,110.25,7.0,2,0.0,0,21.33
1,0.98,514.5,294.0,110.25,7.0,3,0.0,0,21.33
2,0.98,514.5,294.0,110.25,7.0,4,0.0,0,21.33
3,0.98,514.5,294.0,110.25,7.0,5,0.0,0,21.33
4,0.9,563.5,318.5,122.5,7.0,2,0.0,0,28.28


In [44]:
# Dimensions of the working frame as (rows, columns).
dataset.shape

(768, 9)

In [45]:
# Work on a plain NumPy array: every column except the last is a predictor,
# and the last column (Y2) is the regression target.
dvalues = dataset.to_numpy()
X = dvalues[:, :-1]
y = dvalues[:, -1]
print(X)
print(y)

**Splitting the dataset into Training Data and Testing Data**

In [55]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing; the fixed random_state makes the
# split repeatable from run to run.
train_X, test_X, train_y, test_y = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=2,
)

***
### Linear Regression
***

In [49]:
from sklearn.linear_model import LinearRegression

# Ordinary least-squares baseline, fitted on the training split only.
reg = LinearRegression()
reg.fit(X=train_X, y=train_y)

LinearRegression()

In [11]:
# Predictions of the fitted linear model for the held-out test rows.
pred_y = reg.predict(test_X)

**R² score (coefficient of determination) and mean squared error (MSE)**

In [12]:
# Score on the training split. For scikit-learn regressors, score() is the
# R^2 coefficient of determination, not a classification accuracy.
reg.score(train_X, train_y)

0.8960068984743137

In [13]:
# R^2 on the held-out test split; compare with the training value above to
# gauge over-fitting.
reg.score(test_X, test_y)

0.8679934073082682

In [14]:
from sklearn.metrics import mean_squared_error

# Test-set mean squared error of the linear-regression predictions.
print("MSE", mean_squared_error(y_true=test_y, y_pred=pred_y))

MSE 11.932799400373


**5-fold cross validation based on mean squared error (MSE)**

In [15]:
# A single shuffled pass of 5-fold CV (n_repeats=1); random_state pins the folds.
cv = RepeatedKFold(n_splits=5, n_repeats=1, random_state=1)
# Note: cross-validation runs on the full (X, y), not only the training split.
# Scores come back as negated MSE, one value per fold.
scores = cross_val_score(
    estimator=reg,
    X=X,
    y=y,
    scoring='neg_mean_squared_error',
    cv=cv,
    n_jobs=-1,
)

In [16]:
# The scorer reports MSE with its sign flipped; take the magnitude to get the
# usual positive per-fold MSE, then summarise with the mean.
scores = np.abs(scores)
print(scores)
print('Mean MSE: %.3f' % scores.mean())

[13.33425648  9.25534554 11.44645645  8.08679584  9.12829731]
Mean MSE: 10.250


***
### Lasso Regression
***

In [17]:
from sklearn.linear_model import Lasso

# L1-regularised linear model, fitted on the training split.
# NOTE(review): tol=0.1 and max_iter=100 are much looser than scikit-learn's
# defaults (tol=1e-4, max_iter=1000), so the solver may stop before converging.
# Confirm this is intentional.
# NOTE(review): the features are not standardised (e.g. X1 is about 0.9 while
# X2 is about 500), so the penalty acts unevenly across columns. Consider scaling.
lasso_reg = Lasso(alpha=0.1, max_iter=100, tol=0.1)
lasso_reg.fit(X=train_X, y=train_y)

Lasso(alpha=0.1, max_iter=100, tol=0.1)

In [18]:
# Predictions of the fitted Lasso model for the held-out test rows.
pred_lasso_y = lasso_reg.predict(test_X)

**R² score (coefficient of determination) and mean squared error (MSE)**

In [19]:
# R^2 (coefficient of determination) of the Lasso model on the training split.
lasso_reg.score(train_X, train_y)

0.8496597478733995

In [20]:
# R^2 (coefficient of determination) of the Lasso model on the test split.
lasso_reg.score(test_X, test_y)

0.8210153461763534

In [21]:
# Test-set MSE of the Lasso predictions. Relies on mean_squared_error having
# been imported in the Linear Regression section.
print("MSE", mean_squared_error(test_y, pred_lasso_y))

MSE 16.17940381818941


**5-fold cross validation based on mean squared error (MSE)**

In [22]:
# Same fold definition as for the other models (5 folds, one repeat, seed 1),
# so the per-fold numbers are directly comparable.
cv = RepeatedKFold(n_splits=5, n_repeats=1, random_state=1)
# Note: cross-validation runs on the full (X, y), not only the training split.
# Scores come back as negated MSE, one value per fold.
scores = cross_val_score(
    estimator=lasso_reg,
    X=X,
    y=y,
    scoring='neg_mean_squared_error',
    cv=cv,
    n_jobs=-1,
)

In [23]:
# Flip the negated MSE values back to positive per-fold errors and report
# them together with their mean.
scores = np.abs(scores)
print(scores)
print('Mean MSE: %.3f' % scores.mean())

[17.5974518  12.90823341 17.22102097 12.83917627 12.00915707]
Mean MSE: 14.515


***
### Ridge Regression
***

In [24]:
from sklearn.linear_model import Ridge

# L2-regularised linear model, fitted on the training split.
# NOTE(review): max_iter and tol apply only to Ridge's iterative solvers. With
# the default solver='auto' they may have no effect here. Confirm.
ridge_reg = Ridge(alpha=0.1, max_iter=100, tol=0.1)
ridge_reg.fit(X=train_X, y=train_y)

Ridge(alpha=0.1, max_iter=100, tol=0.1)

In [25]:
# Predictions of the fitted Ridge model for the held-out test rows.
pred_ridge_y = ridge_reg.predict(test_X)

**R² score (coefficient of determination) and mean squared error (MSE)**

In [26]:
# R^2 (coefficient of determination) of the Ridge model on the training split.
ridge_reg.score(train_X, train_y)

0.8936293256602481

In [27]:
# R^2 (coefficient of determination) of the Ridge model on the test split.
ridge_reg.score(test_X, test_y)

0.8651293361344408

In [28]:
# Test-set MSE of the Ridge predictions. Relies on mean_squared_error having
# been imported in the Linear Regression section.
print("MSE", mean_squared_error(test_y, pred_ridge_y))

MSE 12.191698490856195


**5-fold cross validation based on mean squared error (MSE)**

In [29]:
# Same fold definition as for the other models (5 folds, one repeat, seed 1),
# so the per-fold numbers are directly comparable.
cv = RepeatedKFold(n_splits=5, n_repeats=1, random_state=1)
# Note: cross-validation runs on the full (X, y), not only the training split.
# Scores come back as negated MSE, one value per fold.
scores = cross_val_score(
    estimator=ridge_reg,
    X=X,
    y=y,
    scoring='neg_mean_squared_error',
    cv=cv,
    n_jobs=-1,
)

In [30]:
# Flip the negated MSE values back to positive per-fold errors and report
# them together with their mean.
scores = np.abs(scores)
print(scores)
print('Mean MSE: %.3f' % scores.mean())

[13.51010438  9.40918598 11.72885574  8.3552932   9.32791736]
Mean MSE: 10.466


***
### Elastic Net Regression
***

In [31]:
from sklearn.linear_model import ElasticNet

# Combined L1/L2-regularised linear model, fitted on the training split.
# l1_ratio is not set, so scikit-learn's default mix is used.
# NOTE(review): tol=0.1 and max_iter=100 are much looser than scikit-learn's
# defaults (tol=1e-4, max_iter=1000), and the features are not standardised.
# Confirm both are intentional.
elastic_reg = ElasticNet(alpha=0.1, max_iter=100, tol=0.1)
elastic_reg.fit(X=train_X, y=train_y)

ElasticNet(alpha=0.1, max_iter=100, tol=0.1)

In [32]:
# Predictions of the fitted Elastic Net model for the held-out test rows.
pred_elastic_y = elastic_reg.predict(test_X)

**R² score (coefficient of determination) and mean squared error (MSE)**

In [33]:
# R^2 (coefficient of determination) of the Elastic Net model on the training split.
elastic_reg.score(train_X, train_y)

0.8170593174850932

In [34]:
# R^2 (coefficient of determination) of the Elastic Net model on the test split.
elastic_reg.score(test_X, test_y)

0.7897475170056485

In [35]:
# Test-set MSE of the Elastic Net predictions. Relies on mean_squared_error
# having been imported in the Linear Regression section.
print("MSE", mean_squared_error(test_y, pred_elastic_y))

MSE 19.005874266150027


**5-fold cross validation based on mean squared error (MSE)**

In [36]:
# Same fold definition as for the other models (5 folds, one repeat, seed 1),
# so the per-fold numbers are directly comparable.
cv = RepeatedKFold(n_splits=5, n_repeats=1, random_state=1)
# Note: cross-validation runs on the full (X, y), not only the training split.
# Scores come back as negated MSE, one value per fold.
scores = cross_val_score(
    estimator=elastic_reg,
    X=X,
    y=y,
    scoring='neg_mean_squared_error',
    cv=cv,
    n_jobs=-1,
)

In [37]:
# Flip the negated MSE values back to positive per-fold errors and report
# them together with their mean.
scores = np.abs(scores)
print(scores)
print('Mean MSE: %.3f' % scores.mean())

[20.36707312 15.40995067 20.44575913 14.71116777 13.54677376]
Mean MSE: 16.896
