In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
df = pd.read_csv("ENB2012_data.csv")
df.head(100)

Unnamed: 0,relative_compactness,surface_area,wall_area,roof_area,overall_height,orientation,glazing_area,glazing_area_distribution,cooling_load
0,0.98,514.5,294.0,110.25,7.0,2,0.0,0,21.33
1,0.98,514.5,294.0,110.25,7.0,3,0.0,0,21.33
2,0.98,514.5,294.0,110.25,7.0,4,0.0,0,21.33
3,0.98,514.5,294.0,110.25,7.0,5,0.0,0,21.33
4,0.90,563.5,318.5,122.50,7.0,2,0.0,0,28.28
...,...,...,...,...,...,...,...,...,...
95,0.62,808.5,367.5,220.50,3.5,5,0.1,1,14.27
96,0.98,514.5,294.0,110.25,7.0,2,0.1,2,25.95
97,0.98,514.5,294.0,110.25,7.0,3,0.1,2,25.63
98,0.98,514.5,294.0,110.25,7.0,4,0.1,2,26.13


In [4]:
df.columns

Index(['relative_compactness', 'surface_area', 'wall_area', 'roof_area',
       'overall_height', 'orientation', 'glazing_area',
       'glazing_area_distribution', 'cooling_load'],
      dtype='object')

In [5]:
cols_to_use = ['relative_compactness', 'surface_area', 'wall_area', 'roof_area',
       'overall_height', 'orientation', 'glazing_area',
       'glazing_area_distribution', 'cooling_load']
df = df[cols_to_use]
df.head()

Unnamed: 0,relative_compactness,surface_area,wall_area,roof_area,overall_height,orientation,glazing_area,glazing_area_distribution,cooling_load
0,0.98,514.5,294.0,110.25,7.0,2,0.0,0,21.33
1,0.98,514.5,294.0,110.25,7.0,3,0.0,0,21.33
2,0.98,514.5,294.0,110.25,7.0,4,0.0,0,21.33
3,0.98,514.5,294.0,110.25,7.0,5,0.0,0,21.33
4,0.9,563.5,318.5,122.5,7.0,2,0.0,0,28.28


In [6]:
df.nunique()

relative_compactness          12
surface_area                  12
wall_area                      7
roof_area                      4
overall_height                 2
orientation                    4
glazing_area                   4
glazing_area_distribution      6
cooling_load                 636
dtype: int64

In [7]:
df.shape

(768, 9)

In [8]:
df.isna().sum()

relative_compactness         0
surface_area                 0
wall_area                    0
roof_area                    0
overall_height               0
orientation                  0
glazing_area                 0
glazing_area_distribution    0
cooling_load                 0
dtype: int64

In [10]:
cols_to_fill_zero = ['relative_compactness', 'surface_area', 'wall_area', 'roof_area',
       'overall_height', 'orientation', 'glazing_area',
       'glazing_area_distribution', 'cooling_load']
df[cols_to_fill_zero] = df[cols_to_fill_zero].fillna(0)
df.isna().sum()

relative_compactness         0
surface_area                 0
wall_area                    0
roof_area                    0
overall_height               0
orientation                  0
glazing_area                 0
glazing_area_distribution    0
cooling_load                 0
dtype: int64

In [11]:
df.head()

Unnamed: 0,relative_compactness,surface_area,wall_area,roof_area,overall_height,orientation,glazing_area,glazing_area_distribution,cooling_load
0,0.98,514.5,294.0,110.25,7.0,2,0.0,0,21.33
1,0.98,514.5,294.0,110.25,7.0,3,0.0,0,21.33
2,0.98,514.5,294.0,110.25,7.0,4,0.0,0,21.33
3,0.98,514.5,294.0,110.25,7.0,5,0.0,0,21.33
4,0.9,563.5,318.5,122.5,7.0,2,0.0,0,28.28


In [13]:

x=df.drop('cooling_load',axis=1)
x.shape

(768, 8)

In [14]:
# Giving Target Values
y=df['cooling_load']
y.shape

(768,)

In [15]:
#Training Data set

from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test=train_test_split(x, y, test_size=0.30, random_state=0)

In [16]:
#KFold
from sklearn.model_selection import KFold
kfold_validation = KFold(n_splits=5)

#cross_val_score
from sklearn.model_selection import cross_val_score

#mean_squared_error
from sklearn.metrics import mean_squared_error
mse = mean_squared_error

In [17]:
#Lasso Regression model

from sklearn.linear_model import Lasso
lasso_reg = Lasso().fit(X_train, y_train)
results_lasso = cross_val_score(lasso_reg,x,y,cv=kfold_validation)


In [18]:
results_lasso

array([0.55315125, 0.73727379, 0.83529996, 0.78290388, 0.78142277])

In [19]:
y_lasso_pred = lasso_reg.predict(X_test)

In [20]:
#Mean Square Error

print('mse',mean_squared_error(y_test, y_lasso_pred))

mse 22.212948651248162


In [21]:
#Ridge Regression model

from sklearn.linear_model import Ridge
ridge_reg = Ridge().fit(X_train, y_train)
results_ridge = cross_val_score(ridge_reg,x,y,cv=kfold_validation)

In [22]:
results_ridge

array([0.80897807, 0.85986635, 0.88693863, 0.88760844, 0.8908711 ])

In [23]:
y_ridge_pred = ridge_reg.predict(X_test)

In [24]:
# Mean Square Error

print('mse',mean_squared_error(y_test, y_ridge_pred))

mse 10.459172641361397


In [25]:
#Elastic Net Regression model

from sklearn.linear_model import ElasticNet
eNet_reg = ElasticNet().fit(X_train, y_train)
results_eNet =cross_val_score(eNet_reg,x,y,cv=kfold_validation)


In [26]:
results_eNet

array([0.54653325, 0.74219748, 0.84654131, 0.7834462 , 0.7907762 ])

In [27]:
y_eNet_pred=eNet_reg.predict(X_test)

In [28]:
#Mean Square Error

print('mse',mean_squared_error(y_test, y_eNet_pred))

mse 20.796976182485174
