In [40]:
#importing libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib as plt
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet

In [41]:
#Converting csv file into DataFrame
energydata = pd.read_csv('energydata_complete.csv')
energydata.sample(2)

Unnamed: 0,date,Appliances,lights,T1,RH_1,T2,RH_2,T3,RH_3,T4,...,T9,RH_9,T_out,Press_mm_hg,RH_out,Windspeed,Visibility,Tdewpoint,rv1,rv2
9897,2016-03-20 10:30:00,150,0,20.89,38.95,18.29,38.86,21.6,35.06,19.79,...,19.39,37.933333,5.7,761.95,79.0,2.5,28.0,2.35,11.309992,11.309992
5872,2016-02-21 11:40:00,80,20,20.7,45.0,19.79,45.626667,21.426667,42.126667,20.675,...,18.29,47.4625,10.966667,756.566667,90.666667,8.333333,56.0,9.466667,33.831754,33.831754


In [42]:
#Droping Columns 'date' and 'lights'
energydata = energydata.drop(columns= ['date','lights'])
energydata.sample(2)

Unnamed: 0,Appliances,T1,RH_1,T2,RH_2,T3,RH_3,T4,RH_4,T5,...,T9,RH_9,T_out,Press_mm_hg,RH_out,Windspeed,Visibility,Tdewpoint,rv1,rv2
8428,50,19.7,36.79,16.7,40.7,20.29,37.5,19.945,35.5,17.7,...,17.39,39.7,-0.066667,754.6,89.0,1.0,64.333333,-1.733333,36.865272,36.865272
14487,60,21.5,36.9,18.963333,39.963333,22.6,35.79,21.956,33.92,20.0,...,20.1,38.2,5.15,763.05,80.0,2.5,26.0,1.95,9.310445,9.310445


In [43]:
#Checking for null values
energydata.isnull().sum().sum()

0

In [73]:
energydata.columns

Index(['Appliances', 'T1', 'RH_1', 'T2', 'RH_2', 'T3', 'RH_3', 'T4', 'RH_4',
       'T5', 'RH_5', 'T6', 'RH_6', 'T7', 'RH_7', 'T8', 'RH_8', 'T9', 'RH_9',
       'T_out', 'Press_mm_hg', 'RH_out', 'Windspeed', 'Visibility',
       'Tdewpoint', 'rv1', 'rv2'],
      dtype='object')

In [44]:
#Normalizing the data set
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
normalized_df = pd.DataFrame(scaler.fit_transform(energydata), columns=energydata.columns)
X = normalized_df.drop(columns=['Appliances'])
y = normalized_df['Appliances']

#Split data into training and testing sets
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)


In [45]:
#Evaluating performance

def performance(model, predicted):
    mae = mean_absolute_error(y_test, predicted).round(3)
    rmse = np.sqrt(mean_squared_error(y_test, predicted)).round(3)
    rsquared = r2_score(y_test, predicted).round(3)
    rss = np.sum(np.square(y_test - predicted)).round(3)
    
    print ('Model Evaluation for', model)
    print ('Mean Absolute Error:', mae)
    print ('Root Mean Squared Error:', rmse)
    print ('R_Squared:', rsquared)
    print ('Residual Sum of Squares:', rss)
    


# Linear Regression

In [46]:
#creating the model and fitting to the training sets
from sklearn.linear_model import LinearRegression
linear_model = LinearRegression()
linear_model.fit(X_train, y_train)

#obtaining predictions
predictedLr = linear_model.predict(X_test)

#Evaluating Performance
performance(linear_model, predictedLr)

Model Evaluation for LinearRegression()
Mean Absolute Error: 0.05
Root Mean Squared Error: 0.088
R_Squared: 0.149
Residual Sum of Squares: 45.348


# Ridge Regression

In [47]:
#Model fitting
from sklearn.linear_model import Ridge
ridge_reg = Ridge(alpha = 0.7)
ridge_reg.fit(X_train, y_train)

#Predictions
predictedRidge = ridge_reg.predict(X_test)

#Evaluating opreformancce
performance(ridge_reg, predictedRidge)

Model Evaluation for Ridge(alpha=0.7)
Mean Absolute Error: 0.05
Root Mean Squared Error: 0.088
R_Squared: 0.148
Residual Sum of Squares: 45.392


# Lasso Regression

In [48]:
#Model Fitting
from sklearn.linear_model import Lasso
lasso_reg = Lasso(alpha = 0.003)
lasso_reg.fit(X_train, y_train)

#predictions
predictedLasso = lasso_reg.predict(X_test)

#performance
performance(lasso_reg, predictedLasso)


Model Evaluation for Lasso(alpha=0.003)
Mean Absolute Error: 0.057
Root Mean Squared Error: 0.095
R_Squared: -0.0
Residual Sum of Squares: 53.281


# Elastic Net Regression

In [49]:
#model fitting
from sklearn.linear_model import ElasticNet
ENR = ElasticNet(alpha = 0.5)
ENR.fit(X_train, y_train)

#predictions
predictedENR = ENR.predict(X_test)

#performance
performance(ENR, predictedENR)

Model Evaluation for ElasticNet(alpha=0.5)
Mean Absolute Error: 0.057
Root Mean Squared Error: 0.095
R_Squared: -0.0
Residual Sum of Squares: 53.281


# Polynomial Regression

In [50]:
#model fitting
from sklearn.preprocessing import PolynomialFeatures
poly = PolynomialFeatures(degree=2)
Xtrain_pr = poly.fit_transform(X_train)
Xtest_pr = poly.fit_transform(X_test)
PR = LinearRegression()
PR.fit(Xtrain_pr, y_train)

#predictions
predictedPR = PR.predict(Xtest_pr)

#performance
performance(poly, predictedPR)


Model Evaluation for PolynomialFeatures()
Mean Absolute Error: 0.048
Root Mean Squared Error: 0.082
R_Squared: 0.253
Residual Sum of Squares: 39.812


In [51]:
#Comparing the effects of regularization
def get_weights(model, feat, col_name):
    weights = pd.Series(model.coef_, feat.columns).sort_values()
    weights_df = pd.DataFrame(weights).reset_index()
    weights_df.columns = ['Features', col_name]
    weights_df[col_name].round(3)
    return weights_df

linear_model_weights = get_weights(linear_model, X_train, 'Linear_Model_Weight')
ridge_weights = get_weights(ridge_reg, X_train, 'Ridge_Weight')
lasso_weights = get_weights(lasso_reg, X_train, 'Lasso_Weight')
ENR_weights = get_weights(ENR, X_train, 'Elastic_Net_Weight')


final_weights = pd.merge(linear_model_weights, ridge_weights, on='Features')
final_weights = pd.merge(final_weights, lasso_weights, on='Features')
final_weights = pd.merge(final_weights, ENR_weights, on='Features')

final_weights.head(2)

Unnamed: 0,Features,Linear_Model_Weight,Ridge_Weight,Lasso_Weight,Elastic_Net_Weight
0,RH_2,-0.456698,-0.382694,-0.0,-0.0
1,T_out,-0.32186,-0.230892,0.0,0.0


# question 12

In [65]:
X12 = normalized_df['T2']
X12 = pd.DataFrame(normalized_df['T2'])
y12 = normalized_df['T6']
#Split data into training and testing sets
from sklearn.model_selection import train_test_split
X12train, X12test, y12train, y12test = train_test_split(X12, y12, test_size=0.3, random_state=42)
#creating the model and fitting to the training sets
from sklearn.linear_model import LinearRegression
linear_model12 = LinearRegression()
linear_model12.fit(X12train, y12train)

#obtaining predictions
predictedLr12 = linear_model12.predict(X12test)

#Evaluating Performance
performance(linear_model12, predictedLr12)

Model Evaluation for LinearRegression()
Mean Absolute Error: 0.332
Root Mean Squared Error: 0.362
R_Squared: -13.578
Residual Sum of Squares: 776.734


In [63]:
X12.head()

Unnamed: 0,T2
0,0.225345
1,0.225345
2,0.225345
3,0.225345
4,0.225345


In [66]:
linear_model12.coef_

array([0.8910771])

In [72]:
linear_model12.coef_.min()

0.891077096116988

In [67]:
X2 = normalized_df.drop(columns=['T6'])
y2 = normalized_df['T6']
#Split data into training and testing sets
from sklearn.model_selection import train_test_split
X2train, X2test, y2train, y2test = train_test_split(X2, y2, test_size=0.3, random_state=42)
#creating the model and fitting to the training sets
from sklearn.linear_model import LinearRegression
linear_model2 = LinearRegression()
linear_model2.fit(X2train, y2train)

#obtaining predictions
predictedLr2 = linear_model2.predict(X2test)

#Evaluating Performance
performance(linear_model2, predictedLr2)

Model Evaluation for LinearRegression()
Mean Absolute Error: 0.332
Root Mean Squared Error: 0.375
R_Squared: -14.647
Residual Sum of Squares: 833.711


In [80]:
energydata.columns

Index(['Appliances', 'T1', 'RH_1', 'T2', 'RH_2', 'T3', 'RH_3', 'T4', 'RH_4',
       'T5', 'RH_5', 'T6', 'RH_6', 'T7', 'RH_7', 'T8', 'RH_8', 'T9', 'RH_9',
       'T_out', 'Press_mm_hg', 'RH_out', 'Windspeed', 'Visibility',
       'Tdewpoint', 'rv1', 'rv2'],
      dtype='object')

In [68]:
linear_model2.coef_

array([ 2.75938774e-02, -2.00258757e-01, -3.29034212e-02,  3.22436623e-01,
        1.02678285e-01,  1.94422236e-02, -7.69313733e-02, -2.38960660e-02,
        3.51360241e-02, -6.86877291e-02, -3.75064849e-03, -9.57522759e-02,
       -4.29577818e-02,  9.86453279e-03, -3.89775506e-02, -4.42721335e-02,
        8.08410082e-02,  1.50027164e-02,  1.00318959e+00, -1.96059272e-02,
        1.43845987e-01, -1.03392858e-03, -8.91659561e-04, -9.27707695e-02,
       -2.23706546e-04, -2.23706546e-04])

AttributeError: 'LinearRegression' object has no attribute 'columns'

In [69]:
linear_model2.coef_.min()

-0.20025875727812908

In [74]:
linear_model2.coef_.max()

1.0031895936721984

In [77]:
ridge_reg2 = Ridge(alpha=0.4)
ridge_reg2.fit(X2train, y2train)

#obtaining predictions
predictedRR2 = ridge_reg2.predict(X2test)

#Evaluating Performance
performance(ridge_reg2, predictedRR2)

Model Evaluation for Ridge(alpha=0.4)
Mean Absolute Error: 0.332
Root Mean Squared Error: 0.375
R_Squared: -14.641
Residual Sum of Squares: 833.371


In [78]:
lass_reg2 = Lasso(alpha=0.001)
lass_reg2.fit(X2train, y2train)

#obtaining predictions
predictedLasR2 = lass_reg2.predict(X2test)

#Evaluating Performance
performance(lass_reg2, predictedLasR2)

Model Evaluation for Lasso(alpha=0.001)
Mean Absolute Error: 0.333
Root Mean Squared Error: 0.373
R_Squared: -14.463
Residual Sum of Squares: 823.91


In [79]:
lass_reg2.coef_

array([ 0.        , -0.        ,  0.        ,  0.04472716, -0.        ,
       -0.        ,  0.        , -0.        ,  0.        , -0.        ,
       -0.        , -0.0562673 , -0.        ,  0.        , -0.00099367,
        0.        , -0.        ,  0.        ,  0.84283618, -0.        ,
       -0.        ,  0.        , -0.        ,  0.03948853, -0.        ,
       -0.        ])