In [44]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split

# Load the raw data and expose columns 'T2' and 'T6' under the names used below.
energy_dataset = pd.read_csv('energydata_complete.csv')
column_aliases = {'T2': 'Temp_LR', 'T6': 'Temp_Out'}
energy_dataset_df = energy_dataset.rename(columns=column_aliases)
linear_energy_dataset_df = energy_dataset_df[['Temp_LR', 'Temp_Out']]

# One predictor and one target, each kept as a single-column DataFrame.
x = linear_energy_dataset_df[['Temp_LR']]
y = linear_energy_dataset_df[['Temp_Out']]

# Hold out 25% of the rows for evaluation; the seed keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.25, random_state=1
)

# Fit ordinary least squares on the training rows.
regressor_model = LinearRegression()
regressor_model.fit(x_train, y_train)

# Predict the held-out rows and score them with R^2.
y_pred = regressor_model.predict(x_test)
R_square = r2_score(y_test, y_pred)
print('R^2:',round(R_square,2))

R^2: 0.65


In [103]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

# 'date' and 'lights' are left out of the modelling frame.
df_drop = energy_dataset.drop(columns=['date', 'lights'])

# Rescale every remaining column with MinMaxScaler and rebuild a labelled frame.
# NOTE(review): the scaler is fitted on the full frame before the split, so
# test rows influence the scaling -- confirm this is intended.
scaler = MinMaxScaler()
scaled_values = scaler.fit_transform(df_drop)
normalised_energy_df = pd.DataFrame(scaled_values, columns=df_drop.columns)

# 'Appliances' is the dependent variable; all other columns are predictors.
y2 = normalised_energy_df[['Appliances']]
features_energy_df = normalised_energy_df.drop(columns='Appliances')
x2 = features_energy_df

# 70/30 split with a fixed seed so the evaluation is repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x2, y2, test_size=0.30, random_state=42
)

# Fit the linear model on the training rows.
regressor2 = LinearRegression()
regressor2.fit(x_train, y_train)

# Predictions for the held-out rows.
y2_predicted_values = regressor2.predict(x_test)

# Mean absolute error on the held-out rows.
mae = mean_absolute_error(y_test, y2_predicted_values)
print('MAE:',round(mae, 2))

MAE: 0.05


In [58]:
# Residual sum of squares (RSS) on the test split, reduced to one number.
# y_test is a one-column DataFrame, so a column-wise .sum() would give a
# Series and print "Appliances ... dtype: float64" instead of a plain value.
# Both sides are flattened first so a 1-D or 2-D prediction array cannot
# broadcast into an (n, n) matrix.
residuals = y_test.to_numpy().ravel() - np.ravel(y2_predicted_values)
rss = float(np.square(residuals).sum())
print('RSS:',round(rss, 2))

RSS: Appliances    45.35
dtype: float64


In [56]:
from sklearn.metrics import mean_squared_error

# Root mean squared error of the linear model on the held-out rows.
test_mse = mean_squared_error(y_test, y2_predicted_values)
RMSE = np.sqrt(test_mse)
print('RMSE:', round(RMSE, 3))

RMSE: 0.088


In [59]:
# Coefficient of determination (R^2) of the linear model on the test split.
from sklearn.metrics import r2_score

# The score is stored under its own name: assigning it to `r2_score` would
# rebind the imported function to a float, and any later r2_score(...) call
# in the kernel would fail with "'float' object is not callable".
R_squared = r2_score(y_test, y2_predicted_values)
print('R_Squared:', round(R_squared, 2))

R_Squared: 0.15


In [136]:
# Tabulate the fitted linear-regression weights, one row per feature column.
# coef_ is transposed so the weights line up with x2.columns as the row index.
tran_reg = regressor2.coef_.transpose()
coeff_df = pd.DataFrame(tran_reg, x2.columns, columns=['Coefficient'])

# Work on the column (a Series) rather than the DataFrame: DataFrame.min()/max()
# return a Series, which prints as "Coefficient ... dtype: float64" and does not
# say which feature carries the extreme weight.
weights = coeff_df['Coefficient']
print('Lowest Weight:', weights.idxmin(), round(weights.min(), 6))
print('Highest Weight:', weights.idxmax(), round(weights.max(), 6))
coeff_df

Lowest Weight: Coefficient   -0.456698
dtype: float64 Highest Weight: Coefficient    0.553547
dtype: float64


Unnamed: 0,Coefficient
T1,-0.003281
RH_1,0.553547
T2,-0.236178
RH_2,-0.456698
T3,0.290627
RH_3,0.096048
T4,0.028981
RH_4,0.026386
T5,-0.015657
RH_5,0.016006


In [94]:
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error

# Ridge regression with alpha 0.4, fitted on the training split
# (fit returns the estimator itself, so construction and fitting are chained).
ridge_reg = Ridge(alpha=0.4).fit(x_train, y_train)

# Predictions for the held-out rows.
y2_predicted_values_ridge = ridge_reg.predict(x_test)

# Root mean squared error of the ridge model on the held-out rows.
ridge_mse = mean_squared_error(y_test, y2_predicted_values_ridge)
RMSE_Ridge = np.sqrt(ridge_mse)
print('RMSE_Ridge:', round(RMSE_Ridge, 3))




RMSE_Ridge: 0.088


In [99]:
from sklearn.linear_model import Lasso
from sklearn.metrics import mean_squared_error

# Lasso regression with alpha 0.001, fitted on the training split
# (fit returns the estimator itself, so construction and fitting are chained).
lasso_reg = Lasso(alpha=0.001).fit(x_train, y_train)

# Predictions for the held-out rows.
y2_predicted_values_lasso = lasso_reg.predict(x_test)

# Root mean squared error of the lasso model on the held-out rows.
lasso_mse = mean_squared_error(y_test, y2_predicted_values_lasso)
RMSE_Lasso = np.sqrt(lasso_mse)
print('RMSE_Lasso:', round(RMSE_Lasso, 3))


RMSE_Lasso: 0.094


In [144]:
# Tabulate the Lasso weights, one row per feature column.
coeff_df_lasso = pd.DataFrame(
    data=lasso_reg.coef_,
    index=features_energy_df.columns,
    columns=['Coefficient'],
)

# A weight counts as retained when it is not exactly zero.
count = coeff_df_lasso['Coefficient'].ne(0).sum()
print('Number of non-zero feature weights:',count)
coeff_df_lasso

Number of non-zero feature weights: 4


Unnamed: 0,Coefficient
T1,0.0
RH_1,0.01788
T2,0.0
RH_2,-0.0
T3,0.0
RH_3,0.0
T4,-0.0
RH_4,0.0
T5,-0.0
RH_5,0.0


In [92]:
# Display the normalised predictor frame: every column of normalised_energy_df
# except 'Appliances'. As the bare last expression it is rendered by the notebook.
features_energy_df

Unnamed: 0,T1,RH_1,T2,RH_2,T3,RH_3,T4,RH_4,T5,RH_5,...,T9,RH_9,T_out,Press_mm_hg,RH_out,Windspeed,Visibility,Tdewpoint,rv1,rv2
0,0.327350,0.566187,0.225345,0.684038,0.215188,0.746066,0.351351,0.764262,0.175506,0.381691,...,0.223032,0.677290,0.372990,0.097674,0.894737,0.500000,0.953846,0.538462,0.265449,0.265449
1,0.327350,0.541326,0.225345,0.682140,0.215188,0.748871,0.351351,0.782437,0.175506,0.381691,...,0.226500,0.678532,0.369239,0.100000,0.894737,0.476190,0.894872,0.533937,0.372083,0.372083
2,0.327350,0.530502,0.225345,0.679445,0.215188,0.755569,0.344745,0.778062,0.175506,0.380037,...,0.219563,0.676049,0.365488,0.102326,0.894737,0.452381,0.835897,0.529412,0.572848,0.572848
3,0.327350,0.524080,0.225345,0.678414,0.215188,0.758685,0.341441,0.770949,0.175506,0.380037,...,0.219563,0.671909,0.361736,0.104651,0.894737,0.428571,0.776923,0.524887,0.908261,0.908261
4,0.327350,0.531419,0.225345,0.676727,0.215188,0.758685,0.341441,0.762697,0.178691,0.380037,...,0.219563,0.671909,0.357985,0.106977,0.894737,0.404762,0.717949,0.520362,0.201611,0.201611
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19730,0.926786,0.537657,0.711655,0.606309,0.830841,0.579374,0.864865,0.765258,0.752031,0.339590,...,0.864724,0.729443,0.891747,0.602326,0.416667,0.238095,0.348718,0.901961,0.861981,0.861981
19731,0.919747,0.536006,0.701769,0.607836,0.825302,0.582178,0.864865,0.765258,0.754897,0.338487,...,0.864724,0.729443,0.887460,0.602326,0.421053,0.250000,0.361538,0.900452,0.985726,0.985726
19732,0.919747,0.538666,0.692651,0.627198,0.818378,0.603988,0.864865,0.771233,0.754897,0.337585,...,0.864724,0.729443,0.883173,0.602326,0.425439,0.261905,0.374359,0.898944,0.583979,0.583979
19733,0.919747,0.549491,0.677054,0.634717,0.805085,0.585294,0.864865,0.773794,0.752031,0.336583,...,0.864724,0.730581,0.878885,0.602326,0.429825,0.273810,0.387179,0.897436,0.126371,0.126371
