In [180]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso

#metrics
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score

In [None]:
#Attribute Information in text:

#Date, time year-month-day hour:minute:second

#Appliances, energy use in Wh

#lights, energy use of light fixtures in the house in Wh

#T1, Temperature in kitchen area, in Celsius

#RH_1, Humidity in kitchen area, in %

#T2, Temperature in living room area, in Celsius

#RH_2, Humidity in living room area, in %

#T3, Temperature in laundry room area

#RH_3, Humidity in laundry room area, in %

#T4, Temperature in office room, in Celsius

#RH_4, Humidity in office room, in %

#T5, Temperature in bathroom, in Celsius

#RH_5, Humidity in bathroom, in %

#T6, Temperature outside the building (north side), in Celsius

#RH_6, Humidity outside the building (north side), in %

#T7, Temperature in ironing room , in Celsius

#RH_7, Humidity in ironing room, in %

#T8, Temperature in teenager room 2, in Celsius

#RH_8, Humidity in teenager room 2, in %

#T9, Temperature in parents room, in Celsius

#RH_9, Humidity in parents room, in %

#To, Temperature outside (from Chievres weather station), in Celsius

#Pressure (from Chievres weather station), in mm Hg

#RH_out, Humidity outside (from Chievres weather station), in %

#Wind speed (from Chievres weather station), in m/s

#Visibility (from Chievres weather station), in km

#Tdewpoint (from Chievres weather station), Â °C

#rv1, Random variable 1, nondimensional

#rv2, Random variable 2, nondimensional

In [298]:
df = pd.read_csv("energydata_complete.csv")

In [299]:
df.shape

(19735, 29)

In [300]:
df.head(3)

Unnamed: 0,date,Appliances,lights,T1,RH_1,T2,RH_2,T3,RH_3,T4,...,T9,RH_9,T_out,Press_mm_hg,RH_out,Windspeed,Visibility,Tdewpoint,rv1,rv2
0,2016-01-11 17:00:00,60,30,19.89,47.596667,19.2,44.79,19.79,44.73,19.0,...,17.033333,45.53,6.6,733.5,92.0,7.0,63.0,5.3,13.275433,13.275433
1,2016-01-11 17:10:00,60,30,19.89,46.693333,19.2,44.7225,19.79,44.79,19.0,...,17.066667,45.56,6.483333,733.6,92.0,6.666667,59.166667,5.2,18.606195,18.606195
2,2016-01-11 17:20:00,50,30,19.89,46.3,19.2,44.626667,19.79,44.933333,18.926667,...,17.0,45.5,6.366667,733.7,92.0,6.333333,55.333333,5.1,28.642668,28.642668


In [301]:
df = df.drop(columns = ['date','lights'])

In [302]:
scaler = MinMaxScaler()

In [303]:
normalized_df = pd.DataFrame(scaler.fit_transform(df), columns = df.columns)
features_df = normalized_df.drop(columns=['Appliances'])

In [304]:
app_target =  normalized_df['Appliances']

In [305]:
#Question 12

In [306]:
lcb = LinearRegression()

In [307]:
x1 = normalized_df['T2'].values.reshape(-1,1)
y1= normalized_df['T6'].values.reshape(-1,1)

In [308]:
x_train,x_test,y_train,y_test = train_test_split(x1,y1,test_size=0.3,random_state=42)

In [309]:
lcb.fit(x_train,y_train)

LinearRegression()

In [310]:
lcb_pred = lcb.predict(x_test)

In [311]:
r2 = r2_score(y_test,lcb_pred)
round(r2,2)

0.64

In [312]:
#Question 13

In [313]:
linear_model = LinearRegression()

In [314]:
x_train,x_test,y_train,y_test = train_test_split(features_df,app_target,test_size=0.3,random_state=42)

In [315]:
linear_model.fit(x_train,y_train)

LinearRegression()

In [316]:
linear_model_predictions = linear_model.predict(x_test)

In [317]:
mae = mean_absolute_error(y_test,linear_model_predictions)
round(mae,2)

0.05

In [318]:
#Question 14

In [319]:
#residual sum of sqaures
rss = np.sum(np.square(y_test - linear_model_predictions))
round(rss,2)

45.35

In [320]:
#Question 15

In [321]:
# root mean square error for linear model
linear_rmse = np.sqrt(mean_squared_error(y_test,linear_model_predictions))
round(rmse,3)

0.088

In [322]:
#Question 16

In [323]:
# coefficient of determination - r squared
r2 = r2_score(y_test,linear_model_predictions)
round(r2,2)

0.15

In [324]:
#Question 17

In [325]:
def get_weights_df(model, feat, col_name):
  #this function returns the weight of every feature
  weights = pd.Series(model.coef_, feat.columns).sort_values()
  weights_df = pd.DataFrame(weights).reset_index()
  weights_df.columns = ['Features', col_name]
  weights_df[col_name].round(3)
  return weights_df

In [326]:
linear_model_weights = get_weights_df(linear_model, x_train, 'Linear_Model_Weight')

In [327]:
linear_model_weights.sort_values("Linear_Model_Weight",ascending=True)

Unnamed: 0,Features,Linear_Model_Weight
0,RH_2,-0.456698
1,T_out,-0.32186
2,T2,-0.236178
3,T9,-0.189941
4,RH_8,-0.157595
5,RH_out,-0.077671
6,RH_7,-0.044614
7,RH_9,-0.0398
8,T5,-0.015657
9,T1,-0.003281


In [328]:
#Question 18

In [335]:
#train ridge regression with 0.4
ridge_reg = Ridge(0.4)
ridge_reg.fit(x_train,y_train)

Ridge(alpha=0.4)

In [336]:
ridge_reg_predict = ridge_reg.predict(x_test)

In [337]:
#ridge root mean square error
ridge_rmse = np.sqrt(mean_squared_error(y_test,ridge_reg_predict))
round(rmse,3)

0.088

In [338]:
ob_value = ridge_rmse == linear_rmse

print("It is absolutely", ob_value ,"that there is no change in value between Ridge model RMSE and Linear model RMSE")

It is absolutely False that there is no change in value between Ridge model RMSE and Linear model RMSE


In [339]:
#Question 19

In [340]:
#training a lasso with 0.001
lasso_reg = Lasso(0.001)
lasso_reg.fit(x_train,y_train)

Lasso(alpha=0.001)

In [341]:
lasso_reg_predict = lasso_reg.predict(x_test)

In [342]:
lasso_model_weights = get_weights_df(lasso_reg, x_train, 'Lasso_Model_Weight')

In [349]:
lasso_model_weights.sort_values("Lasso_Model_Weight")

Unnamed: 0,Features,Lasso_Model_Weight
0,RH_out,-0.049557
1,RH_8,-0.00011
23,rv2,-0.0
22,RH_6,-0.0
21,T2,0.0
20,RH_2,-0.0
19,T3,0.0
18,RH_3,0.0
17,T4,-0.0
16,RH_4,0.0


In [344]:
#Question 20

In [352]:
lasso_rmse = np.sqrt(mean_squared_error(y_test,lasso_reg_predict))
round(rmse,3)

0.088

In [353]:
#End of Stage-B Quiz notebook