In [145]:
import numpy as np
import pandas as pd

from index import get_weights_df

from sklearn.linear_model import Lasso, LinearRegression, Ridge
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error as MAE
from sklearn.metrics import mean_squared_error as MSE
from sklearn.metrics import r2_score
from sklearn.preprocessing import MinMaxScaler


In [146]:
# Location of the raw CSV on the UCI machine-learning archive.
energy_data_csv = 'https://archive.ics.uci.edu/ml/machine-learning-databases/00374/energydata_complete.csv'

# Read the file, parsing the `date` column into datetimes, then preview the first five rows.
energy_data = pd.read_csv(filepath_or_buffer=energy_data_csv, parse_dates=['date'])
print(energy_data.head(n=5))

                 date  Appliances  lights     T1       RH_1    T2       RH_2  \
0 2016-01-11 17:00:00          60      30  19.89  47.596667  19.2  44.790000   
1 2016-01-11 17:10:00          60      30  19.89  46.693333  19.2  44.722500   
2 2016-01-11 17:20:00          50      30  19.89  46.300000  19.2  44.626667   
3 2016-01-11 17:30:00          50      40  19.89  46.066667  19.2  44.590000   
4 2016-01-11 17:40:00          60      40  19.89  46.333333  19.2  44.530000   

      T3       RH_3         T4  ...         T9   RH_9     T_out  Press_mm_hg  \
0  19.79  44.730000  19.000000  ...  17.033333  45.53  6.600000        733.5   
1  19.79  44.790000  19.000000  ...  17.066667  45.56  6.483333        733.6   
2  19.79  44.933333  18.926667  ...  17.000000  45.50  6.366667        733.7   
3  19.79  45.000000  18.890000  ...  17.000000  45.40  6.250000        733.8   
4  19.79  45.000000  18.890000  ...  17.000000  45.40  6.133333        733.9   

   RH_out  Windspeed  Visibility  Tdew

In [147]:

# Single-feature setup: column T2 is the predictor and column T6 is the target.
# Selecting with a one-element list keeps each as a 2-D (n_rows, 1) array,
# which is the layout the estimator is fitted on in the next cell.
X = energy_data[['T2']].to_numpy()
y = energy_data[['T6']].to_numpy()

In [148]:
"Question 12"
# Hold out 30% of the rows (fixed seed), then fit a linear regression on the rest.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)
linreg = LinearRegression().fit(X_train, y_train)
y_pred = linreg.predict(X_test)

# R-squared on the held-out rows, printed to two decimals.
R_squared = r2_score(y_true=y_test, y_pred=y_pred)
print("{:.2f}".format(R_squared))

0.64


In [149]:
# Drop `date` and `lights`, then rescale every remaining column with MinMaxScaler.
# NOTE(review): the scaler is fitted on all rows before the train/test split, so
# the test rows influence the scaling applied to the training rows, and the
# target `Appliances` is scaled as well — confirm this is intended.
energy_data1 = energy_data.drop(columns=['date', 'lights'])
scaler = MinMaxScaler()
scaled_values = scaler.fit_transform(energy_data1)
normalised_df = pd.DataFrame(scaled_values, columns=energy_data1.columns)

# `Appliances` is the target; every other column is a predictor.
# This rebinds X, y and the split variables used by the earlier single-feature cells.
y = normalised_df['Appliances']
X = normalised_df.drop(columns=['Appliances'])
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Fit a linear regression on the training split and predict the held-out rows.
app_linreg = LinearRegression().fit(X_train, y_train)
y_pred = app_linreg.predict(X_test)

In [150]:
"Question 13"
# Mean absolute error between the held-out targets and the current `y_pred`.
app_MAE = MAE(y_true=y_test, y_pred=y_pred)
print("The mean absolute error is {:.2f}.".format(app_MAE))

The mean absolute error is 0.05.


In [151]:
"Question 14"
# Residual sum of squares: square each held-out residual and add them up.
residuals = y_test - y_pred
app_RSS = np.square(residuals).sum()
print("The sum of squared error is {:.2f}.".format(app_RSS))

The sum of squared error is 45.35.


In [152]:
"Question 15"
# RMSE is the square root of the mean squared error on the held-out rows.
app_MSE = MSE(y_true=y_test, y_pred=y_pred)
app_RMSE = np.sqrt(app_MSE)
print("The root mean squared error is {:.3f}.".format(app_RMSE))

The root mean squared error is 0.088.


In [153]:
"Question 16"
# Coefficient of determination for the current `y_pred` on the held-out rows.
app_R_Squared = r2_score(y_true=y_test, y_pred=y_pred)
print("R-squared is {:.2f}.".format(app_R_Squared))

R-squared is 0.15.


In [154]:
"Question 17"
# get_weights_df is a project helper imported from `index`. Judging by the
# printed result it returns one row per feature with its fitted coefficient,
# presumably ordered by weight — verify against the helper.
linear_model_weights = get_weights_df(app_linreg, X_train, 'Linear_Model_Weight')

# Position of the last row (row count minus one). Because this is a position
# derived from the shape, it is used with .iloc: .loc would treat 0 and
# row_length as index *labels*, which only works while the frame carries a
# default 0..n-1 index.
row_length = linear_model_weights.shape[0] - 1
print(linear_model_weights.iloc[[0, row_length], :])


   Features  Linear_Model_Weight
0      RH_2            -0.456698
25     RH_1             0.553547


In [155]:
"Question 18"
# Ridge regression with alpha=0.4, fitted on the same training split.
ridge_reg = Ridge(alpha=0.4).fit(X_train, y_train)
y_pred = ridge_reg.predict(X_test)

# Note: y_pred, app_MSE and app_RMSE are rebound here, replacing the values
# computed for the plain linear model in the earlier cells.
app_MSE = MSE(y_true=y_test, y_pred=y_pred)
app_RMSE = np.sqrt(app_MSE)
print("The root mean squared error is {:.3f}.".format(app_RMSE))


The root mean squared error is 0.088.


In [156]:
# Lasso regression with alpha=0.001 on the same training split.
# `y_pred` is rebound to the Lasso predictions; the Question 20 cell reads it.
lasso_reg = Lasso(alpha=0.001).fit(X_train, y_train)
y_pred = lasso_reg.predict(X_test)

In [157]:
"Question 19"
# Build the feature/weight table for the Lasso model via the project helper,
# then keep only the rows whose weight is not exactly zero.
lasso_model_weights = get_weights_df(lasso_reg, X_train, 'Lasso_Weight')
has_weight = lasso_model_weights['Lasso_Weight'].ne(0)
non_zero_weight = lasso_model_weights[has_weight]
print(non_zero_weight)

     Features  Lasso_Weight
0      RH_out     -0.049557
1        RH_8     -0.000110
24  Windspeed      0.002912
25       RH_1      0.017880


In [158]:
"Question 20"
# RMSE for the current `y_pred` (the Lasso predictions when cells run in order).
app_MSE = MSE(y_true=y_test, y_pred=y_pred)
app_RMSE = np.sqrt(app_MSE)
print("The root mean squared error is {:.3f}.".format(app_RMSE))

The root mean squared error is 0.094.
