In [29]:
import pandas as pd 
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, accuracy_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso


# Single-feature linear regression: fit column 'T6' as a function of column 'T2'.
df = pd.read_excel('energydata_complete.xlsx')
# reshape(-1, 1) turns the 1-D column into a single-column 2-D array for the estimator.
x = df['T2'].values.reshape(-1,1)
y = df['T6']

# 80/20 split with a fixed seed so the reported numbers are reproducible.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)
model = LinearRegression()
model.fit(x_train,y_train)
y_pred = model.predict(x_test)

# scikit-learn metrics take (y_true, y_pred) in that order. MSE and MAE are
# symmetric in their arguments, so the printed values are unaffected by the
# corrected order; it is used here for consistency with the R^2 call below.
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
mae = mean_absolute_error(y_test, y_pred)

# R^2 is NOT symmetric: it is normalised by the variance of the first argument,
# so the observed targets must come first (the arguments were previously swapped).
r2 = r2_score(y_test, y_pred)

print(f'RMSE: {round(rmse,3)}')
print('MAE: ', mae)


RMSE: 3.633
MAE:  2.826851760546555


In [30]:
#Question 18
# Remove the two columns that are not used as predictors. `df` is rebound to the
# result instead of being mutated with inplace=True, and errors="ignore" makes the
# cell idempotent: running it a second time no longer raises KeyError because the
# columns are already gone.
df = df.drop(columns=["date", "lights"], errors="ignore")

scaler = MinMaxScaler()

# Everything except 'Appliances' is a predictor; 'Appliances' is the target.
features_df = df.drop(columns="Appliances")
target = df["Appliances"]

# 70/30 split with a fixed seed for reproducibility.
x_train, x_test, y_train, y_test = train_test_split(features_df, target, test_size=0.3, random_state=42)

# The scaler is fitted on the training split only and then applied to the test
# split, so no information from the test rows enters the scaling parameters.
x_train_normalized = scaler.fit_transform(x_train)
x_test_normalized = scaler.transform(x_test)

model_2 = LinearRegression()
model_2.fit(x_train_normalized,y_train)

In [31]:

# In-sample predictions from model_2 on the scaled training features.
y_pred = model_2.predict(x_train_normalized)
# Mean absolute deviation between the observed and the fitted training targets.
mae = mean_absolute_error(y_true=y_train, y_pred=y_pred)
print(f"The mean absolute error for the training set is {round(mae,3)}")

The mean absolute error for the training set is 53.742


In [32]:
# QUESTION 19
# Recompute the training-set predictions here instead of relying on whatever
# `y_pred` currently holds, so the result does not depend on which cell ran last.
y_pred = model_2.predict(x_train_normalized)
rmse = np.sqrt(mean_squared_error(y_train, y_pred))
# The value reported is a root mean squared error; the label previously
# (incorrectly) called it a mean absolute error.
print(f"The root mean squared error for the training set is {round(rmse,3)}")


The mean absolute error for the training set is 95.216


In [33]:
#Question 20
# From here on `y_pred` holds model_2's predictions for the held-out test split.
y_pred = model_2.predict(x_test_normalized)
mae = mean_absolute_error(y_true=y_test, y_pred=y_pred)
print(f"The mean absolute error for the test set is {round(mae,3)}")

The mean absolute error for the test set is 53.643


In [34]:
#Question 21
# Test-split RMSE. Uses `y_pred` as currently bound, which is expected to be the
# test-set predictions produced by the preceding cell.
test_mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
rmse = np.sqrt(test_mse)
print(f"The root mean squared error for the test set is {round(rmse,3)}")

The root mean squared error for the test set is 93.64


In [35]:
#Question 23
# Ridge regression with the estimator's default settings, trained and evaluated
# on the min-max-scaled features. fit() returns the estimator itself, so
# construction and fitting are chained.
ridge_model = Ridge().fit(x_train_normalized, y_train)
y_pred = ridge_model.predict(x_test_normalized)
ridge_mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
rmse = np.sqrt(ridge_mse)
print(f"The root mean squared error for the test set using Ridge regression is {round(rmse,3)}")

The root mean squared error for the test set using Ridge regression is 93.709


In [36]:
#Question 24
# Lasso regression with the estimator's default settings.
# NOTE(review): this model is fitted on the raw `x_train`, whereas the linear and
# ridge models are fitted on `x_train_normalized` — confirm this is intended.
lasso_model = Lasso().fit(x_train, y_train)

def get_weights_df(model, feat, col_name):
    """Tabulate a fitted model's coefficients per feature, in ascending order.

    Parameters
    ----------
    model : object
        Anything exposing a ``coef_`` attribute with one value per feature.
    feat : object
        Anything exposing ``.columns``; these become the feature labels and
        must line up with ``model.coef_``.
    col_name : hashable
        Name of the coefficient column in the returned frame.

    Returns
    -------
    pandas.DataFrame
        Two columns, ``'Features'`` and ``col_name``, sorted by coefficient
        from smallest to largest, with coefficients rounded to 3 decimals.
    """
    # Label each coefficient with its feature, order by value, then round.
    ordered = pd.Series(model.coef_, index=feat.columns).sort_values()
    rounded = ordered.round(3)
    return pd.DataFrame({'Features': rounded.index, col_name: rounded.values})
# Coefficient table for the fitted Lasso model, labelled with x_train's column names.
lasso_weights_df = get_weights_df(
    model=lasso_model,
    feat=x_train,
    col_name='Lasso_weight',
)
print(lasso_weights_df)

       Features  Lasso_weight
0            T9       -16.392
1            T2       -10.757
2          RH_2       -10.147
3          RH_8        -5.458
4         T_out        -3.696
5          RH_7        -1.724
6            T5        -1.589
7          RH_9        -1.550
8        RH_out        -0.018
9            T1        -0.000
10    Tdewpoint         0.000
11           T7        -0.000
12         RH_4         0.000
13           T4        -0.000
14          rv2         0.000
15          rv1         0.024
16  Press_mm_hg         0.087
17   Visibility         0.177
18         RH_5         0.251
19         RH_6         0.304
20    Windspeed         2.192
21         RH_3         3.658
22           T6         4.817
23           T8         6.598
24         RH_1        14.840
25           T3        21.215


In [37]:
#Question 25
# Test-split RMSE for the Lasso model. It predicts on the raw `x_test`, matching
# the raw `x_train` the model was fitted on.
y_pred = lasso_model.predict(x_test)
lasso_mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
rmse = np.sqrt(lasso_mse)
print(f"The root mean squared error for the test set using lasso regression is {round(rmse,3)}")

The root mean squared error for the test set using lasso regression is 93.892
