In [5]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

In [33]:
# Load the raw energy-efficiency spreadsheet into a DataFrame.
df = pd.read_excel('dataset/energy_efficiency.xlsx')

# Human-readable labels for the coded column headers (X1..X8 inputs, Y1/Y2 loads).
column_names = dict(
    X1='Relative_Compactness',
    X2='Surface_Area',
    X3='Wall_Area',
    X4='Roof_Area',
    X5='Overall_Height',
    X6='Orientation',
    X7='Glazing_Area',
    X8='Glazing_Area_Distribution',
    Y1='Heating_Load',
    Y2='Cooling_Load',
)

# Display-only preview: the renamed copy is not assigned, so `df` itself keeps
# its original column labels (later cells still select 'Y1' and 'Y2').
df.rename(column_names, axis='columns')

Unnamed: 0,Relative_Compactness,Surface_Area,Wall_Area,Roof_Area,Overall_Height,Orientation,Glazing_Area,Glazing_Area_Distribution,Heating_Load,Cooling_Load
0,0.98,514.5,294.0,110.25,7.0,2,0.0,0,15.55,21.33
1,0.98,514.5,294.0,110.25,7.0,3,0.0,0,15.55,21.33
2,0.98,514.5,294.0,110.25,7.0,4,0.0,0,15.55,21.33
3,0.98,514.5,294.0,110.25,7.0,5,0.0,0,15.55,21.33
4,0.90,563.5,318.5,122.50,7.0,2,0.0,0,20.84,28.28
...,...,...,...,...,...,...,...,...,...,...
763,0.64,784.0,343.0,220.50,3.5,5,0.4,5,17.88,21.40
764,0.62,808.5,367.5,220.50,3.5,2,0.4,5,16.54,16.88
765,0.62,808.5,367.5,220.50,3.5,3,0.4,5,16.44,17.11
766,0.62,808.5,367.5,220.50,3.5,4,0.4,5,16.48,16.61


In [44]:
# Min-max scale every column of df (features and both targets) to a common range.
# NOTE(review): the scaler is fitted on the full dataset before the train/test
# split performed later, so test rows influence the scaling statistics; consider
# fitting on the training split only.
scaler = MinMaxScaler()
scaler.fit(df)
normalised_df = pd.DataFrame(data=scaler.transform(df), columns=df.columns)
# Model inputs: every scaled column except the two load columns.
features_df = normalised_df.drop(['Y1', 'Y2'], axis=1)
# Regression target: the scaled Y1 column.
heating_target = normalised_df['Y1']

0      0.257212
1      0.257212
2      0.257212
3      0.257212
4      0.399838
         ...   
763    0.320032
764    0.283904
765    0.281208
766    0.282286
767    0.286600
Name: Y1, Length: 768, dtype: float64

In [47]:
# Hold out 30% of the rows for testing; the fixed random_state keeps the split repeatable.
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(
    features_df,
    heating_target,
    test_size=0.3,
    random_state=1,
)

In [52]:
from sklearn.linear_model import LinearRegression
# Fit a linear regression on the training split (fit() returns the estimator itself).
linear_model = LinearRegression().fit(x_train, y_train)
# Predict the scaled heating target for the held-out rows and display the result.
predicted_values = linear_model.predict(x_test)
predicted_values

array([0.19213867, 0.58276367, 0.7355957 , 0.7109375 , 0.76196289,
       0.24194336, 0.16894531, 0.60986328, 0.23486328, 0.55883789,
       0.30932617, 0.65917969, 0.6965332 , 0.29321289, 0.19189453,
       0.72119141, 0.57861328, 0.31201172, 0.25683594, 0.74487305,
       0.70410156, 0.07861328, 0.61938477, 0.32104492, 0.20507812,
       0.73999023, 0.31860352, 0.18798828, 0.79516602, 0.2722168 ,
       0.6784668 , 0.72802734, 0.14038086, 0.28442383, 0.57836914,
       0.80615234, 0.32592773, 0.12939453, 0.72900391, 0.24169922,
       0.82055664, 0.15991211, 0.10986328, 0.16821289, 0.12060547,
       0.14941406, 0.48999023, 0.69580078, 0.7578125 , 0.71875   ,
       0.10107422, 0.66699219, 0.10766602, 0.24145508, 0.76098633,
       0.76538086, 0.73486328, 0.63525391, 0.15844727, 0.2890625 ,
       0.19555664, 0.59375   , 0.61157227, 0.7644043 , 0.56835938,
       0.92407227, 0.68115234, 0.31274414, 0.67114258, 0.23291016,
       0.54858398, 0.24487305, 0.23486328, 0.72436523, 0.28784

In [56]:
# Mean absolute error between the held-out targets and the model's predictions,
# displayed to three decimal places.
from sklearn.metrics import mean_absolute_error
mae = mean_absolute_error(y_true=y_test, y_pred=predicted_values)
round(mae, 3)

0.063

In [60]:
# RMSE: root mean squared error, i.e. the square root of the mean squared error
# between the held-out targets and the predictions (this is not the RSS).
# NOTE(review): RMSE can never be smaller than MAE on the same predictions, so
# re-run this cell if the saved output is below the MAE reported earlier.
import numpy as np
from sklearn.metrics import mean_squared_error
rmse = np.sqrt(mean_squared_error(y_test, predicted_values))
round(rmse, 3)

0.008

In [62]:
from sklearn.metrics import r2_score
# Coefficient of determination (R^2) on the held-out split. The result is stored
# under its own name so the imported r2_score function is not overwritten by a
# float and stays callable in later cells.
r2 = r2_score(y_test, predicted_values)
round(r2, 3)

0.892