In [13]:
import pandas as pd
import numpy as np 
import seaborn as sns
%matplotlib inline
from matplotlib import pyplot as plt
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor, GradientBoostingRegressor
import warnings
warnings.filterwarnings("ignore")

In [3]:
# Location of the prepared energy-plant dataset. A raw string keeps every
# Windows backslash literal, so no separator can be misread as an escape
# sequence (a plain string with "\D" in it is an invalid escape).
# NOTE(review): this is an absolute, machine-specific path - consider a path
# relative to the project directory so the notebook can run elsewhere.
DATA_PATH = r"D:\Data Science Roadmap and Practice\Machine Learning Projects\Energy_Management_using_Machine_Learning\notebook\Data\energy_plant_data_updated.csv"

energy_plant_data_updated = pd.read_csv(DATA_PATH)
energy_plant_data_updated

Unnamed: 0,ambient_temp,exhaust_vacuum,ambient_pressure,relative_humidity,electrical_energy
0,29.07,72.51,1009.24,58.38,449.371855
1,32.38,67.17,1006.97,60.04,450.861043
2,19.29,67.71,1007.94,63.71,460.894029
3,16.45,41.48,1016.64,45.21,467.577314
4,21.43,46.97,1013.94,61.25,469.805723
...,...,...,...,...,...
9525,4.44,38.44,1016.14,75.35,499.615488
9526,18.18,67.71,1004.50,87.26,461.130122
9527,10.96,45.01,1017.97,95.82,481.245635
9528,8.70,36.24,1013.34,89.50,490.928341


In [4]:
# Preview the first five rows of the loaded frame.
energy_plant_data_updated.head(n=5)

Unnamed: 0,ambient_temp,exhaust_vacuum,ambient_pressure,relative_humidity,electrical_energy
0,29.07,72.51,1009.24,58.38,449.371855
1,32.38,67.17,1006.97,60.04,450.861043
2,19.29,67.71,1007.94,63.71,460.894029
3,16.45,41.48,1016.64,45.21,467.577314
4,21.43,46.97,1013.94,61.25,469.805723


In [6]:
# Split the frame into predictors and target. The target is selected with a
# one-element list, so `y` stays a single-column DataFrame rather than a Series.
feature_columns = ["ambient_temp", "exhaust_vacuum", "ambient_pressure", "relative_humidity"]
target_columns = ["electrical_energy"]

X = energy_plant_data_updated.loc[:, feature_columns]
y = energy_plant_data_updated.loc[:, target_columns]

In [7]:
# Inspect the selected feature columns.
X

Unnamed: 0,ambient_temp,exhaust_vacuum,ambient_pressure,relative_humidity
0,29.07,72.51,1009.24,58.38
1,32.38,67.17,1006.97,60.04
2,19.29,67.71,1007.94,63.71
3,16.45,41.48,1016.64,45.21
4,21.43,46.97,1013.94,61.25
...,...,...,...,...
9525,4.44,38.44,1016.14,75.35
9526,18.18,67.71,1004.50,87.26
9527,10.96,45.01,1017.97,95.82
9528,8.70,36.24,1013.34,89.50


In [8]:
# Inspect the target column (a single-column DataFrame).
y

Unnamed: 0,electrical_energy
0,449.371855
1,450.861043
2,460.894029
3,467.577314
4,469.805723
...,...
9525,499.615488
9526,461.130122
9527,481.245635
9528,490.928341


In [9]:
# Partition the feature columns by dtype: object columns are treated as
# categorical, everything else as numerical.
categorical_feature = X.select_dtypes(include="object").columns
numerical_feature = X.select_dtypes(exclude="object").columns

oh_transformer = OneHotEncoder()
numerical_transformer = StandardScaler()

# One-hot encode the categorical columns and standardise the numerical ones.
# The one-hot step is listed first, then the scaler.
preprocessor = ColumnTransformer(
    transformers=[
        ('OneHotEncoder', oh_transformer, categorical_feature),
        ('StandardScaler', numerical_transformer, numerical_feature),
    ]
)

In [10]:
# Fit the preprocessor on the feature frame and replace X with the transformed values.
# NOTE(review): the preprocessor is fit on the full dataset here, while the
# train/test split happens in a later cell, so the scaling statistics include
# the rows that end up in the test set. Consider splitting first, then calling
# fit_transform on the training rows and transform on the test rows.
# NOTE(review): X is rebound from the original DataFrame to the transformed
# output, so re-running this cell would feed the transformed array back in.
X = preprocessor.fit_transform(X)

In [11]:
# Inspect the transformed features (now an array rather than a DataFrame).
X

array([[ 1.2594508 ,  1.42033694, -0.69250215, -1.01112404],
       [ 1.70694972,  0.99905499, -1.08354631, -0.8976183 ],
       [-0.06276658,  1.04165654, -0.91644815, -0.64667487],
       ...,
       [-1.18894967, -0.74918619,  0.81138134,  1.54890915],
       [-1.49449275, -1.44106684,  0.01378906,  1.1167668 ],
       [ 1.11073514,  0.34188671, -1.89664254, -1.04531252]])

In [12]:
# Check the dimensions of the transformed feature matrix.
X.shape

(9530, 4)

In [14]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=42,
)

In [15]:
# Inspect the training portion of the transformed features.
X_train

array([[ 0.94579596,  1.24756401, -0.97329598, -1.30446118],
       [ 0.56319114,  1.37852431, -1.21274592,  1.35335106],
       [-0.08169402, -0.80204367,  1.0818392 , -1.71472291],
       ...,
       [-0.9415409 , -0.77522047,  1.59346966,  1.80395519],
       [ 1.08910377,  0.94698644,  0.4909663 , -1.6880559 ],
       [ 0.72272248,  1.36195704, -1.53660629,  0.6586412 ]])

In [16]:
# Inspect the training targets (the index shows the shuffled original row labels).
y_train

Unnamed: 0,electrical_energy
5697,446.379699
7303,446.486879
6750,480.421354
6427,498.967311
9438,460.463982
...,...
5734,444.062099
5191,470.132596
5390,477.150546
860,452.874680


In [18]:
def evaluate_model(true, predict):
    """Score regression predictions against the observed values.

    Parameters
    ----------
    true : array-like
        Observed target values.
    predict : array-like
        Predicted target values, aligned with ``true``.

    Returns
    -------
    tuple
        ``(mae, rmse, r2_result)``: mean absolute error, root mean squared
        error and R2 score, in that order.
    """
    mae = mean_absolute_error(true, predict)
    # The MSE is only needed as the input to the RMSE, so compute it once.
    rmse = np.sqrt(mean_squared_error(true, predict))
    r2_result = r2_score(true, predict)
    return mae, rmse, r2_result

In [20]:
# Seed shared by the estimators that use randomness, so repeated runs of this
# cell report the same scores.
RANDOM_STATE = 42

# Candidate regressors, otherwise left at their default hyperparameters.
models = {
    'LinearRegression' : LinearRegression(),
    'Ridge' : Ridge(),
    'Lasso' : Lasso(),
    'ElasticNet' : ElasticNet(),
    'SVR' : SVR(),
    'DecisionTreeRegressor' : DecisionTreeRegressor(random_state=RANDOM_STATE),
    'RandomForestRegressor' : RandomForestRegressor(random_state=RANDOM_STATE),
    'AdaBoostRegressor' : AdaBoostRegressor(random_state=RANDOM_STATE),
    'GradientBoostingRegressor' : GradientBoostingRegressor(random_state=RANDOM_STATE)
}

# Filled in lockstep inside the loop: model name and its test-set R2 score.
model_list = []
r2_list = []

# y_train is a single-column frame; flatten it once so every estimator is
# fitted on a 1-D target instead of a column vector.
y_train_1d = np.ravel(y_train)

for model_name, model in models.items():
    model.fit(X_train, y_train_1d)
    y_train_predict = model.predict(X_train)
    y_test_predict = model.predict(X_test)

    model_train_mae, model_train_rmse, model_train_r2_result = evaluate_model(y_train, y_train_predict)
    model_test_mae, model_test_rmse, model_test_r2_result = evaluate_model(y_test, y_test_predict)

    print(model_name)
    model_list.append(model_name)
    # Record the test-set R2 so r2_list lines up with model_list.
    r2_list.append(model_test_r2_result)

    print('Model performance for test set')
    print('- Root Mean Squared error: {:.4f}'.format(model_test_rmse))
    print('- Mean Absolute error: {:.4f}'.format(model_test_mae))
    print('- R2 Score: {:.4f}'.format(model_test_r2_result))

    print('Model performance for train set')
    print('- Root Mean Squared error: {:.4f}'.format(model_train_rmse))
    print('- Mean Absolute error: {:.4f}'.format(model_train_mae))
    print('- R2 Score: {:.4f}'.format(model_train_r2_result))

    print('='*35)
    print('\n')

LinearRegression
Model performance for test set
- Root Mean Squared error: 4.6162
- Mean Absolute error: 3.6821
- R2 Score: 0.9260
Model performance for train set
- Root Mean Squared error: 4.6721
- Mean Absolute error: 3.7222
- R2 Score: 0.9248


Ridge
Model performance for test set
- Root Mean Squared error: 4.6163
- Mean Absolute error: 3.6823
- R2 Score: 0.9260
Model performance for train set
- Root Mean Squared error: 4.6721
- Mean Absolute error: 3.7223
- R2 Score: 0.9248


Lasso
Model performance for test set
- Root Mean Squared error: 5.0742
- Mean Absolute error: 4.0505
- R2 Score: 0.9106
Model performance for train set
- Root Mean Squared error: 5.1335
- Mean Absolute error: 4.0992
- R2 Score: 0.9092


ElasticNet
Model performance for test set
- Root Mean Squared error: 6.4640
- Mean Absolute error: 5.1612
- R2 Score: 0.8549
Model performance for train set
- Root Mean Squared error: 6.4618
- Mean Absolute error: 5.1933
- R2 Score: 0.8561


SVR
Model performance for test set
-