# 3. Deep Neural Network - KFold Cross Validated
+ (a) City Mileage CO2 Prediction
+ (b) Highway Mileage CO2 Prediction
+ (c) Subtract Highway from City Predictions to get CO2 Savings

In [102]:
# Load Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
plt.style.use('ggplot')
import seaborn as sns
import pandas_profiling as pp
pip install https://github.com/pandas-profiling/pandas-profiling/archive/master.zip
import mpl_toolkits
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from math import sqrt
from sklearn.metrics import r2_score
from sklearn.preprocessing import StandardScaler 

from sklearn.linear_model import ElasticNet
from sklearn.linear_model import ElasticNetCV

from sklearn import linear_model
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.model_selection import GridSearchCV
from sklearn import metrics

import tensorflow as tf
from keras import models, layers
from tensorflow import keras
from tensorflow.keras.layers import Dense
from keras.models import Sequential
from pylab import rcParams

pip install plotly
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import math

from matplotlib import rc
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold

# Modeling Libraries
from sklearn import linear_model
from sklearn.metrics import make_scorer
from sklearn.ensemble import BaggingRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn import svm
from sklearn.metrics import r2_score
from sklearn.ensemble import AdaBoostRegressor
from sklearn.model_selection import cross_val_score
from sklearn.tree import DecisionTreeRegressor
from subprocess import check_output

%matplotlib inline

%config InlineBackend.figure_format='retina'
sns.set(style='whitegrid', palette='muted', font_scale=1.5)
rcParams['figure.figsize'] = 16, 10
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)
tf.random.set_seed(RANDOM_SEED)

import os

# Hide Warnings
import warnings
warnings.filterwarnings('ignore')

# (a) City Mileage CO2 Prediction

In [103]:
# Loading the cleaned data
df = pd.read_csv('CO2_Cleaned.csv')

In [104]:
df = df.drop(['Hway_Mileage', 'Min_Wght'], axis = 1)

target_col = "CO2"

x = df.loc[:, df.columns != target_col].values
y = df.loc[:, target_col].values

In [105]:
from sklearn.model_selection import train_test_split
import pandas as pd
import os
import numpy as np
from sklearn import metrics
from scipy.stats import zscore
from sklearn.model_selection import KFold

# Keep a 10% holdout
x_main, x_holdout, y_main, y_holdout = train_test_split(    
    x, y, test_size=0.10) 

##########################################################################
#                   CROSS-VALIDATION WITH KFOLD 
##########################################################################

kf = KFold(5)
    
oos_y = []
oos_pred = []
fold = 0
for train, test in kf.split(x_main):        
    fold+=1
    print(f"Fold #{fold}")
        
    x_train = x_main[train]
    y_train = y_main[train]
    x_test = x_main[test]
    y_test = y_main[test]
    
    tf.keras.mixed_precision.experimental.set_policy('float64')
    np.random.seed(10)
    classifier = Sequential()

##########################################################################
#                   MODEL DEFINITION
##########################################################################

    classifier.add(Dense(64, kernel_initializer = 'uniform', activation = 'relu', input_dim = x.shape[1])) 
    classifier.add(layers.Dense(16, activation='relu'))
    classifier.add(layers.Dense(8, activation='relu'))
    classifier.add(Dense(1, kernel_initializer = 'uniform'))
    classifier.add(layers.Dropout(0.01))
    classifier.add(layers.Dense(1))
    
    classifier.compile(optimizer = 'adam', loss = 'mean_squared_error')        
    
    history = classifier.fit(x_train,y_train,validation_data=(x_test, y_test),
              verbose=0,epochs=100)
    
    pred = classifier.predict(x_test)
    
    oos_y.append(y_test)
    oos_pred.append(pred) 

    
##########################################################################
#                  MEASURE EVALUATION: R2 + MSE + RMSE
##########################################################################
    
    #1. Root Sqaured Error
    R2 = r2_score(pred,y_test)
    print(f"Fold Score (R2): {R2}")
    
    #2. Mean Squared Error
    MSE = mean_squared_error(pred,y_test)
    print(f"Fold Score (MSE): {MSE}")

    #3. Root Mean Sqaured Error
    RMSE = np.sqrt(metrics.mean_squared_error(pred,y_test))
    print(f"Fold Score (RMSE): {RMSE}")

# Create OOS Predictions & Measure Errors
oos_y = np.concatenate(oos_y)
oos_pred = np.concatenate(oos_pred)

RMSE = np.sqrt(metrics.mean_squared_error(oos_pred,oos_y))

R2 = r2_score(oos_pred,oos_y)

MSE = mean_squared_error(oos_pred,oos_y)

print()
print(f"Cross-validated (R2): {R2}")
print(f"Cross-validated (MSE): {MSE}")
print(f"Cross-validated Score (RMSE): {RMSE}")   
    
# Cross-Validated Prediction
holdout_pred = classifier.predict(x_holdout)

R2 = r2_score(holdout_pred,y_holdout)
print(f"Holdout (R2): {R2}")

MSE = mean_squared_error(holdout_pred,y_holdout)
print(f"Holdout (MSE): {MSE}")

RMSE = np.sqrt(metrics.mean_squared_error(holdout_pred,y_holdout))
print(f"Holdout Score (RMSE): {RMSE}")

# Write the cross-validated prediction
oos_y = pd.DataFrame(oos_y)
oos_pred = pd.DataFrame(oos_pred)

City = pd.concat( [oos_y, oos_pred],axis=1 )


Fold #1
Fold Score (R2): 0.9975081768969356
Fold Score (MSE): 0.00013628340908664318
Fold Score (RMSE): 0.011674048530250471
Fold #2
Fold Score (R2): 0.9978088830708801
Fold Score (MSE): 0.00011753661399912413
Fold Score (RMSE): 0.010841430440634858
Fold #3
Fold Score (R2): 0.9977814984966893
Fold Score (MSE): 0.00011993178437999322
Fold Score (RMSE): 0.010951337104664125
Fold #4
Fold Score (R2): 0.9975483644424801
Fold Score (MSE): 0.00013980383449482722
Fold Score (RMSE): 0.011823867154819832
Fold #5
Fold Score (R2): 0.9913757576306896
Fold Score (MSE): 0.0004780845516104918
Fold Score (RMSE): 0.02186514467389804

Cross-validated (R2): 0.9963941904560855
Cross-validated (MSE): 0.00019831522331082853
Cross-validated Score (RMSE): 0.014082443797538428
Holdout (R2): 0.9916729748018442
Holdout (MSE): 0.000450207122717228
Holdout Score (RMSE): 0.02121808480323396


In [106]:
##########################################################################
#                  PLOTTING CITY DNN MODEL PERFORMANCE
##########################################################################

fig = go.Figure()
fig.add_trace(go.Scattergl(y=history.history['loss'],
                    name='Train'))
fig.add_trace(go.Scattergl(y=history.history['val_loss'],
                    name='Validation'))
fig.update_layout(height=500, width=700,
                  xaxis_title='Epoch',
                  yaxis_title='Mean Sqaured Error')
fig.show() 

In [107]:
from ann_visualizer.visualize import ann_viz

ann_viz(classifier, view=True, filename='City_DNN.gv', title='City Deep Neural Network')

In [108]:
# Model Summary

print(classifier.summary())

Model: "sequential_39"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_195 (Dense)            (None, 64)                704       
_________________________________________________________________
dense_196 (Dense)            (None, 16)                1040      
_________________________________________________________________
dense_197 (Dense)            (None, 8)                 136       
_________________________________________________________________
dense_198 (Dense)            (None, 1)                 9         
_________________________________________________________________
dropout_39 (Dropout)         (None, 1)                 0         
_________________________________________________________________
dense_199 (Dense)            (None, 1)                 2         
Total params: 1,891
Trainable params: 1,891
Non-trainable params: 0
___________________________________________________

# (b) Highway Mileage CO2 Prediction

In [111]:
df1 = pd.read_csv('CO2_Cleaned.csv')


df1 = df1.drop(['City_Mileage', 'Min_Wght'], axis = 1)

target_col = "CO2"

x1 = df1.loc[:, df1.columns != target_col].values
y1 = df1.loc[:, target_col].values

In [112]:
from sklearn.model_selection import train_test_split
import pandas as pd
import os
import numpy as np
from sklearn import metrics
from scipy.stats import zscore
from sklearn.model_selection import KFold

# Keep a 10% holdout
x_main, x_holdout, y_main, y_holdout = train_test_split(    
    x1, y1, test_size=0.10) 

##########################################################################
#                   CROSS-VALIDATION WITH KFOLD 
##########################################################################

kf = KFold(5)
    
oos_y = []
oos_pred = []
fold = 0
for train, test in kf.split(x_main):        
    fold+=1
    print(f"Fold #{fold}")
        
    x_train = x_main[train]
    y_train = y_main[train]
    x_test = x_main[test]
    y_test = y_main[test]
    
    tf.keras.mixed_precision.experimental.set_policy('float64')
    np.random.seed(10)
    classifier = Sequential()

    #Defining Model
    classifier.add(Dense(64, kernel_initializer = 'uniform', activation = 'relu', input_dim = x1.shape[1])) 
    classifier.add(layers.Dense(16, activation='relu'))
    classifier.add(layers.Dense(8, activation='relu'))
    classifier.add(Dense(1, kernel_initializer = 'uniform'))
    classifier.add(layers.Dropout(0.01))
    classifier.add(layers.Dense(1))
    
    classifier.compile(optimizer = 'adam', loss = 'mean_squared_error')        
    
    
    history = classifier.fit(x_train,y_train,validation_data=(x_test, y_test),
              verbose=0,epochs=100)
    
    pred = classifier.predict(x_test)
    
    oos_y.append(y_test)
    oos_pred.append(pred) 

##########################################################################
#                  MEASURE EVALUATION: R2 + MSE + RMSE
##########################################################################

    #1. Root Sqaured Error
    R2 = r2_score(pred,y_test)
    print(f"Fold Score (R2): {R2}")
    
    #2. Mean Squared Error
    MSE = mean_squared_error(pred,y_test)
    print(f"Fold Score (MSE): {MSE}")

    #3. Root Mean Sqaured Error
    RMSE = np.sqrt(metrics.mean_squared_error(pred,y_test))
    print(f"Fold Score (RMSE): {RMSE}")
    
##########################################################################
#            Create OOS Predictions & Measure Errors
##########################################################################

oos_y = np.concatenate(oos_y)
oos_pred = np.concatenate(oos_pred)

RMSE = np.sqrt(metrics.mean_squared_error(oos_pred,oos_y))

R2 = r2_score(oos_pred,oos_y)

MSE = mean_squared_error(oos_pred,oos_y)

print()
print(f"Cross-validated (R2): {R2}")
print(f"Cross-validated (MSE): {MSE}")
print(f"Cross-validated Score (RMSE): {RMSE}")   
    
##########################################################################
#              Cross-Validated Prediction
##########################################################################

holdout_pred = classifier.predict(x_holdout)

R2 = r2_score(holdout_pred,y_holdout)
print(f"Holdout (R2): {R2}")

MSE = mean_squared_error(holdout_pred,y_holdout)
print(f"Holdout (MSE): {MSE}")

RMSE = np.sqrt(metrics.mean_squared_error(holdout_pred,y_holdout))
print(f"Holdout Score (RMSE): {RMSE}")

# Write the cross-validated prediction
oos_y = pd.DataFrame(oos_y)
oos_pred = pd.DataFrame(oos_pred)

oosDF = pd.concat( [oos_y, oos_pred],axis=1 )
oosDF.to_csv('Hway_CO2_Prediction.csv',index=False)

Fold #1
Fold Score (R2): 0.9979511706784369
Fold Score (MSE): 0.0001122265345628082
Fold Score (RMSE): 0.01059370258987896
Fold #2
Fold Score (R2): 0.994896769835043
Fold Score (MSE): 0.00028749078062875706
Fold Score (RMSE): 0.016955553091207526
Fold #3
Fold Score (R2): 0.9955853249065963
Fold Score (MSE): 0.00023385124307864262
Fold Score (RMSE): 0.015292195495697884
Fold #4
Fold Score (R2): 0.9970286989310312
Fold Score (MSE): 0.00016343722421561867
Fold Score (RMSE): 0.012784256889456605
Fold #5
Fold Score (R2): 0.9945165376638746
Fold Score (MSE): 0.000297516834226123
Fold Score (RMSE): 0.017248676303592777

Cross-validated (R2): 0.995998818886771
Cross-validated (MSE): 0.00021890318261298268
Cross-validated Score (RMSE): 0.014795377068969303
Holdout (R2): 0.993904685274216
Holdout (MSE): 0.00034578542146191846
Holdout Score (RMSE): 0.018595306436354268


In [None]:
##########################################################################
#                  PLOTTING HIGHWAY DNN MODEL PERFORMANCE
##########################################################################

fig = go.Figure()
fig.add_trace(go.Scattergl(y=history.history['loss'],
                    name='Train'))
fig.add_trace(go.Scattergl(y=history.history['val_loss'],
                    name='Validation'))
fig.update_layout(height=500, width=700,
                  xaxis_title='Epoch',
                  yaxis_title='Mean Sqaured Error')
fig.show() 

In [113]:
Hi

Model: "sequential_46"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_230 (Dense)            (None, 64)                704       
_________________________________________________________________
dense_231 (Dense)            (None, 16)                1040      
_________________________________________________________________
dense_232 (Dense)            (None, 8)                 136       
_________________________________________________________________
dense_233 (Dense)            (None, 1)                 9         
_________________________________________________________________
dropout_46 (Dropout)         (None, 1)                 0         
_________________________________________________________________
dense_234 (Dense)            (None, 1)                 2         
Total params: 1,891
Trainable params: 1,891
Non-trainable params: 0
___________________________________________________

# Calculate Difference in CO2 Between City and Highway Predictions

Rename Columns in City Predictions

In [90]:
City = pd.read_csv('City_CO2_Prediction.csv')

City = pd.DataFrame(City) 

In [91]:
Highway= pd.read_csv('Hway_CO2_Prediction.csv')

Highway = pd.DataFrame(Highway) 

In [92]:
#Substracting Highway Predictions from City Predictions 

CO2_Savings = City - Highway
# Renaming the Columns to more meaningful names
CO2_Savings = CO2_Savings.rename(columns = {'0':'Prediction_OO', '0.1':'Prediction_Actual'})

# <span style="color:red">SAVING PREDICTED CO2 SAVINGS</span> 

In [93]:
CO2_Savings.to_csv('CO2_Savings.csv',index=False)


