# Imports

In [7]:
import pandas as pd
import os
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
tf.config.run_functions_eagerly(True)
from sklearn.preprocessing import StandardScaler

# Creating Directory 

In [16]:
parent_dir = "D:\Programming\python\my_ai_lab\dataset\PEMS"
directory = ["train","validate","test"]
data_dir = []

for dir in directory:
    path = os.path.join(parent_dir, dir)
    data_dir.append(path)
    os.makedirs(path, exist_ok = True)
print(data_dir)

['D:\\Programming\\python\\my_ai_lab\\dataset\\PEMS\\train', 'D:\\Programming\\python\\my_ai_lab\\dataset\\PEMS\\validate', 'D:\\Programming\\python\\my_ai_lab\\dataset\\PEMS\\test']


# Data Loading

In [18]:
train_file_name = ["gt_2011.csv","gt_2012.csv","gt_2013.csv","gt_2014.csv","gt_2015.csv"]
#reading 5 file into single data frame



def data_loader_from_multiple_file(filename, filedir):
    data_pd = None
    frames = []
    for file in train_file_name:
        file = os.path.join(parent_dir,file)
        read_file = pd.read_csv (file)
        frames.append(read_file)

        data_pd = pd.concat(frames,axis = 0)
    return data_pd

data_pd = data_loader_from_multiple_file(train_file_name, parent_dir)

columns = [col for col in data_pd.columns]

In [20]:
def shuffle(data,no_of_shuffle=100):
    data_tf = tf.convert_to_tensor(data)
    for i in range(no_of_shuffle):
        data_tf = tf.random.shuffle(data_tf, seed = 42)
    return pd.DataFrame(data = data_tf)

shuffled_data_pd = shuffle(data_pd,100)


# Data splitting

In [23]:
data_np = shuffled_data_pd.to_numpy()
train_np, val_np, test_np = np.split(data_np, [int(0.8*len(data_np)), int(0.9*len(data_np))])

In [25]:
train_pd = pd.DataFrame(data = train_np,columns = columns)

val_pd = pd.DataFrame(data = val_np,columns = columns)
test_pd = pd.DataFrame(data = test_np,columns = columns)


In [30]:
X_train_pd = train_pd.drop(columns = 'NOX',)
y_train_pd = train_pd.iloc[:,10:]

X_val_pd = val_pd.drop(columns = 'NOX',)
y_val_pd = val_pd.iloc[:,10:]

X_test_pd = test_pd.drop(columns = 'NOX',)
y_test_pd = test_pd.iloc[:,10:]

In [29]:
X_train_np = X_train_pd.to_numpy()
y_train_np = y_train_pd.to_numpy()

X_val_np = X_val_pd.to_numpy()
y_val_np = y_val_pd.to_numpy()

X_test_np = X_test_pd.to_numpy()
y_test_np = y_test_pd.to_numpy()

# Data preprocessing

In [28]:
scaler_x = StandardScaler()
scaler_y = StandardScaler()

scaler_x.fit(X_train_np)
scaler_y.fit(y_train_np)


In [31]:
X_train_np = scaler_x.transform(X_train_np)
y_train_np = scaler_y.transform(y_train_np)

X_val_np = scaler_x.transform(X_val_np)
y_val_np = scaler_y.transform(y_val_np)

X_test_np = scaler_x.transform(X_test_np)
y_test_np = scaler_y.transform(y_test_np)

y_test_np.shape

(3674, 1)

# Feed forward model

In [34]:
def pems_model():
  
    initializer = keras.initializers.HeNormal(seed = 42)
    model = tf.keras.Sequential([
    
    keras.layers.InputLayer(input_shape = [None, 10]),
    keras.layers.Dense(64,kernel_initializer=initializer, activation='elu'),
    keras.layers.Dropout(0.20),
    #tf.keras.layers.LayerNormalization(axis=1),
    keras.layers.Dense(64 ,kernel_initializer=initializer, activation='elu'),
    keras.layers.Dropout(0.20),
    #keras.layers.Dense(64 ,kernel_initializer=initializer, activation='elu'),
    #keras.layers.Dropout(0.20),
    #tf.keras.layers.LayerNormalization(axis=1),
    keras.layers.Dense(1, kernel_initializer=initializer ,activation='elu')
  ])


    optimizer = keras.optimizers.SGD(learning_rate=0.0001,momentum = 0.9)
    loss = keras.losses.MeanSquaredError()


    model.compile(loss= loss, optimizer= optimizer)
    return model

pems_model = pems_model()

In [35]:
pems_model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_3 (Dense)             (None, None, 64)          704       
                                                                 
 dropout_2 (Dropout)         (None, None, 64)          0         
                                                                 
 dense_4 (Dense)             (None, None, 64)          4160      
                                                                 
 dropout_3 (Dropout)         (None, None, 64)          0         
                                                                 
 dense_5 (Dense)             (None, None, 1)           65        
                                                                 
Total params: 4,929
Trainable params: 4,929
Non-trainable params: 0
_________________________________________________________________


In [39]:
# Define some useful callbacks
#Reduce learning rate when a metric has stopped improving.
reducelrp = keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5, verbose=0, mode='auto', min_delta=0.0001, cooldown=0, min_lr=0.000000001)
# Stop training when a monitored quantity has stopped improving. 
# By default, mode is set to ‘auto‘ and knows that you want to minimize loss or maximize accuracy.
early_stopping_monitor=keras.callbacks.EarlyStopping(monitor='val_loss',verbose=1, patience=10, baseline=0.4,min_delta = 0.001 )
# Save the model after every epoch.
best_trained_model_file= 'pems_model.h5'
checkpoint = keras.callbacks.ModelCheckpoint(best_trained_model_file, verbose=0, monitor='val_loss',save_best_only=True, mode='auto')  
#place callbacks want to enable on this list
callbacks= [checkpoint,reducelrp, checkpoint] #[checkpoint,

In [40]:
#fitting the model

history = pems_model.fit( x = X_train_np, y = y_train_np, epochs=100,
  validation_data = (X_val_np,y_val_np) ,callbacks = callbacks,verbose = 'auto')


Epoch 1/100
  3/919 [..............................] - ETA: 26s - loss: 0.2484



Epoch 2/100

KeyboardInterrupt: 

In [38]:
import matplotlib.pyplot as plt
pd.DataFrame(history.history).plot(figsize=(8, 5))
plt.grid(True)
plt.gca().set_ylim(0, 1) # set the vertical range to [0-1]
plt.show()
plt.savefig('performance.pdf')

NameError: name 'history' is not defined

In [45]:
model_load = keras.models.load_model('pems_model.h5')

In [46]:
y_predict = model_load.predict(X_test_np)

 10/115 [=>............................] - ETA: 0s





In [47]:
y_predict = pd.DataFrame(data = y_predict)
y_test_tf = pd.DataFrame(data = y_test_np)

In [48]:
y_predict.head()

Unnamed: 0,0
0,-0.192265
1,-0.403407
2,-0.804214
3,0.612911
4,-0.725784


In [49]:
y_test_tf.head()

Unnamed: 0,0
0,-0.199078
1,-0.178297
2,-1.33718
3,1.170613
4,-0.849117
