In [1]:
import datetime
import math
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
%load_ext tensorboard
print(tf.version.VERSION)

2.11.0


In [8]:
def tf_dataset(series_x, series_y, batch_size, shuffle_buffer, shuffle=True):
    """Wrap features and labels in a cached, batched, endlessly repeating dataset.

    Args:
        series_x: Features, sliced along the first axis into one element each.
        series_y: Labels, sliced along the first axis in step with ``series_x``.
        batch_size: Number of elements per batch.
        shuffle_buffer: Buffer size handed to ``Dataset.shuffle``; only used
            when ``shuffle`` is true.
        shuffle: When true, elements are shuffled before they are batched.

    Returns:
        A ``tf.data.Dataset`` of ``(x, y)`` batches. Because of the trailing
        ``repeat()`` it never runs out, so consumers must bound iteration
        themselves (e.g. with a step count).
    """
    pipeline = tf.data.Dataset.from_tensor_slices((series_x, series_y)).cache()
    if shuffle:
        # Shuffle individual elements, i.e. before they are grouped into batches.
        pipeline = pipeline.shuffle(shuffle_buffer)

    return pipeline.batch(batch_size).repeat()

def create_window_dataset(ds, lb, window_size):
    """Cut a sequence into overlapping sliding windows with one label each.

    Window ``k`` covers rows ``k .. k + window_size - 1`` of ``ds`` and is
    paired with the entry of ``lb`` at the window's last row
    (``lb[k + window_size - 1]``).

    Args:
        ds: Array whose first axis is the sequence axis (must expose ``.shape``).
        lb: Labels indexed like the rows of ``ds``.
        window_size: Number of consecutive rows per window; must be >= 1.

    Returns:
        Tuple ``(windows, labels)`` of NumPy arrays. There is one window per
        valid end position, i.e. ``ds.shape[0] - window_size + 1`` of them;
        both arrays are empty when ``window_size`` exceeds the number of rows.

    Raises:
        ValueError: If ``window_size`` is smaller than 1. Without this check a
            non-positive size silently produces empty windows paired with
            wrapped-around labels (``lb[-1]``, ``lb[0]``, ...).
    """
    if window_size < 1:
        raise ValueError(f"window_size must be >= 1, got {window_size}")

    # Exclusive end index of every window that fits completely inside ds.
    window_ends = range(window_size, ds.shape[0] + 1)

    windowed_dataset = [ds[end - window_size:end] for end in window_ends]
    labels = [lb[end - 1] for end in window_ends]

    return np.array(windowed_dataset), np.array(labels)

def get_metrics_result(metrics, true_labels, predicted_labels):
    """Evaluate each metric object on one set of labels and predictions.

    Every metric is reset first, so the returned value reflects only the
    labels/predictions passed to this call and not state left over from
    earlier use of the same metric object.

    Args:
        metrics: Iterable of metric objects exposing ``reset_state()`` (or the
            older ``reset_states()``), ``update_state(y_true, y_pred)`` and
            ``result()``, whose return value has a ``.numpy()`` method.
        true_labels: Ground-truth values passed to ``update_state``.
        predicted_labels: Predictions passed to ``update_state``.

    Returns:
        List with one ``result().numpy()`` value per metric, in input order.
    """
    metrics_result = []
    for metric in metrics:
        # Keras renamed reset_states() to reset_state() and deprecated the old
        # spelling; prefer the new name but keep working with older metrics.
        reset = getattr(metric, "reset_state", None)
        if reset is None:
            reset = metric.reset_states
        reset()

        metric.update_state(true_labels, predicted_labels)
        metrics_result.append(metric.result().numpy())

    return metrics_result

In [16]:
# Load the cleaned data set; the "date" column is parsed and used as the index.
df = pd.read_csv("Cleaned_df.csv", parse_dates=["date"], index_col="date")
rows, cols = df.shape

# Positional 80/20 split without shuffling: the first 80% of the rows are used
# for training and the remaining rows for testing.
index_80_percent = int(rows * 0.8)
Train_set = df.iloc[:index_80_percent].copy()
Test_set = df.iloc[index_80_percent:].copy()

# Move the target column to the last position so that, after conversion to
# NumPy, features are [:, :-1] and the target is [:, -1].
lbl = Train_set.pop("PM2.5 Aoti")
Train_set = pd.concat((Train_set, lbl), axis=1)

lbl = Test_set.pop("PM2.5 Aoti")
Test_set = pd.concat((Test_set, lbl), axis=1)

Train_set_np = Train_set.to_numpy()
Test_set_np = Test_set.to_numpy()

In [30]:
# Hyper-parameters of the input pipeline.
BATCH_SIZE = 64        # samples per batch
WINDOW_SIZE = 15       # consecutive rows fed to the model per sample
SHUFFLE_BUFFER = 1000  # buffer size for shuffling the training windows


# The last column of each array is the target; all other columns are features.
windowed_Train, labels_Train = create_window_dataset(
    Train_set_np[:, :-1], Train_set_np[:, -1], window_size=WINDOW_SIZE
)
train_set = tf_dataset(
    windowed_Train,
    labels_Train,
    batch_size=BATCH_SIZE,
    shuffle_buffer=SHUFFLE_BUFFER,
    shuffle=True,
)

windowed_Test, labels_Test = create_window_dataset(
    Test_set_np[:, :-1], Test_set_np[:, -1], window_size=WINDOW_SIZE
)
# Do not shuffle the evaluation data: shuffling brings no benefit for
# validation and would break the alignment between the batches of test_set
# and labels_Test when predictions are compared against the labels later.
test_set = tf_dataset(
    windowed_Test,
    labels_Test,
    batch_size=BATCH_SIZE,
    shuffle_buffer=SHUFFLE_BUFFER,
    shuffle=False,
)

# Create Model

In [32]:
# Reset Keras' global state before (re)building the model.
tf.keras.backend.clear_session()

# Conv1D/MaxPooling front end -> two stacked LSTMs -> dense regression head.
model = tf.keras.models.Sequential()

# Causal padding keeps the sequence length; each pooling layer (pool_size=2,
# stride 1, "valid") shortens the sequence by one step.
model.add(
    tf.keras.layers.Conv1D(
        filters=64,
        kernel_size=5,
        strides=1,
        padding="causal",
        activation="relu",
        # (window length, number of features) of one training sample
        input_shape=windowed_Train.shape[-2:],
    )
)
model.add(tf.keras.layers.MaxPooling1D(pool_size=2, strides=1, padding="valid"))
model.add(
    tf.keras.layers.Conv1D(
        filters=32, kernel_size=3, strides=1, padding="causal", activation="relu"
    )
)
model.add(tf.keras.layers.MaxPooling1D(pool_size=2, strides=1, padding="valid"))

# Both LSTMs return the full sequence; Flatten then concatenates all time
# steps into a single vector for the dense layers.
model.add(tf.keras.layers.LSTM(128, return_sequences=True))
model.add(tf.keras.layers.LSTM(128, return_sequences=True))
model.add(tf.keras.layers.Flatten())

model.add(tf.keras.layers.Dense(64, activation="relu"))
model.add(tf.keras.layers.Dropout(0.2))
model.add(tf.keras.layers.Dense(16, activation="relu"))
model.add(tf.keras.layers.Dropout(0.1))
# Single linear output unit: one regression value per window.
model.add(tf.keras.layers.Dense(1))

# Smooth (non-staircase) exponential decay of the learning rate.
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=1e-4,
    decay_steps=100000,
    decay_rate=0.98,
    staircase=False,
)

model.compile(
    loss=tf.keras.losses.MeanSquaredError(),
    optimizer=tf.keras.optimizers.SGD(learning_rate=lr_schedule, momentum=0.8),
    metrics=['mae'],
)
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d (Conv1D)             (None, 15, 64)            6144      
                                                                 
 max_pooling1d (MaxPooling1D  (None, 14, 64)           0         
 )                                                               
                                                                 
 conv1d_1 (Conv1D)           (None, 14, 32)            6176      
                                                                 
 max_pooling1d_1 (MaxPooling  (None, 13, 32)           0         
 1D)                                                             
                                                                 
 lstm (LSTM)                 (None, 13, 128)           82432     
                                                                 
 lstm_1 (LSTM)               (None, 13, 128)           1

In [35]:
# Batches needed to see every window once. The datasets repeat forever, so
# these counts define where an epoch / a validation pass ends.
TRAIN_STEP = math.ceil(len(windowed_Train) / BATCH_SIZE)
VALIDATION_STEP = math.ceil(len(windowed_Test) / BATCH_SIZE)

In [39]:
# One time-stamped TensorBoard log directory per run. os.path.join picks the
# separator of the current OS; a hard-coded "logs\\" prefix only works on
# Windows and elsewhere creates a directory literally named "logs\<timestamp>".
log_dir = os.path.join("logs", datetime.datetime.now().strftime("%d-%m-%Y_%H%M%S"))
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

# Both datasets repeat indefinitely, so the step counts delimit each epoch and
# each validation pass.
history = model.fit(
    train_set,
    epochs=1000,
    steps_per_epoch=TRAIN_STEP,
    validation_data=test_set,
    validation_steps=VALIDATION_STEP,
    verbose=1,
    callbacks=[tensorboard_callback],
)

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000

KeyboardInterrupt: 

In [41]:
# Small 3x3 integer matrix used to try out MinMaxScaler.
arr = np.array(
    [
        [1, 2, 4],
        [6, 3, 6],
        [42, 6, 2],
    ]
)

In [42]:
from sklearn.preprocessing import MinMaxScaler

In [43]:
# Fit a min-max scaler on arr and show the scaled copy: each column is mapped
# onto [0, 1] independently (column minimum -> 0, column maximum -> 1).
s = MinMaxScaler()
s.fit_transform(arr)

array([[0.        , 0.        , 0.5       ],
       [0.12195122, 0.25      , 1.        ],
       [1.        , 1.        , 0.        ]])

In [44]:
# Display arr again: fit_transform returned a new array and left arr unchanged.
arr

array([[ 1,  2,  4],
       [ 6,  3,  6],
       [42,  6,  2]])

In [45]:
# Round-trip check: inverse_transform undoes the scaling and recovers the
# original values (returned as floats). Note that the scaler is re-fitted here.
s.inverse_transform(s.fit_transform(arr))

array([[ 1.,  2.,  4.],
       [ 6.,  3.,  6.],
       [42.,  6.,  2.]])