In [8]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, GaussianNoise, GaussianDropout, BatchNormalization
from tensorflow.keras import layers, regularizers
from sklearn.model_selection import KFold
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
import tensorflow as tf
# Expose an empty list of GPU devices to TensorFlow, i.e. hide every GPU so
# that the rest of the notebook runs on the CPU.
tf.config.set_visible_devices([], 'GPU')

In [9]:
seed = 42
# Apply the seed to Python's `random`, NumPy and TensorFlow in one call so that
# weight initialisation, Dropout/GaussianNoise masks and batch shuffling are
# repeatable from run to run (previously `seed` was only passed to
# train_test_split and the TensorFlow side stayed unseeded).
tf.keras.utils.set_random_seed(seed)

# Model configuration
batch_size = 32
loss_function = tf.losses.mean_squared_error
# NOTE: a Keras optimizer instance is stateful (step counter, Adam moment
# estimates, current learning rate); sharing one instance between several
# models carries that state from one model to the next.
optimizer = tf.keras.optimizers.legacy.Adam(learning_rate=1e-3)
verbosity = 2     # Keras `fit` verbosity: one log line per epoch
num_folds = 15    # number of cross-validation splits
no_epochs = 300   # training epochs per fold

In [10]:
# Columns kept for modelling: the target 'BC' followed by its predictors.
selected_columns = ['BC', 'N_CPC', 'PM-2.5', 'PM-1.0', 'O3', 'CO', 'NOX', 'TEMP', 'HUM']

# Read the CSV, parse the timestamps, discard every row that has a missing
# value in any column, index the frame by date and keep the selected columns.
data = (
    pd.read_csv("filled.csv")
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
    .dropna()
    .set_index('date')
    [selected_columns]
)

In [11]:
# Shuffled 85/15 split: every column except 'BC' is a predictor, 'BC' is the target.
x_train, x_test, y_train, y_test = train_test_split(
    data.drop(columns=['BC']),
    data['BC'],
    test_size=0.15,
    random_state=seed,
    shuffle=True,
)

In [12]:
# Shape of the training feature frame, as (rows, columns).
input_shape = x_train.shape

# Standardise the predictors: statistics are learned from the training split
# only and then applied to both splits.
scaler = StandardScaler().fit(x_train)
input_train = scaler.transform(x_train)
input_test = scaler.transform(x_test)

# Standardise the target the same way; the scaler expects a 2-D column, hence
# the reshape to (n_samples, 1).
y_train_column = np.array(y_train).reshape(-1, 1)
y_test_column = np.array(y_test).reshape(-1, 1)
scaler_y = StandardScaler().fit(y_train_column)
target_train = scaler_y.transform(y_train_column)
target_test = scaler_y.transform(y_test_column)

In [13]:
# Containers that collect one score per cross-validation fold
acc_per_fold, loss_per_fold = [], []

# Stack the scaled train and test rows back into single arrays (train rows
# first) so the cross-validator can draw its folds from all samples.
inputs = np.vstack((input_train, input_test))
targets = np.vstack((target_train, target_test))

In [15]:
# K-fold cross-validation of the dense regression network.
#
# NOTE(review): `inputs` / `targets` were standardised with scalers fitted on
# the train split only and the two splits were merged afterwards, so the
# scaler statistics are not independent of the validation folds. Consider
# fitting the scalers inside each fold instead.


def build_model(n_features):
  """Return a fresh, uncompiled regression network.

  Architecture: Dense(64) -> GaussianNoise(0.1) -> Dense(112, L1 = 0.01)
  -> Dense(96) -> Dropout(0.1) -> Dense(1), with ReLU on the hidden layers.

  Args:
    n_features: number of input columns fed to the first layer.

  Returns:
    A `Sequential` model that still has to be compiled.
  """
  model = Sequential()
  model.add(Dense(64, activation='relu', input_shape=[n_features]))
  model.add(GaussianNoise(0.1))
  model.add(Dense(112, activation='relu',
                  kernel_regularizer=regularizers.l1_l2(l1=0.01, l2=0.0)))
  # `input_shape` only has an effect on the first layer, and an l1_l2
  # regulariser with both factors at zero adds no penalty, so neither is
  # passed here.
  model.add(Dense(96, activation='relu'))
  model.add(Dropout(0.1))
  model.add(Dense(1))
  return model


# Reset the per-fold containers so that re-running this cell does not append
# to the results of an earlier run and skew the averages.
acc_per_fold = []   # R2 per fold, in percent (name kept for compatibility)
loss_per_fold = []  # test loss per fold

# Hyper-parameters of the configured optimizer. A new optimizer is created
# from them for every fold: reusing one instance would carry Adam's moment
# estimates and the learning rate lowered by ReduceLROnPlateau into the
# following folds.
optimizer_config = optimizer.get_config()

# Define the K-fold Cross Validator (seeded so the folds are reproducible)
kfold = KFold(n_splits=num_folds, shuffle=True, random_state=seed)

# K-fold Cross Validation model evaluation
for fold_no, (train, test) in enumerate(kfold.split(inputs, targets), start=1):

  # Drop the Keras global state left behind by the previous fold's model.
  tf.keras.backend.clear_session()

  model = build_model(inputs.shape[-1])
  model.compile(
      optimizer=type(optimizer).from_config(optimizer_config),
      loss=loss_function,
      metrics=[tf.metrics.mean_absolute_error],
  )

  # Generate a print
  print('------------------------------------------------------------------------')
  print(f'Training for fold {fold_no} ...')

  # Fit data to model; the learning rate is multiplied by 0.7 (down to 1e-4)
  # whenever the training loss has not improved for 50 epochs.
  history = model.fit(
      inputs[train], targets[train],
      batch_size=batch_size,
      epochs=no_epochs,
      verbose=verbosity,
      callbacks=[
          tf.keras.callbacks.ReduceLROnPlateau(
              monitor='loss', factor=0.7, patience=50, min_lr=0.0001),
      ],
  )

  # Generate generalization metrics on the held-out fold: the compiled loss
  # plus R2 computed from the predictions.
  test_loss = model.evaluate(inputs[test], targets[test], verbose=0)[0]
  test_r2 = r2_score(targets[test], model.predict(inputs[test], verbose=0))
  scores = [test_loss, test_r2]

  print(f'Score for fold {fold_no}: {model.metrics_names[0]} of {scores[0]}; R2 score of {scores[1]*100}%')
  acc_per_fold.append(test_r2 * 100)
  loss_per_fold.append(test_loss)

# == Provide average scores ==
print('------------------------------------------------------------------------')
print('Score per fold')
for i, (fold_loss, fold_r2) in enumerate(zip(loss_per_fold, acc_per_fold), start=1):
  print('------------------------------------------------------------------------')
  print(f'> Fold {i} - Loss: {fold_loss} - R2: {fold_r2}%')
print('------------------------------------------------------------------------')
print('Average scores for all folds:')
print(f'> R2: {np.mean(acc_per_fold)} (+- {np.std(acc_per_fold)})')
print(f'> Loss: {np.mean(loss_per_fold)}')
print('------------------------------------------------------------------------')

------------------------------------------------------------------------
Training for fold 1 ...
Epoch 1/300
134/134 - 0s - loss: 1.4682 - mean_absolute_error: 0.4094 - lr: 0.0010 - 261ms/epoch - 2ms/step
Epoch 2/300
134/134 - 0s - loss: 0.5022 - mean_absolute_error: 0.3603 - lr: 0.0010 - 77ms/epoch - 576us/step
Epoch 3/300
134/134 - 0s - loss: 0.4457 - mean_absolute_error: 0.3491 - lr: 0.0010 - 148ms/epoch - 1ms/step
Epoch 4/300
134/134 - 0s - loss: 0.4300 - mean_absolute_error: 0.3498 - lr: 0.0010 - 79ms/epoch - 592us/step
Epoch 5/300
134/134 - 0s - loss: 0.4005 - mean_absolute_error: 0.3396 - lr: 0.0010 - 79ms/epoch - 589us/step
Epoch 6/300
134/134 - 0s - loss: 0.3945 - mean_absolute_error: 0.3373 - lr: 0.0010 - 87ms/epoch - 649us/step
Epoch 7/300
134/134 - 0s - loss: 0.3836 - mean_absolute_error: 0.3374 - lr: 0.0010 - 74ms/epoch - 556us/step
Epoch 8/300
134/134 - 0s - loss: 0.3772 - mean_absolute_error: 0.3337 - lr: 0.0010 - 72ms/epoch - 537us/step
Epoch 9/300
134/134 - 0s - loss: 