In [1]:
import os
import warnings
warnings.filterwarnings(action='ignore')

import pandas as pd
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
import keras_tuner as kt
import numpy as np

In [2]:
cwd = os.getcwd()

#### Read data

In [3]:
target = 'pm2_5'

In [4]:
train_file_path = os.path.join(
    cwd,
    'cleaned_dataset',
    'train.csv'
)
train_df = pd.read_csv(train_file_path)
train_df.head()

Unnamed: 0,site_latitude,site_longitude,humidity,temp_mean,SulphurDioxide_SO2_column_number_density,SulphurDioxide_SO2_column_number_density_amf,SulphurDioxide_SO2_slant_column_number_density,SulphurDioxide_cloud_fraction,SulphurDioxide_sensor_azimuth_angle,SulphurDioxide_sensor_zenith_angle,...,Cloud_cloud_top_pressure_na,Cloud_cloud_top_height_na,Cloud_cloud_base_pressure_na,Cloud_cloud_base_height_na,Cloud_cloud_optical_depth_na,Cloud_surface_albedo_na,Cloud_sensor_azimuth_angle_na,Cloud_sensor_zenith_angle_na,Cloud_solar_azimuth_angle_na,Cloud_solar_zenith_angle_na
0,0.28904,32.58958,0.721848,23.673913,1e-05,0.785107,9e-06,0.148113,-97.301544,42.489365,...,0,0,0,0,0,0,0,0,0,0
1,0.3547,32.5026,0.85701,21.170833,1e-05,0.785107,9e-06,0.148113,-97.301544,42.489365,...,0,0,0,0,0,0,0,0,0,0
2,0.344,32.553,0.831972,22.041667,1e-05,0.785107,9e-06,0.148113,-97.301544,42.489365,...,0,0,0,0,0,0,0,0,0,0
3,0.343729,32.551314,0.0,0.0,-0.000203,0.645272,-0.000131,0.275436,-97.33699,63.287609,...,0,0,0,0,0,0,0,0,0,0
4,0.353465,32.560322,0.896038,22.052083,1e-05,0.785107,9e-06,0.148113,-97.301544,42.489365,...,0,0,0,0,0,0,0,0,0,0


In [5]:
X_train = train_df.drop(
    labels=[target],
    axis=1
).values
y_train = train_df[target].values

X_train.shape, y_train.shape

((7938, 133), (7938,))

In [6]:
valid_file_path = os.path.join(
    cwd,
    'cleaned_dataset',
    'valid.csv'
)
valid_df = pd.read_csv(valid_file_path)
valid_df.head()

Unnamed: 0,site_latitude,site_longitude,humidity,temp_mean,SulphurDioxide_SO2_column_number_density,SulphurDioxide_SO2_column_number_density_amf,SulphurDioxide_SO2_slant_column_number_density,SulphurDioxide_cloud_fraction,SulphurDioxide_sensor_azimuth_angle,SulphurDioxide_sensor_zenith_angle,...,Cloud_cloud_top_pressure_na,Cloud_cloud_top_height_na,Cloud_cloud_base_pressure_na,Cloud_cloud_base_height_na,Cloud_cloud_optical_depth_na,Cloud_surface_albedo_na,Cloud_sensor_azimuth_angle_na,Cloud_sensor_zenith_angle_na,Cloud_solar_azimuth_angle_na,Cloud_solar_zenith_angle_na
0,0.343729,32.551314,0.915976,20.220486,1e-05,0.785107,9e-06,0.148113,-97.301544,42.489365,...,0,0,0,0,0,0,0,0,0,0
1,-1.244985,29.989236,0.636146,14.80625,1e-05,0.785107,9e-06,0.148113,-97.301544,42.489365,...,0,0,0,0,0,0,0,0,0,0
2,0.360209,32.610756,0.815392,20.884722,1e-05,0.785107,9e-06,0.148113,-97.301544,42.489365,...,0,0,0,0,0,0,0,0,0,0
3,0.307489,32.611755,0.692386,24.837879,0.000208,0.821581,0.000171,0.131386,-100.64325,26.927464,...,1,1,1,1,1,1,1,1,1,1
4,0.354825,32.67781,0.900618,20.433681,1e-05,0.785107,9e-06,0.148113,-97.301544,42.489365,...,0,0,0,0,0,0,0,0,0,0


In [7]:
X_valid = valid_df.drop(
    labels=[target],
    axis=1
).values
y_valid = valid_df[target].values

X_valid.shape, y_valid.shape

((1985, 133), (1985,))

#### Scale the data

In [8]:
ss = StandardScaler()
ss.fit(X_train)
X_train_scaled = ss.transform(X_train)
X_valid_scaled = ss.transform(X_valid)

#### Create model

In [9]:
unit_choices = [8, 16, 32, 64, 126, 256, 512]
activation_choices = ['relu', 'tanh']
max_number_of_layers = 10

In [10]:
def build_model(hp):
  model = tf.keras.models.Sequential()
  for i in range(hp.Int('number_of_layers', 1, max_number_of_layers)):
    model.add(
      tf.keras.layers.Dense(
        hp.Choice('units', unit_choices),
        activation=hp.Choice(
          'dense_activation',
          values=activation_choices
        )
      )
    )
  model.add(tf.keras.layers.Dropout(rate=0.25))
  model.add(tf.keras.layers.Dense(1, activation='linear'))

  learning_rate = hp.Float('learning_rate', min_value=1e-5, max_value=1e-1, sampling='log')

  model.compile(
    optimizer=tf.keras.optimizers.RMSprop(learning_rate=learning_rate),
    loss='mae',
    metrics=['mae']
  )
  return model

In [11]:
tuner = kt.RandomSearch(
    build_model,
    objective=kt.Objective("val_mae", direction="min"),
    max_trials=15,
    project_name='layer_airquo',
    executions_per_trial=2,
    overwrite=True,
)

In [12]:
tuner.search_space_summary()

Search space summary
Default search space size: 4
num_layers (Int)
{'default': None, 'conditions': [], 'min_value': 1, 'max_value': 10, 'step': 1, 'sampling': None}
units (Choice)
{'default': 8, 'conditions': [], 'values': [8, 16, 32, 64, 126, 256, 512], 'ordered': True}
dense_activation (Choice)
{'default': 'relu', 'conditions': [], 'values': ['relu', 'tanh'], 'ordered': False}
learning_rate (Float)
{'default': 1e-05, 'conditions': [], 'min_value': 1e-05, 'max_value': 0.1, 'step': None, 'sampling': 'log'}


In [13]:
tuner.search(
    X_train_scaled, y_train,
    epochs=50,
    validation_data=(X_valid_scaled, y_valid)
)

Trial 14 Complete [00h 03m 09s]
val_mae: 20.15346622467041

Best val_mae So Far: 12.795536994934082
Total elapsed time: 00h 33m 01s

Search: Running Trial #15

Value             |Best Value So Far |Hyperparameter
8                 |2                 |num_layers
126               |126               |units
relu              |tanh              |dense_activation
1.4062e-05        |0.0026283         |learning_rate

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50

In [None]:
tuner.results_summary()

#### Note

There are two ways to build the model from the search.

In [None]:
best_parameters = tuner.get_best_hyperparameters()[0]
best_model = build_model(best_parameters)
best_model

Another way to build the model

In [None]:
best_model = tuner.get_best_models()[0]
best_model

#### Build the model

In [None]:
input_shape = (1, X_train_scaled.shape[1])
best_model.build(input_shape=input_shape)
best_model.summary()

In [None]:
history = best_model.fit(
    x=X_train,
    y=y_train,
    epochs=10,
    shuffle=True,
    validation_data=(X_valid, y_valid)
)

In [None]:
best_model_dir = os.path.join(
    cwd,
    'best_model'
)

In [None]:
best_model.save(filepath=best_model_dir, include_optimizer=True)

In [None]:
new_model = tf.keras.models.load_model(best_model_dir)

In [None]:
new_model