In [None]:
import os
from datetime import datetime
import warnings
warnings.filterwarnings(action='ignore')

import pandas as pd
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
import keras_tuner as kt
import numpy as np

In [None]:
# Base directory for every dataset / output path built below: the current working directory.
cwd = os.getcwd()

#### Read data

In [None]:
# Name of the label column: dropped from the feature matrices, read as y, and
# written as the prediction column of the submission file.
target = 'pm2_5'

In [None]:
# Training split lives at <cwd>/cleaned_dataset/train.csv.
train_file_path = os.path.join(cwd, 'cleaned_dataset', 'train.csv')
train_df = pd.read_csv(filepath_or_buffer=train_file_path)

# Preview the first rows as the cell output.
train_df.head()

In [None]:
# Features: every column except the target. Labels: the target column alone.
X_train = train_df.drop(columns=[target]).values
y_train = train_df[target].values

# Show both array shapes as a quick sanity check.
(X_train.shape, y_train.shape)

In [None]:
# Validation split lives at <cwd>/cleaned_dataset/valid.csv.
valid_file_path = os.path.join(cwd, 'cleaned_dataset', 'valid.csv')
valid_df = pd.read_csv(filepath_or_buffer=valid_file_path)

# Preview the first rows as the cell output.
valid_df.head()

In [None]:
# Same feature/label split as for the training frame.
X_valid = valid_df.drop(columns=[target]).values
y_valid = valid_df[target].values

# Show both array shapes as a quick sanity check.
(X_valid.shape, y_valid.shape)

#### Scale the data

In [None]:
# Fit the scaler on the training features only, then apply that same fitted
# transformation to both the training and the validation features.
ss = StandardScaler().fit(X_train)
X_train_scaled, X_valid_scaled = ss.transform(X_train), ss.transform(X_valid)

#### Create model

In [None]:
# Search space shared by build_model.
# Candidate layer widths are powers of two (the original list contained 126,
# a typo for 128).
unit_choices = [8, 16, 32, 64, 128, 256, 512]
# Candidate activations for the hidden Dense layers.
activation_choices = ['relu', 'tanh']
# Number of hidden Dense layers stacked before the dropout/output layers.
number_of_layers = 3

In [None]:
def build_model(hp):
  """Build and compile a dense regression network for one tuner trial.

  The network is `number_of_layers` Dense layers, followed by Dropout(0.25)
  and a single linear output unit. Layer width, activation and learning rate
  are requested from `hp`.

  NOTE(review): every hidden layer asks for its hyperparameters under the same
  names ('units', 'dense_activation'), so presumably all hidden layers share
  one width and one activation within a trial -- confirm this is intended.

  Args:
    hp: hyperparameter container supplied by Keras Tuner.

  Returns:
    A compiled Sequential model (RMSprop optimizer, MAE loss, MAE metric).
  """
  hidden_layers = [
    tf.keras.layers.Dense(
      hp.Choice('units', unit_choices),
      activation=hp.Choice('dense_activation', values=activation_choices),
    )
    for _ in range(number_of_layers)
  ]
  head_layers = [
    tf.keras.layers.Dropout(rate=0.25),
    tf.keras.layers.Dense(1, activation='linear'),
  ]
  model = tf.keras.models.Sequential(hidden_layers + head_layers)

  # Learning rate is sampled on a log scale between 1e-5 and 1e-2.
  optimizer = tf.keras.optimizers.RMSprop(
    learning_rate=hp.Float(
      'learning_rate', min_value=1e-5, max_value=1e-2, sampling='log'
    )
  )
  # The 'mae' metric is what exposes 'val_mae' during validation.
  model.compile(optimizer=optimizer, loss='mae', metrics=['mae'])
  return model

In [None]:
# Random search over the build_model space: up to 15 trials, each executed
# twice, ranked by validation MAE (lower is better).
# overwrite=True means results from an earlier 'layer_airquo' project are not reused.
tuner = kt.RandomSearch(
    hypermodel=build_model,
    objective=kt.Objective('val_mae', direction='min'),
    max_trials=15,
    executions_per_trial=2,
    project_name='layer_airquo',
    overwrite=True,
)

In [None]:
# Print the hyperparameters the tuner will search over, before starting the search.
tuner.search_space_summary()

In [None]:
# Run the search on the standardised features; each trial trains for 10 epochs
# and is scored on the standardised validation split.
tuner.search(
    x=X_train_scaled,
    y=y_train,
    validation_data=(X_valid_scaled, y_valid),
    epochs=10,
)

In [None]:
# Print a summary of the completed search results.
tuner.results_summary()

#### Note

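There are two ways to get the best model from the search: rebuild a fresh, untrained model from the best hyperparameters (first cell below), or load the best model that the tuner already trained (second cell below). Because the second cell reassigns `best_model`, the later cells use the model loaded from the tuner.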

In [None]:
# Option 1: take the top-ranked hyperparameter set and construct a new model
# from it with build_model (this is a newly built model, not one loaded from a trial).
best_parameters = tuner.get_best_hyperparameters(num_trials=1)[0]
best_model = build_model(hp=best_parameters)
best_model

The second way: retrieve the best model directly from the tuner instead of rebuilding it from the hyperparameters.

In [None]:
# Option 2: ask the tuner for its single best model. This reassigns
# best_model, so the cells below operate on the tuner-provided model.
best_model = tuner.get_best_models(num_models=1)[0]
best_model

#### Build the model

In [None]:
# Input shape: unspecified batch size (None) by the number of feature columns.
input_shape = (None, X_train_scaled.shape[1])
# Build the model explicitly with that shape before printing its summary.
best_model.build(input_shape=input_shape)
best_model.summary()

In [None]:
# Continue training the selected model for 10 more epochs.
# The standardised arrays are used here on purpose: the tuner trained on
# X_train_scaled / X_valid_scaled and predictions are made on X_test_scaled,
# so fitting on the raw X_train / X_valid would train and evaluate the network
# on a different feature scale than the one it is used with.
history = best_model.fit(
    x=X_train_scaled,
    y=y_train,
    epochs=10,
    shuffle=True,
    validation_data=(X_valid_scaled, y_valid),
    callbacks=[
        # Multiply the learning rate by 0.2 when val_loss has not improved for
        # 5 epochs, never going below min_lr.
        # NOTE(review): build_model samples the learning rate from [1e-5, 1e-2],
        # so min_lr=0.001 may already be at or above the tuned rate, in which
        # case this callback presumably never lowers it -- confirm.
        tf.keras.callbacks.ReduceLROnPlateau(
            monitor='val_loss',
            factor=0.2,
            patience=5,
            min_lr=0.001
        )
    ]
)

#### Prediction

In [None]:
# Test split lives at <cwd>/cleaned_dataset/test.csv.
test_file_path = os.path.join(cwd, 'cleaned_dataset', 'test.csv')
test_df = pd.read_csv(filepath_or_buffer=test_file_path)

# Preview the first rows as the cell output.
test_df.head()

In [None]:
# The test frame is used as-is: every column is treated as a feature (nothing is dropped).
X_test = test_df.values

In [None]:
# Display the dimensions of the test feature matrix.
X_test.shape

In [None]:
# Apply the scaler that was fitted on the training features; it is not refitted here.
X_test_scaled = ss.transform(X_test)

In [None]:
# Model predictions for the test rows (named y_test, but these are predictions, not labels).
# The network ends in Dense(1), so this is presumably shaped (n_samples, 1) -- TODO confirm.
y_test = best_model.predict(X_test_scaled)

#### Submission

In [None]:
# Load the sample submission from <cwd>/dataset/SampleSubmission.csv; it is
# used as the template for the file written later.
submission = pd.read_csv(
    filepath_or_buffer=os.path.join(cwd, 'dataset', 'SampleSubmission.csv')
)

# Preview the first rows as the cell output.
submission.head()

In [None]:
# Write the model predictions into the target column of the submission frame.
submission[target] = y_test

In [None]:
# Preview the submission frame after the predictions were filled in.
submission.head()

In [None]:
# Minute-resolution timestamp used as the prefix of the submission file name.
now = datetime.now()
timestamp = now.strftime("%Y-%m-%d %H-%M")

model_name = 'ann_keras_tuner'
attempt = 1

# DataFrame.to_csv does not create missing parent directories, so make sure
# the output folder exists first (mirrors how the best_model folder is created).
submissions_dir = os.path.join(cwd, 'submissions')
os.makedirs(submissions_dir, exist_ok=True)

# File name pattern: <timestamp>_<model_name>_<attempt>.csv, written without the index column.
submission.to_csv(
    path_or_buf=os.path.join(
        submissions_dir,
        f'{timestamp}_{model_name}_{attempt}.csv'
    ),
    index=False
)

In [None]:
# Saved models go under <cwd>/best_model/<attempt>; create the folder if it is missing.
best_model_dir = os.path.join(cwd, 'best_model', str(attempt))
os.makedirs(name=best_model_dir, exist_ok=True)

In [None]:
# Persist the trained model, including optimizer state, into the attempt directory.
# NOTE(review): the path has no file extension; newer Keras releases presumably
# require a '.keras' or '.h5' suffix here -- confirm against the installed version.
best_model.save(filepath=best_model_dir, include_optimizer=True)

In [None]:
# Reload the model from the directory it was just saved to, as a round-trip check.
new_model = tf.keras.models.load_model(best_model_dir)

In [None]:
# Display the reloaded model object as the cell output.
new_model