In [1]:
import pandas as pd
from tensorflow  import keras
from tensorflow.keras import layers
from kerastuner.tuners import RandomSearch

In [2]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
dataset  = pd.read_csv('PNQ_AQI.csv')
dataset.head()

Unnamed: 0,Date,SO2 µg/m3,Nox µg/m3,RSPM µg/m3,SPM,CO2 µg/m3,AQI,Location
0,2009-01-01 00:00:00,15,53,179.0,,,153.0,MPCB-KR
1,2009-02-01 00:00:00,15,48,156.0,,,137.0,MPCB-KR
2,2009-03-01 00:00:00,13,51,164.0,,,143.0,MPCB-KR
3,2009-04-01 00:00:00,8,37,135.0,,,123.0,MPCB-KR
4,2009-07-01 00:00:00,13,36,140.0,,,127.0,MPCB-KR


In [4]:
dataset.dtypes

Date           object
SO2 µg/m3      object
Nox µg/m3      object
RSPM µg/m3    float64
SPM           float64
CO2 µg/m3     float64
AQI           float64
Location       object
dtype: object

# I have object type in Nox... columns so converting it to float

In [5]:
dataset['Nox µg/m3'] = pd.to_numeric(dataset.columns[2], errors='coerce').astype(float)

In [6]:
dataset.dtypes

Date           object
SO2 µg/m3      object
Nox µg/m3     float64
RSPM µg/m3    float64
SPM           float64
CO2 µg/m3     float64
AQI           float64
Location       object
dtype: object

In [7]:
df1 = dataset.iloc[:, 3: 4]
df2 = dataset.iloc[:, -2]
df = pd.concat([df1, df2], axis=1)
df.head()

Unnamed: 0,RSPM µg/m3,AQI
0,179.0,153.0
1,156.0,137.0
2,164.0,143.0
3,135.0,123.0
4,140.0,127.0


In [8]:
df.isnull().sum()

RSPM µg/m3     1
AQI           57
dtype: int64

In [9]:
df = df.dropna()
df.shape

(7786, 2)

In [10]:
df.isnull().sum()

RSPM µg/m3    0
AQI           0
dtype: int64

In [11]:
X = df.iloc[:, 1: 4]
y = df.iloc[:, -2]

In [12]:
#X = np.asarray(X).astype(np.float32)
#y = np.asarray(y).astype(np.float32)

In [13]:
X.shape

(7786, 1)

In [21]:
def build_model(hp):                                             # this 'hp' argument is autometically generated 
    model  = keras.Sequential()
    for i in range(hp.Int('num_layers', 2, 20)):                 # it says NN have hidden layer bw 2 - 20
        model.add(layers.Dense(units=hp.Int('units_' + str(i),
                                            min_value=32,        # no. of nodes range 32 - 512
                                            max_value=512,
                                            step=32),
                               activation='relu'))
        model.add(layers.Dense(1, activation='linear'))          # its a regression problem so it has 1 output
        
        model.compile(
            optimizer=keras.optimizers.Adam(hp.Choice('learning_rate', [1e-2, 1e-3, 1e-4])),
            loss='mean_absolute_error',
            metrics=['mean_absolute_error'])
        
        return model
    
    # hp.Int try differnt different values
    # hp.Chosice take only The best parameter

In [22]:
tuner = RandomSearch(
    build_model, 
    objective = 'val_mean_absolute_error', # whenever we training the  regression model  we get this parameter
    max_trials = 5,                         # total 15 times would it be run 
    executions_per_trial = 3,
    directory = 'project', 
    project_name = 'Air_Quality_Index')

INFO:tensorflow:Reloading Oracle from existing project project\Air_Quality_Index\oracle.json
INFO:tensorflow:Reloading Tuner from project\Air_Quality_Index\tuner0.json


In [23]:
tuner.search_space_summary()

Search space summary
Default search space size: 3
num_layers (Int)
{'default': None, 'conditions': [], 'min_value': 2, 'max_value': 20, 'step': 1, 'sampling': None}
units_0 (Int)
{'default': None, 'conditions': [], 'min_value': 32, 'max_value': 512, 'step': 32, 'sampling': None}
learning_rate (Choice)
{'default': 0.01, 'conditions': [], 'values': [0.01, 0.001, 0.0001], 'ordered': True}


In [24]:
#Spliting The Dataset into Training & Testing
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

In [25]:
tuner.search(X_train, y_train, epochs=5, validation_data=(X_test, y_test))

INFO:tensorflow:Oracle triggered exit


In [26]:
tuner.results_summary()  # it gives the 10 best trials

Results summary
Results in project\Air_Quality_Index
Showing 10 best trials
Objective(name='val_mean_absolute_error', direction='min')
Trial summary
Hyperparameters:
num_layers: 16
units_0: 128
learning_rate: 0.001
Score: 8.115512212117514
Trial summary
Hyperparameters:
num_layers: 19
units_0: 256
learning_rate: 0.01
Score: 10.967863082885742
Trial summary
Hyperparameters:
num_layers: 17
units_0: 320
learning_rate: 0.01
Score: 11.277034123738607
Trial summary
Hyperparameters:
num_layers: 5
units_0: 416
learning_rate: 0.0001
Score: 14.183066050211588
Trial summary
Hyperparameters:
num_layers: 3
units_0: 384
learning_rate: 0.0001
Score: 14.20319684346517
