In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sb
import numpy as np

In [44]:
air_data=pd.read_csv("D:\Study\Python\scripts\Deep_Learning\Air_Quality_KeraTuner\Air_data.csv")

In [45]:
air_data.shape

(1093, 9)

In [46]:
air_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1093 entries, 0 to 1092
Data columns (total 9 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Temp    1093 non-null   float64
 1   Tmax    1093 non-null   float64
 2   Tmin    1093 non-null   float64
 3   SLP     1093 non-null   float64
 4   H       1093 non-null   int64  
 5   VV      1093 non-null   float64
 6   V       1093 non-null   float64
 7   VM      1093 non-null   float64
 8   PM 2.5  1092 non-null   float64
dtypes: float64(8), int64(1)
memory usage: 77.0 KB


In [47]:
air_data.isnull().sum()

Temp      0
Tmax      0
Tmin      0
SLP       0
H         0
VV        0
V         0
VM        0
PM 2.5    1
dtype: int64

In [48]:
air_data.duplicated().sum()

362

In [49]:
air_data.describe()

Unnamed: 0,Temp,Tmax,Tmin,SLP,H,VV,V,VM,PM 2.5
count,1093.0,1093.0,1093.0,1093.0,1093.0,1093.0,1093.0,1093.0,1092.0
mean,26.009241,32.482251,19.460201,1008.081885,62.918573,2.003111,6.75151,15.805124,109.090984
std,7.237401,6.679078,7.438653,7.529237,15.709816,0.747541,3.841137,7.308435,84.46579
min,6.7,9.8,0.0,991.5,20.0,0.3,0.4,1.9,0.0
25%,19.3,27.8,12.1,1001.1,54.0,1.6,3.7,11.1,41.833333
50%,28.2,34.2,21.2,1008.1,64.0,1.9,6.5,14.8,83.458333
75%,31.7,37.0,26.0,1015.0,74.0,2.6,9.1,18.3,158.291667
max,38.5,45.5,32.7,1023.2,98.0,5.8,24.4,57.6,404.5


In [50]:
air_data.replace(to_replace=np.nan, value=air_data['PM 2.5'].mean(), inplace=True)

In [52]:
air_data=air_data.drop_duplicates()

In [55]:
X=air_data.drop(['PM 2.5'], axis=1)
Y=air_data['PM 2.5']

In [56]:
from sklearn.preprocessing import StandardScaler
scaler=StandardScaler()
X=scaler.fit_transform(X)

In [68]:
from sklearn.model_selection import train_test_split
X_train,X_test,Y_train,Y_test=train_test_split(X,Y, test_size=0.2, random_state=0)

# Kerastuner_Deciding number of hidden layer, neurons and learning rate for linear problem:

In [63]:
from tensorflow import keras
from keras.models import Sequential
from keras import layers
from kerastuner.tuners import RandomSearch

In [61]:
def build_model(hp):
    model = keras.Sequential()
    for i in range(hp.Int('num_layers', 2, 20)):                      
        model.add(layers.Dense(units=hp.Int('units_' + str(i),
                                            min_value=32,                      #number of neurons from 32 to 512 in each layer:
                                            max_value=512,
                                            step=32),
                               activation='relu'))
    model.add(layers.Dense(1, activation='linear'))
    model.compile(
        optimizer=keras.optimizers.Adam(
            hp.Choice('learning_rate', [1e-2, 1e-3, 1e-4])),
        loss='mean_absolute_error',
        metrics=['mean_absolute_error'])
    return model

In [64]:
tuner = RandomSearch(
    build_model,
    objective='val_mean_absolute_error',
    max_trials=5,
    executions_per_trial=3,
    directory='Project',
    project_name='Air_Quality_Index')

In [66]:
tuner.search_space_summary()

Search space summary
Default search space size: 4
num_layers (Int)
{'default': None, 'conditions': [], 'min_value': 2, 'max_value': 20, 'step': 1, 'sampling': None}
units_0 (Int)
{'default': None, 'conditions': [], 'min_value': 32, 'max_value': 512, 'step': 32, 'sampling': None}
units_1 (Int)
{'default': None, 'conditions': [], 'min_value': 32, 'max_value': 512, 'step': 32, 'sampling': None}
learning_rate (Choice)
{'default': 0.01, 'conditions': [], 'values': [0.01, 0.001, 0.0001], 'ordered': True}


# tuner.search(X,Y) just like a fit a model

In [69]:
tuner.search(X, Y,
             epochs=5,
             validation_data=(X_test, Y_test))


Search: Running Trial #1

Hyperparameter    |Value             |Best Value So Far 
num_layers        |9                 |?                 
units_0           |256               |?                 
units_1           |320               |?                 
learning_rate     |0.01              |?                 

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5

Trial 1 Complete [00h 00m 12s]
val_mean_absolute_error: 36.606947580973305

Best val_mean_absolute_error So Far: 36.606947580973305
Total elapsed time: 00h 00m 12s

Search: Running Trial #2

Hyperparameter    |Value             |Best Value So Far 
num_layers        |4                 |9                 
units_0           |288               |256               
units_1           |224               |320               
learning_rate     |0.0001            |0.01              
units_2           |192               |32                
uni

In [71]:
models = tuner.get_best_models(num_models=2)
models



[<tensorflow.python.keras.engine.sequential.Sequential at 0x1dbb015cbc8>,
 <tensorflow.python.keras.engine.sequential.Sequential at 0x1dbb8fb9588>]

In [72]:
tuner.results_summary()

Results summary
Results in Project\Air_Quality_Index
Showing 10 best trials
Objective(name='val_mean_absolute_error', direction='min')
Trial summary
Hyperparameters:
num_layers: 9
units_0: 256
units_1: 320
learning_rate: 0.01
units_2: 32
units_3: 32
units_4: 32
units_5: 32
units_6: 32
units_7: 32
units_8: 32
Score: 36.606947580973305
Trial summary
Hyperparameters:
num_layers: 15
units_0: 416
units_1: 384
learning_rate: 0.001
units_2: 224
units_3: 288
units_4: 288
units_5: 96
units_6: 224
units_7: 64
units_8: 64
units_9: 128
units_10: 480
units_11: 96
units_12: 32
units_13: 512
units_14: 32
Score: 37.25659688313802
Trial summary
Hyperparameters:
num_layers: 14
units_0: 320
units_1: 64
learning_rate: 0.0001
units_2: 160
units_3: 288
units_4: 96
units_5: 416
units_6: 320
units_7: 288
units_8: 96
units_9: 32
units_10: 32
units_11: 32
units_12: 32
units_13: 32
Score: 54.08935038248698
Trial summary
Hyperparameters:
num_layers: 4
units_0: 288
units_1: 224
learning_rate: 0.0001
units_2: 192
u