In [82]:
import pandas as pd
from tensorflow import keras
from tensorflow.keras import layers
from keras_tuner import RandomSearch 

In [83]:
# Air quality index prodiction
df=pd.read_csv(r"C:\Users\Asus\Downloads\Real_Combine.csv") 

df.shape

(1093, 9)

In [84]:
df.head(2)



Unnamed: 0,T,TM,Tm,SLP,H,VV,V,VM,PM 2.5
0,7.4,9.8,4.8,1017.6,93.0,0.5,4.3,9.4,219.720833
1,7.8,12.7,4.4,1018.5,87.0,0.6,4.4,11.1,182.1875


In [85]:
X=df.iloc[:,:-1] ## independent features
Y=df.iloc[:,-1]  ##dependent features

In [86]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Hyperparameters
1. How many number of hidden layers we should have?
2. How many number of neurons we should have in hidden layers?
3. Leraning rate

In [87]:
def build_model(hp):
    model = keras.Sequential()
    for i in range(hp.Int('num_layers', 2, 10)):
        model.add(layers.Dense(units=hp.Int('units_' + str(i),
                                            min_value=32,
                                            max_value=512,
                                            step=32),
                               activation='relu'))
    model.add(layers.Dense(1, activation='linear'))
    model.compile(
        optimizer=keras.optimizers.Adam(
            hp.Choice('learning_rate', [1e-3, 1e-4, 5e-4])),
        loss='mean_absolute_error',
        metrics=['mean_absolute_error'])
    return model

# Context:
hp is usually a HyperParameters object that gets passed into a model-building function. This object helps define which hyperparameters to search over and what values they can take.

# Breakdown of hp.Int('num_layers', 2, 20):
hp: The hyperparameter object
.Int(): A method used to define an integer-valued hyperparameter.

'num_layers': The name/key of this hyperparameter. This is how it will be referenced later.

2, 20: The lower and upper bounds for this integer hyperparameter. So the tuning algorithm can pick any integer between 2 and 20 (inclusive).

hp.Int will try with different different values whereas hp.choice will either chose one among the choised(here learning rate) 

In [93]:
tuner = RandomSearch(
    build_model,
    objective='val_mean_absolute_error',
    max_trials=10,
    executions_per_trial=3,
    directory='project',
    project_name='Air Quality Index')

tuner.oracle.max_consecutive_failed_trials = 10  # Increased tolerance


Reloading Tuner from project\Air Quality Index\tuner0.json


In [94]:
tuner.search_space_summary()

Search space summary
Default search space size: 19
num_layers (Int)
{'default': None, 'conditions': [], 'min_value': 2, 'max_value': 20, 'step': 1, 'sampling': 'linear'}
units_0 (Int)
{'default': None, 'conditions': [], 'min_value': 32, 'max_value': 512, 'step': 32, 'sampling': 'linear'}
units_1 (Int)
{'default': None, 'conditions': [], 'min_value': 32, 'max_value': 512, 'step': 32, 'sampling': 'linear'}
learning_rate (Choice)
{'default': 0.01, 'conditions': [], 'values': [0.01, 0.001, 0.0001], 'ordered': True}
units_2 (Int)
{'default': None, 'conditions': [], 'min_value': 32, 'max_value': 512, 'step': 32, 'sampling': 'linear'}
units_3 (Int)
{'default': None, 'conditions': [], 'min_value': 32, 'max_value': 512, 'step': 32, 'sampling': 'linear'}
units_4 (Int)
{'default': None, 'conditions': [], 'min_value': 32, 'max_value': 512, 'step': 32, 'sampling': 'linear'}
units_5 (Int)
{'default': None, 'conditions': [], 'min_value': 32, 'max_value': 512, 'step': 32, 'sampling': 'linear'}
units_6

In [95]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, Y, test_size=0.3, random_state=0)

In [96]:
tuner.search(X_train, y_train,
             epochs=10,
             validation_data=(X_test, y_test),
             verbose=2)

Trial 10 Complete [00h 00m 45s]
val_mean_absolute_error: nan

Best val_mean_absolute_error So Far: 41.41451772054037
Total elapsed time: 01h 24m 08s


In [92]:
tuner.results_summary()


Results summary
Results in project\Air Quality Index
Showing 10 best trials
Objective(name="val_mean_absolute_error", direction="min")

Trial 3 summary
Hyperparameters:
num_layers: 3
units_0: 96
units_1: 352
learning_rate: 0.001
units_2: 448
units_3: 192
units_4: 352
units_5: 384
units_6: 352
units_7: 192
units_8: 320
units_9: 128
units_10: 288
units_11: 128
units_12: 160
units_13: 160
units_14: 512
units_15: 288
units_16: 384
Score: 41.41451772054037

Trial 4 summary
Hyperparameters:
num_layers: 12
units_0: 64
units_1: 64
learning_rate: 0.0001
units_2: 288
units_3: 448
units_4: 288
units_5: 32
units_6: 448
units_7: 96
units_8: 192
units_9: 320
units_10: 160
units_11: 512
units_12: 192
units_13: 96
units_14: 416
units_15: 416
units_16: 288
Score: 48.257957458496094

Trial 1 summary
Hyperparameters:
num_layers: 3
units_0: 384
units_1: 256
learning_rate: 0.01
units_2: 512
units_3: 288
units_4: 64
units_5: 384
units_6: 256
units_7: 64
units_8: 320
units_9: 320
units_10: 448
units_11: 64
u

# Errors faced and their resolution
## 1. Number of consecutive failures exceeded the limit of 3
solution:tuner.oracle.max_consecutive_failed_trials = 10  # Increased tolerance

How to check that there is problem in data:
do the sanity check for model
If the models shows no output: check for null values in you data if not present do standardization for data

## 2. Also tried Early stoppping but it did not worked
## 3. changed the learning rate and number of layers from  2to 20  to 1 to 4

In [53]:
#Sanity check
model = keras.Sequential([
    layers.Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    layers.Dense(64, activation='relu'),
    layers.Dense(1, activation='linear')
])
model.compile(optimizer='adam', loss='mean_absolute_error')
model.fit(X_train, y_train, epochs=5, validation_data=(X_test, y_test))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/5
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 28ms/step - loss: 110.5312 - val_loss: 104.7027
Epoch 2/5
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step - loss: 109.9045 - val_loss: 101.7760
Epoch 3/5
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - loss: 103.6511 - val_loss: 95.1094
Epoch 4/5
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - loss: 97.0635 - val_loss: 82.8369
Epoch 5/5
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - loss: 84.0995 - val_loss: 69.0827


<keras.src.callbacks.history.History at 0x25378e8df40>

In [69]:
import numpy as np

sample = [7.4, 9.8, 4.8, 1017.6, 93.0, 0.5, 4.3, 9.4]
sample = np.array(sample).reshape(1, -1)
sample_scaled = scaler.transform(sample)

best_model = tuner.get_best_models(num_models=1)[0]
prediction = best_model.predict(sample_scaled)

print("Predicted Air Quality Index:", prediction[0][0])


  saveable.load_own_variables(weights_store.get(inner_path))


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 222ms/step
Predicted Air Quality Index: 274.46295
