In [23]:
import pandas as pd
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers
import keras_tuner as kt

In [24]:
df = pd.read_csv('Real_Combine.csv')
X = df.iloc[:,:-1]
y = df.iloc[:,-1]


In [25]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.3,random_state=42)

In [52]:
# Drop NaNs/Infs from y_test and corresponding rows in X_test
mask = (~np.isnan(y_test)) & (~np.isinf(y_test))
X_test = X_test[mask]
y_test = y_test[mask]


In [54]:
from sklearn.preprocessing import StandardScaler

# Scale features
scaler_X = StandardScaler()
X_train = scaler_X.fit_transform(X_train)
X_test = scaler_X.transform(X_test)

# Scale targets (optional but good for stability)
scaler_y = StandardScaler()
import numpy as np

y_train = scaler_y.fit_transform(y_train.reshape(-1, 1)).flatten()
y_test  = scaler_y.transform(y_test.reshape(-1, 1)).flatten()


In [55]:
import numpy as np

print("NaNs in X_train:", np.isnan(X_train).sum())
print("NaNs in y_train:", np.isnan(y_train).sum())
print("NaNs in X_test:", np.isnan(X_test).sum())
print("NaNs in y_test:", np.isnan(y_test).sum())

print("Inf in X_train:", np.isinf(X_train).sum())
print("Inf in y_train:", np.isinf(y_train).sum())


NaNs in X_train: 0
NaNs in y_train: 0
NaNs in X_test: 0
NaNs in y_test: 0
Inf in X_train: 0
Inf in y_train: 0


In [56]:
def build_model(hp):
    model = keras.Sequential()

    # Limit the number of layers (avoid too big models)
    for i in range(hp.Int('num_layers', 1, 3)):  
        model.add(layers.Dense(
            units=hp.Int('units_' + str(i), min_value=32, max_value=128, step=32),
            activation='relu'
        ))
        model.add(layers.BatchNormalization())
        model.add(layers.Dropout(rate=0.2))  # fixed dropout to avoid instability

    # Output layer
    model.add(layers.Dense(1, activation='linear'))

    # Optimizer with safe learning rates + gradient clipping
    model.compile(
        optimizer=keras.optimizers.Adam(
            learning_rate=hp.Choice('learning_rate', [1e-3, 1e-4]),
            clipnorm=1.0
        ),
        loss='mean_absolute_error',
        metrics=['mae']
    )
    return model


In [57]:
tuner = kt.RandomSearch(
    build_model,
    objective='val_mae',
    max_trials=5,
    executions_per_trial = 1,
    directory = 'project',  
    project_name = "AirQualityindex",
    overwrite = True
)

In [44]:
tuner.search_space_summary()

Search space summary
Default search space size: 3
num_layers (Int)
{'default': None, 'conditions': [], 'min_value': 1, 'max_value': 3, 'step': 1, 'sampling': 'linear'}
units_0 (Int)
{'default': None, 'conditions': [], 'min_value': 32, 'max_value': 128, 'step': 32, 'sampling': 'linear'}
learning_rate (Choice)
{'default': 0.001, 'conditions': [], 'values': [0.001, 0.0001], 'ordered': True}


In [58]:
import traceback

tuner.search(
    X_train, y_train,
    epochs=3,
    validation_data=(X_test, y_test),
    verbose=1
)


Trial 5 Complete [00h 00m 04s]
val_mae: 0.6801043152809143

Best val_mae So Far: 0.6515239477157593
Total elapsed time: 00h 00m 22s
