## Preprocessing Data
[World Air Quality Index by City and Coordinates Dataset](https://www.kaggle.com/datasets/adityaramachandran27/world-air-quality-index-by-city-and-coordinates/code)

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf

# read data
data = pd.read_csv("/content/AQI and Lat Long of Countries.csv")

# drop AQI value
data = data.drop("AQI Value", axis = 1)

In [None]:
data.head()

Unnamed: 0,Country,AQI Category,lat,lng
0,Russian Federation,Moderate,44.7444,44.2031
1,Brazil,Good,-5.29,-44.49
2,Brazil,Good,-11.2958,-41.9869
3,Italy,Moderate,37.1667,15.1833
4,Poland,Good,53.0167,20.8833


In [None]:
import numpy as np
from sklearn.compose import make_column_transformer
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.model_selection import train_test_split

ct = make_column_transformer(
    (MinMaxScaler(), ["lat", "lng"]),
    (OneHotEncoder(handle_unknown = "ignore", sparse = False), ["Country"])
)

ct_y = OneHotEncoder(categories='auto')

In [None]:
# create x and y value
X = data.drop("AQI Category", axis = 1)
y = data["AQI Category"]

In [None]:
X, y

(                        Country      lat      lng
 0            Russian Federation  44.7444  44.2031
 1                        Brazil  -5.2900 -44.4900
 2                        Brazil -11.2958 -41.9869
 3                         Italy  37.1667  15.1833
 4                        Poland  53.0167  20.8833
 ...                         ...      ...      ...
 16690  United States of America  37.5516 -77.3285
 16691                  Slovakia  49.0650  18.9219
 16692                  Slovakia  36.3385 -88.8513
 16693                    France  48.7786   2.2906
 16694  United States of America  40.1241 -82.9210
 
 [16695 rows x 3 columns],
 0        Moderate
 1            Good
 2            Good
 3        Moderate
 4            Good
            ...   
 16690    Moderate
 16691    Moderate
 16692    Moderate
 16693        Good
 16694    Moderate
 Name: AQI Category, Length: 16695, dtype: object)

In [None]:
y = ct_y.fit_transform(y.values.reshape(-1,1))

In [None]:
print(y.toarray())

[[0. 0. 1. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0.]
 ...
 [0. 0. 1. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0.]]


In [None]:
y = y.toarray()

In [None]:
y

array([[0., 0., 1., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0.],
       ...,
       [0., 0., 1., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.]])

## Train test split

In [None]:
# train test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)

# fit column transformer
ct.fit(X_train)



In [None]:
# transform train and test data
X_train_normal = ct.transform(X_train)
X_test_normal = ct.transform(X_test)

In [None]:
X_train_normal[0]

array([0.28133349, 0.90145519, 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       1.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.     

In [None]:
X_test_normal[0]

array([0.84986327, 0.50387714, 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.     

## Build a neural network

In [None]:
# create model
model_1 = tf.keras.Sequential([
    tf.keras.layers.Dense(3, activation = "relu"),
    tf.keras.layers.Dense(3, activation = "relu"),
    tf.keras.layers.Dense(3, activation = "relu"),
    tf.keras.layers.Dense(6, activation = "softmax")
])

# compile the model
model_1.compile(loss = "categorical_crossentropy",
              optimizer = tf.keras.optimizers.Adam(),
              metrics = ["accuracy"])

# fit the model
history_1 = model_1.fit(X_train_normal, y_train, epochs = 50)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [None]:
model_1.predict(X_test_normal)



array([[9.20302272e-01, 3.09122363e-08, 7.90052563e-02, 1.37291254e-05,
        6.77800155e-04, 9.90551371e-07],
       [2.01845139e-01, 6.68098358e-03, 4.59002405e-01, 1.49566099e-01,
        1.62128091e-01, 2.07772236e-02],
       [8.10487429e-04, 9.68123134e-03, 8.49578381e-02, 6.29599810e-01,
        2.50700235e-01, 2.42503155e-02],
       ...,
       [6.78737938e-01, 9.16887893e-06, 3.05846542e-01, 1.39284309e-03,
        1.38986325e-02, 1.14934053e-04],
       [7.83074573e-02, 4.82441299e-02, 2.37234116e-01, 3.87500942e-01,
        1.68373927e-01, 8.03394541e-02],
       [5.72751641e-01, 3.21804539e-09, 4.23262864e-01, 2.51457222e-05,
        3.96009721e-03, 3.18402925e-07]], dtype=float32)

In [None]:
model_1.evaluate(X_test_normal, y_test)



[0.82264244556427, 0.6427074074745178]

In [None]:
pip install -U keras-tuner



In [None]:
from tensorflow import keras
from tensorflow.keras import layers
from kerastuner.tuners import RandomSearch

In [150]:
def build_model_function(hp):
    model = keras.Sequential()
    model.add(layers.Dense(units=hp.Int('units', min_value=2,
                           max_value=128, step=32), activation='relu'))
    model.add(layers.Dense(units=hp.Int('units', min_value=4,
                           max_value=64, step=32), activation='relu'))
    model.add(layers.Dense(units=hp.Int('units', min_value=2,
                           max_value=128, step=32), activation='relu'))
    model.add(layers.Dense(6, activation='softmax'))
    model.compile(optimizer=keras.optimizers.Adam(
        hp.Choice('learning_rate',values=[0.001])),
        loss='categorical_crossentropy',
        metrics=['accuracy'])
    return model

In [151]:
tuner = RandomSearch(
    build_model_function,
    objective='val_accuracy',
    max_trials=10,
    executions_per_trial=3,
    directory='my_dir3',
    project_name='helloworld')
# You can print a summary of the search space:
tuner.search_space_summary()
# The call to search has the same signature as model.fit()
tuner.search(X_train_normal, y_train, epochs=5, validation_data=(X_test_normal, y_test))
# When search is over, you can retrieve the best model(s):
models = tuner.get_best_models(num_models=2)
# Or print a summary of the results:
tuner.results_summary()

Trial 4 Complete [00h 00m 17s]
val_accuracy: 0.6435060302416483

Best val_accuracy So Far: 0.6435060302416483
Total elapsed time: 00h 01m 05s
Results summary
Results in my_dir3/helloworld
Showing 10 best trials
Objective(name="val_accuracy", direction="max")

Trial 03 summary
Hyperparameters:
units: 34
learning_rate: 0.001
Score: 0.6435060302416483

Trial 02 summary
Hyperparameters:
units: 66
learning_rate: 0.001
Score: 0.6434062123298645

Trial 00 summary
Hyperparameters:
units: 98
learning_rate: 0.001
Score: 0.6426075498263041

Trial 01 summary
Hyperparameters:
units: 2
learning_rate: 0.001
Score: 0.4817809859911601


**Note**:
Got 64% of accuracy for predicting 6 different Air Quality Categories