# Comparison of Air Quality Index (AQI) Prediction Based on AlexNet, VGGNet, ResNet

Kelompok 01 Kecerdasan Buatan 02:
* Fateen Najib Indramustika - 2006468522
* Joshevan - 2006577321
* Airell Ramadhan Budiraharjo - 2006535230

## Import Dependencies

In [None]:
import csv
import os
import pandas as pd
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np


## Import dataset

In [None]:
def match_images_with_csv(csv_file_path, images_folder_path, timezone_offset=7):
    """Pair image files with AQI readings through their timestamps.

    Every CSV row's ``Now Timestamp`` is shifted by ``timezone_offset`` hours
    and rendered as ``YYYYMMDD_HHMM00`` (seconds are always written as
    ``00``). An image is kept when the part of its file name before the first
    dot equals one of those keys.

    Args:
        csv_file_path: Path of a CSV file that has ``Now Timestamp``
            (ISO 8601) and ``AQI`` columns.
        images_folder_path: Folder scanned for ``.png``, ``.jpg`` and
            ``.jpeg`` files.
        timezone_offset: Hours added to each CSV timestamp before matching.

    Returns:
        A dict with two parallel lists: ``'image_path'`` holds the matched
        file names (relative to ``images_folder_path``) and ``'AQI'`` holds
        the matching readings converted to ``int``.
    """
    timestamp_AQI_mapping = {}
    # newline='' leaves newline handling to the csv module.
    with open(csv_file_path, mode='r', newline='') as csvfile:
        for row in csv.DictReader(csvfile):
            shifted = datetime.fromisoformat(row['Now Timestamp'])
            shifted += timedelta(hours=timezone_offset)
            key = shifted.strftime('%Y%m%d_%H%M') + "00"
            timestamp_AQI_mapping[key] = row['AQI']

    image_names = []
    aqi_values = []

    # os.listdir() returns entries in arbitrary order; sorting makes the row
    # order (and therefore any seeded split of the result) reproducible.
    for image_file in sorted(os.listdir(images_folder_path)):
        if not image_file.endswith(('.png', '.jpg', '.jpeg')):
            continue
        image_timestamp = image_file.split('.')[0]

        # Match the image timestamp with the CSV data
        if image_timestamp in timestamp_AQI_mapping:
            aqi_values.append(int(timestamp_AQI_mapping[image_timestamp]))
            # Keep the bare file name on every platform. Splitting a joined
            # path on '\\' only removes the folder part on Windows.
            image_names.append(image_file)

    return {'image_path': image_names, 'AQI': aqi_values}

In [None]:
# Match the sensor log against the files in ./image and tabulate the pairs.
data = match_images_with_csv(
    csv_file_path='air_quality_data.csv',
    images_folder_path='image',
)
df_aqi = pd.DataFrame(data=data)


## Preprocessing Data for TensorFlow


In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20 % of the rows as the test set; the fixed random_state makes the
# split repeatable for the same input table.
train_data, test_data = train_test_split(
    df_aqi,
    random_state=42,
    test_size=0.2,
)

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# `rescale` is a factor every pixel value is multiplied by. 1/255 maps 8-bit
# pixel values into [0, 1], the usual input range for networks trained from
# scratch. Train and test data must use the same factor.
train_datagen = ImageDataGenerator(rescale=1.0 / 255)
test_datagen = ImageDataGenerator(rescale=1.0 / 255)

In [None]:
# Both iterators read the file names in `image_path` relative to image/ and
# yield the numeric `AQI` column unchanged (class_mode='raw').
# target_size must equal the spatial part of the network input;
# alexnet_model() is called with its default input_shape of (227, 227, 3).
train_generator = train_datagen.flow_from_dataframe(
    train_data,
    directory='image/',
    x_col='image_path',
    y_col='AQI',
    target_size=(227, 227),
    batch_size=32,
    class_mode='raw'
)
# shuffle=False keeps the test batches in the row order of test_data, so
# predictions can be lined up with the rows afterwards.
test_generator = test_datagen.flow_from_dataframe(
    test_data,
    directory='image/',
    x_col='image_path',
    y_col='AQI',
    target_size=(227, 227),
    batch_size=32,
    class_mode='raw',
    shuffle=False
)


## AlexNet for multiclass classification with 200 classes

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import (Conv2D, MaxPooling2D, Flatten,
                                     Dense, Dropout, BatchNormalization)

def alexnet_model(input_shape=(227, 227, 3), num_classes=200):
    """Build an uncompiled AlexNet-style image classifier.

    The network stacks five convolutional layers (batch normalisation after
    the first two, max pooling after the first, second and fifth), flattens
    the feature maps and feeds them through three dropout-regularised dense
    layers before the softmax output.

    Args:
        input_shape: Shape of one input image as (height, width, channels).
        num_classes: Number of units in the softmax output layer. When the
            model is trained with a sparse categorical loss, every integer
            label has to be smaller than this value.

    Returns:
        A ``Sequential`` model; the caller is responsible for compiling it.
    """
    model = Sequential([
        # 1st Convolutional Layer
        Conv2D(filters=96, kernel_size=(11,11), strides=(4,4), activation='relu', input_shape=input_shape),
        BatchNormalization(),
        MaxPooling2D(pool_size=(3,3), strides=(2,2)),

        # 2nd Convolutional Layer
        Conv2D(filters=256, kernel_size=(5,5), strides=(1,1), activation='relu', padding="same"),
        BatchNormalization(),
        MaxPooling2D(pool_size=(3,3), strides=(2,2)),

        # 3rd, 4th, and 5th Convolutional Layers
        Conv2D(filters=384, kernel_size=(3,3), strides=(1,1), activation='relu', padding="same"),
        Conv2D(filters=384, kernel_size=(3,3), strides=(1,1), activation='relu', padding="same"),
        Conv2D(filters=256, kernel_size=(3,3), strides=(1,1), activation='relu', padding="same"),
        MaxPooling2D(pool_size=(3,3), strides=(2,2)),

        # Flattening Layer
        Flatten(),

        # 1st Dense Layer
        Dense(4096, activation='relu'),
        Dropout(0.5),

        # 2nd Dense Layer
        Dense(4096, activation='relu'),
        Dropout(0.5),

        # 3rd Dense Layer
        Dense(1000, activation='relu'),
        Dropout(0.5),

        # Output Layer: one probability per class
        Dense(num_classes, activation='softmax')
    ])

    return model

# Instantiate the classifier with its default arguments.
model = alexnet_model()

# Attach the Adam optimiser, the cross-entropy loss for integer class labels
# and the accuracy metric.
model.compile(
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
    optimizer='adam',
)

# Print the layer-by-layer overview.
model.summary()


In [None]:
# len(generator) is the number of batches needed to visit every sample once
# (the last batch may be smaller). Deriving the step counts from it keeps
# them in sync with the generators' batch size, uses the trailing samples,
# and never yields 0 steps for a split smaller than one batch.
history = model.fit(
    train_generator,
    steps_per_epoch=len(train_generator),
    validation_data=test_generator,
    validation_steps=len(test_generator),
    epochs=100
)

In [None]:
# Convert the per-epoch accuracies from fractions to percentages. The list
# comprehensions build new lists, so history.history stays untouched.
train_accuracy = [value * 100 for value in history.history['accuracy']]
val_accuracy = [value * 100 for value in history.history['val_accuracy']]

plt.plot(train_accuracy)
plt.plot(val_accuracy)
plt.title('Model Accuracy')
plt.ylabel('Accuracy (%)')
plt.xlabel('Epoch')
plt.legend(['Train', 'Test'], loc='upper left')

# Annotate the validation accuracy of the last epoch. Using [-1] and a
# position at 80 % of the run length works for any number of epochs
# (x = 80 for a 100-epoch run).
final_val_accuracy = val_accuracy[-1]
plt.text(0.8 * len(val_accuracy), final_val_accuracy, f'Validation Score: {final_val_accuracy:.2f}', ha='center', va='bottom')
plt.savefig('accuracy.png')
plt.show()


## AlexNet for Regression

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import (Conv2D, MaxPooling2D, Flatten,
                                     Dense, Dropout, BatchNormalization)
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model


def alexnet_model(input_shape=(227, 227, 3)):
    """Build an uncompiled AlexNet-style network with a single linear output.

    The convolutional stack is followed by global average pooling, three
    dense ReLU layers (1024, 4096 and 4096 units, dropout after the last two)
    and one output unit without activation.

    Args:
        input_shape: Shape of one input image as (height, width, channels).

    Returns:
        A ``Sequential`` model; the caller is responsible for compiling it.
    """
    net = Sequential()

    # Convolution block 1
    net.add(Conv2D(filters=96, kernel_size=(11, 11), strides=(4, 4),
                   activation='relu', input_shape=input_shape))
    net.add(BatchNormalization())
    net.add(MaxPooling2D(pool_size=(3, 3), strides=(2, 2)))

    # Convolution block 2
    net.add(Conv2D(filters=256, kernel_size=(5, 5), strides=(1, 1),
                   activation='relu', padding="same"))
    net.add(BatchNormalization())
    net.add(MaxPooling2D(pool_size=(3, 3), strides=(2, 2)))

    # Convolution layers 3-5, pooled once at the end
    net.add(Conv2D(filters=384, kernel_size=(3, 3), strides=(1, 1),
                   activation='relu', padding="same"))
    net.add(Conv2D(filters=384, kernel_size=(3, 3), strides=(1, 1),
                   activation='relu', padding="same"))
    net.add(Conv2D(filters=256, kernel_size=(3, 3), strides=(1, 1),
                   activation='relu', padding="same"))
    net.add(MaxPooling2D(pool_size=(3, 3), strides=(2, 2)))

    # Collapse each feature map to its mean
    net.add(GlobalAveragePooling2D())

    # Fully connected head
    net.add(Dense(1024, activation='relu'))
    net.add(Dense(4096, activation='relu'))
    net.add(Dropout(0.5))
    net.add(Dense(4096, activation='relu'))
    net.add(Dropout(0.5))

    # Single unit, no activation: continuous value prediction
    net.add(Dense(1))

    return net

# Instantiate the regression network with its default arguments.
model = alexnet_model()

# Optimise the mean squared error with Adam and additionally track the mean
# absolute error and the root mean squared error.
model.compile(
    loss='mean_squared_error',
    metrics=['mean_absolute_error', tf.keras.metrics.RootMeanSquaredError()],
    optimizer='adam',
)

# Print the layer-by-layer overview.
model.summary()

In [None]:
# Pull one batch from each generator ...
x_batch, y_batch = next(train_generator)
x_val_batch, y_val_batch = next(test_generator)

# ... and report the array shapes and dtypes, training batch first.
print(f'x_batch shape: {x_batch.shape}, dtype: {x_batch.dtype}')
print(f'y_batch shape: {y_batch.shape}, dtype: {y_batch.dtype}')
print(f'x_val_batch shape: {x_val_batch.shape}, dtype: {x_val_batch.dtype}')
print(f'y_val_batch shape: {y_val_batch.shape}, dtype: {y_val_batch.dtype}')


In [None]:
# len(generator) is the number of batches needed to visit every sample once
# (the last batch may be smaller). Deriving the step counts from it keeps
# them in sync with the generators' batch size, uses the trailing samples,
# and never yields 0 steps for a split smaller than one batch.
history = model.fit(
    train_generator,
    steps_per_epoch=len(train_generator),
    validation_data=test_generator,
    validation_steps=len(test_generator),
    epochs=100  # Adjust the number of epochs based on your needs
)

In [None]:
import matplotlib.pyplot as plt

# Per-epoch loss curves recorded by model.fit().
lost = history.history['loss']
val_lost = history.history['val_loss']

plt.figure(figsize=(12, 6))
# Draw validation first so the legend entries below line up with the curves.
for curve in (val_lost, lost):
    plt.plot(curve)
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Loss over epochs')
plt.legend(['Validation Loss', 'Loss'])
plt.show()


In [None]:
# The model is compiled with tf.keras.metrics.RootMeanSquaredError(), whose
# default name is 'root_mean_squared_error'. Read that series here; the
# 'mean_absolute_error' keys hold the MAE, not the RMSE this figure shows.
train_rmse = history.history['root_mean_squared_error']
val_rmse = history.history['val_root_mean_squared_error']

plt.figure(figsize=(12, 6))
plt.plot(train_rmse)
plt.plot(val_rmse)
plt.title('RMSE over epochs')
plt.xlabel('Epochs')
plt.ylabel('RMSE')
plt.legend(['Train RMSE', 'Validation RMSE'])
plt.show()

In [None]:
# Evaluate on every test batch. len(test_generator) also counts the final,
# possibly smaller batch, so no test sample is left out of the metrics.
# The returned values follow the compile() order: loss, MAE, RMSE.
test_loss, test_mae, test_rmse = model.evaluate(test_generator, steps=len(test_generator))
print(f"Test Loss: {test_loss}")
print(f"Test MAE: {test_mae}")
print(f"Test RMSE: {test_rmse}")


In [None]:
# The model is compiled with tf.keras.metrics.RootMeanSquaredError(), whose
# default name is 'root_mean_squared_error'. Read that series here; the
# 'mean_absolute_error' keys hold the MAE, not the RMSE this figure shows.
train_rmse = history.history['root_mean_squared_error']
val_rmse = history.history['val_root_mean_squared_error']

plt.figure(figsize=(12, 6))
plt.plot(train_rmse)
plt.plot(val_rmse)
plt.title('RMSE over epochs')
plt.xlabel('Epochs')
plt.ylabel('RMSE')

# Annotate the last epoch's values. [-1] works for any number of epochs, and
# axes-fraction coordinates keep the text inside the plot whatever the data
# range is. The second label shows a training value, so it is named as such.
axes = plt.gca()
plt.text(0.8, 0.5, f'Validation RMSE: {val_rmse[-1]:.4f}', ha='center', va='bottom', transform=axes.transAxes)
plt.text(0.8, 0.5, f'Training RMSE: {train_rmse[-1]:.4f}', ha='center', va='top', transform=axes.transAxes)
plt.legend(['Train RMSE', 'Validation RMSE'])
plt.show()

In [None]:
import matplotlib.pyplot as plt

# Build the actual-vs-predicted table that the plot below needs.
# test_generator was created with shuffle=False, so after reset() the
# predictions come back in the row order of test_data.
# NOTE(review): assumes every row of test_data yields one prediction, i.e.
# flow_from_dataframe did not drop any file name as invalid. Confirm.
test_generator.reset()
predicted_aqi = model.predict(test_generator, steps=len(test_generator)).ravel()
df_result = pd.DataFrame(
    {
        'Actual AQI': test_data['AQI'].to_numpy(),
        'Predicted AQI': predicted_aqi,
    },
    index=test_data.index,
)

# Restore the original row order of the dataset (the split shuffled it).
df_result = df_result.sort_index()
plt.figure(figsize=(10, 6))
plt.plot(df_result['Actual AQI'], label='Actual AQI')
plt.plot(df_result['Predicted AQI'], label='Predicted AQI')
plt.title('Actual AQI vs Predicted AQI')
plt.xlabel('Index')
plt.ylabel('AQI')
plt.legend()
plt.show()