# Comparison of Air Quality Index (AQI) Prediction Based on AlexNet, VGGNet, ResNet

Kelompok 01 Kecerdasan buatan 02:
* Fateen Najib Indramustika - 2006468522
* Joshevan - 2006577321
* Airell Ramadhan Budiraharjo - 2006535230

In [None]:
import os
import pandas as pd
from datetime import datetime

print(os.listdir())
os.chdir('/kaggle/input/aqi-predictions')

# os.listdir() returns entries in arbitrary order. Sorting keeps the row order
# of df_image stable between runs, which any seeded shuffle/split of this
# table relies on to be reproducible.
image_files = sorted(os.listdir('image'))

# The first 15 characters of each file name (extension stripped) are parsed
# as a "YYYYmmdd_HHMMSS" timestamp.
image_timestamps = [
    datetime.strptime(os.path.splitext(file)[0][:15], "%Y%m%d_%H%M%S")
    for file in image_files
]

# One row per image: file name plus the timestamp parsed from it.
df_image = pd.DataFrame({'File Name': image_files, 'Timestamp': image_timestamps})

aqi_data = pd.read_csv('/kaggle/input/aqi-predictions/air_quality_data.csv')

# Parse the 'Now Timestamp' strings into datetime objects so they can be
# compared with the image timestamps.
aqi_timestamps = [
    datetime.strptime(timestamp, "%Y-%m-%dT%H:%M:%S.%f")
    for timestamp in aqi_data['Now Timestamp']
]

# One row per AQI reading: parsed timestamp plus the AQI value.
df_aqi = pd.DataFrame({'Timestamp': aqi_timestamps, 'AQI': aqi_data['AQI']})
print(df_aqi)

In [None]:
def find_nearest(row, df, column='Timestamp'):
    """Return the row of ``df`` whose ``column`` value is closest to ``row[column]``.

    Args:
        row: Record (for example a DataFrame row passed by ``apply(axis=1)``)
            holding the reference value under ``column``.
        df: DataFrame to search.
        column: Name of the column compared on both sides. It is used for
            the lookup in ``row`` as well as in ``df``.

    Returns:
        The entry of ``df`` selected by ``df.loc`` at the index label with the
        smallest absolute difference. On ties, ``idxmin`` picks the first
        occurrence.
    """
    # Vectorised distance to the reference value; works for datetimes
    # (yielding timedeltas) as well as for plain numbers.
    distances = (df[column] - row[column]).abs()
    return df.loc[distances.idxmin()]

# For every image row, look up the AQI record returned by find_nearest.
nearest_aqi = df_image.apply(find_nearest, axis=1, args=(df_aqi,))

# Place the matched AQI columns side by side with the image columns.
df_image = pd.concat(objs=[df_image, nearest_aqi], axis=1)

In [None]:
# Show the combined table, then remove the columns labelled 'Timestamp'
# in place before displaying the result.
print(df_image)
df_image.drop(labels=['Timestamp'], axis='columns', inplace=True)
df_image

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows as test data; the fixed seed makes the shuffle
# repeatable for a given row order.
train_data, test_data = train_test_split(
    df_image,
    random_state=42,
    test_size=0.2,
)

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Training and test images get the same treatment: pixel values are
# multiplied by 1/255 and nothing else is configured.
pixel_scale = 1. / 255
train_datagen = ImageDataGenerator(rescale=pixel_scale)
test_datagen = ImageDataGenerator(rescale=pixel_scale)

In [None]:
# Options shared by both iterators: images are read from 'image/' using the
# 'File Name' column, resized to 224x224, batched by 16, and the 'AQI'
# column is used as the target with class_mode='raw'.
common_flow_args = dict(
    directory='image/',
    x_col='File Name',
    y_col='AQI',
    target_size=(224, 224),
    batch_size=16,
    class_mode='raw',
)

train_generator = train_datagen.flow_from_dataframe(train_data, **common_flow_args)

# The test iterator additionally disables shuffling.
test_generator = test_datagen.flow_from_dataframe(
    test_data,
    shuffle=False,
    **common_flow_args,
)

In [None]:
from tensorflow import keras
# Import VGG16 from tensorflow.keras like the other Keras symbols in this cell,
# so that the model is not assembled from two different Keras packages.
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten

# Load only the convolutional base. With the default include_top=True the
# model output is the 1000-way ImageNet softmax, so the regression head would
# be trained on class probabilities instead of image features.
# input_shape matches the 224x224 RGB images used elsewhere in this notebook.
base_model = VGG16(
    weights='imagenet',
    include_top=False,
    input_shape=(224, 224, 3),
)

# Freeze the pretrained layers; only the new head below is trained.
for layer in base_model.layers:
    layer.trainable = False

# Add new layers for regression
x = base_model.output
x = Flatten()(x)
x = Dense(512, activation='relu')(x)
x = Dense(1, activation='linear')(x)  # Output layer for regression

# Create the new model
model = Model(inputs=base_model.input, outputs=x)

# MSE is the training loss; MAE and RMSE are tracked as additional metrics.
opt = keras.optimizers.Adam(learning_rate=0.001)
model.compile(
    optimizer=opt,
    loss='mean_squared_error',
    metrics=['mean_absolute_error', keras.metrics.RootMeanSquaredError()],
)

model.summary()

In [None]:
epochs = 100

# Train for `epochs` epochs; each epoch is limited to 4 training steps and
# 4 validation steps taken from the test generator.
history = model.fit(
    x=train_generator,
    epochs=epochs,
    steps_per_epoch=4,
    validation_data=test_generator,
    validation_steps=4,
)

In [None]:
import matplotlib.pyplot as plt

# Per-epoch RMSE recorded in the training history (validation first, then
# training, so the plotting order stays the same).
val_rmse = history.history['val_root_mean_squared_error']
rmse = history.history['root_mean_squared_error']

plt.figure(figsize=(12, 6))
for curve, curve_label in ((val_rmse, 'Validation RMSE'), (rmse, 'RMSE')):
    plt.plot(curve, label=curve_label)
plt.title('RMSE over epochs')
plt.xlabel('Epochs')
plt.ylabel('RMSE')
plt.legend()
plt.show()

In [None]:
import matplotlib.pyplot as plt

# Per-epoch MAE recorded in the training history (validation first, then
# training, so the plotting order stays the same).
val_mae = history.history['val_mean_absolute_error']
mae = history.history['mean_absolute_error']

plt.figure(figsize=(12, 6))
for curve, curve_label in ((val_mae, 'Validation MAE'), (mae, 'MAE')):
    plt.plot(curve, label=curve_label)
plt.title('MAE over epochs')
plt.xlabel('Epochs')
plt.ylabel('MAE')
plt.legend()
plt.show()

In [None]:
import matplotlib.pyplot as plt

# Per-epoch loss recorded in the training history (validation first, then
# training, so the plotting order stays the same).
val_lost = history.history['val_loss']
lost = history.history['loss']

plt.figure(figsize=(12, 6))
for curve, curve_label in ((val_lost, 'Validation Loss'), (lost, 'Loss')):
    plt.plot(curve, label=curve_label)
plt.title('Loss over epochs')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()

In [None]:
# Evaluate on every batch the test generator provides. Deriving a step count
# from a batch size of 32 would cover only about half of the generator's
# 16-image batches and would give 0 steps for fewer than 32 test rows.
# The returned values follow the compile order: loss, MAE, RMSE.
test_loss, test_mae, test_rmse = model.evaluate(test_generator)
print(f"Test Loss: {test_loss}")
print(f"Test MAE: {test_mae}")
print(f"Test RMSE: {test_rmse}")

In [None]:
predictions = model.predict(test_generator)

# Start from the file name and true AQI of each test row, then attach the
# flattened predictions positionally as a third column.
df_result = (
    test_data[['File Name', 'AQI']]
    .rename(columns={'AQI': 'Actual AQI'})
    .assign(**{'Predicted AQI': predictions.flatten()})
)
df_result

In [None]:
import matplotlib.pyplot as plt

# Order the rows by index label so both curves share a sorted x-axis.
df_result = df_result.sort_index()

plt.figure(figsize=(10, 6))
for series_name in ('Actual AQI', 'Predicted AQI'):
    plt.plot(df_result[series_name], label=series_name)
plt.title('Actual AQI vs Predicted AQI')
plt.xlabel('Index')
plt.ylabel('AQI')
plt.legend()
plt.show()