In [2]:
# Import packages
import os

import numpy as np
import tensorflow as tf
from PIL import Image
from sklearn.metrics import mean_absolute_error
from tensorflow import keras
from tensorflow.keras import layers

In [5]:
# Locations of the image folders and of the training-label text file
train_image_folder = 'images/train_images'
test_image_folder = 'images/test_images'
txt_file_path = 'txt_files/train_images.txt'

# Accumulators that the image-loading cell fills in
images, labels = [], []

# Parse the label file: every line is stripped and converted to a float
with open(txt_file_path, 'r') as label_file:
  pm25_values = [float(raw_line.strip()) for raw_line in label_file]

# Keep the parsed readings as a float32 NumPy array
pm25_values = np.array(pm25_values, dtype = np.float32)

In [6]:
# Load every training image and pair it with its PM2.5 label.
# The lists are (re)created here so that re-running this cell starts from a
# clean state instead of calling .append() on the arrays built at the bottom.
images = []
labels = []

# os.listdir() returns names in arbitrary order, so sort them to make the
# image <-> label pairing deterministic across runs and machines.
# NOTE(review): this assumes line i of the label file belongs to the i-th
# filename in sorted order - confirm against how the text file was written.
train_filenames = sorted(
  name for name in os.listdir(train_image_folder)
  if name.endswith(('.jpg', '.png'))
)

# Fail early with a clear message rather than an IndexError part-way through
if len(train_filenames) > len(pm25_values):
  raise ValueError(
    f'Found {len(train_filenames)} images in {train_image_folder} but only '
    f'{len(pm25_values)} PM2.5 values in {txt_file_path}'
  )

for filename, pm25_value in zip(train_filenames, pm25_values):
  # Combine the folder path with the file name to get the image's own path
  image_path = os.path.join(train_image_folder, filename)
  # Open the image; the context manager releases the file handle afterwards
  with Image.open(image_path) as raw_img:
    # Force 3 channels (drops alpha, expands grayscale) so every array has
    # the same shape, then resize the image to be more consistent
    img = raw_img.convert('RGB').resize((128, 128))
  # Change the image into a numpy array and normalize its pixels to [0, 1]
  img_array = np.array(img) / 255.0
  # Add the image array to the images list
  images.append(img_array)
  # Store the PM2.5 value matched to this image
  labels.append(pm25_value)

# Convert lists to NumPy arrays
images = np.array(images, dtype = np.float32)
labels = np.array(labels, dtype = np.float32)

In [7]:
# Build the CNN regressor one layer at a time
model = keras.Sequential()

# Convolutional stage 1: 32 filters on the 128x128 RGB input, then 2x2 pooling
model.add(layers.Conv2D(filters = 32, kernel_size = (3, 3), activation = 'relu',
                        input_shape = (128, 128, 3)))
model.add(layers.MaxPooling2D(pool_size = (2, 2)))

# Convolutional stage 2: 64 filters, then 2x2 pooling
model.add(layers.Conv2D(filters = 64, kernel_size = (3, 3), activation = 'relu'))
model.add(layers.MaxPooling2D(pool_size = (2, 2)))

# Convolutional stage 3: 128 filters, then 2x2 pooling
model.add(layers.Conv2D(filters = 128, kernel_size = (3, 3), activation = 'relu'))
model.add(layers.MaxPooling2D(pool_size = (2, 2)))

# Flatten the feature maps into a single vector
model.add(layers.Flatten())

# Dense layers
model.add(layers.Dense(units = 128, activation = 'relu'))
model.add(layers.Dense(units = 64, activation = 'relu'))

# Output layer with a single neuron for PM2.5 prediction
model.add(layers.Dense(units = 1))

In [8]:
# Configure training: Adam optimizer, mean-squared-error loss, MAE as a metric
model.compile(
    loss = 'mean_squared_error',
    optimizer = 'adam',
    metrics = ['mae'],
)

# Fit on the loaded images/labels and keep the returned training history
history = model.fit(
    x = images,
    y = labels,
    batch_size = 32,
    epochs = 300,
)

Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300
Epoch 41/300
Epoch 42/300
Epoch 43/300
Epoch 44/300
Epoch 45/300
Epoch 46/300
Epoch 47/300
Epoch 48/300
Epoch 49/300
Epoch 50/300
Epoch 51/300
Epoch 52/300
Epoch 53/300
Epoch 54/300
Epoch 55/300
Epoch 56/300
Epoch 57/300
Epoch 58/300
Epoch 59/300
Epoch 60/300
Epoch 61/300
Epoch 62/300
Epoch 63/300
Epoch 64/300
Epoch 65/300
Epoch 66/300
Epoch 67/300
Epoch 68/300
Epoch 69/300
Epoch 70/300
Epoch 71/300
Epoch 72/300
Epoch 73/300
Epoch 74/300
Epoch 75/300
Epoch 76/300
Epoch 77/300
Epoch 78

In [15]:
#Path to the output text file
output_file_path = 'txt_files/test_images.txt'

# Initialize a list to store the predicted PM2.5 values
predicted_pm25_values = []
for filename in os.listdir(test_image_folder):
  if filename.endswith('.jpg') or filename.endswith('.png'):
    # Load and preprocess the image
    image_path = os.path.join(test_image_folder, filename)
    img = Image.open(image_path)
    img = img.resize((128, 128))
    img_array = np.array(img) / 255.0
    img_array = np.expand_dims(img_array, axis=0)  # Add a batch dimension
    # Make a prediction using the model
    predicted_value = model.predict(img_array)
    # Append the predicted PM2.5 value to the list
    predicted_pm25_values.append(predicted_value[0][0])



In [16]:
# Save only the numeric PM2.5 values to the text file
with open(output_file_path, 'w') as file:
  for x in predicted_pm25_values:
    file.write(f'{x}\n')

In [17]:
# Display the results
for filename, x in zip(os.listdir(test_image_folder), predicted_pm25_values):
  print(f'Image: {filename}, Predicted AQI: {x}')

Image: img19.jpg, Predicted AQI: 69.75848388671875
Image: img20.jpg, Predicted AQI: 44.008872985839844
Image: img21.jpg, Predicted AQI: 60.33290481567383
Image: img22.jpg, Predicted AQI: 43.85430145263672
Image: img23.jpg, Predicted AQI: 65.80712127685547
Image: img24.jpg, Predicted AQI: 68.45662689208984
Image: img25.jpg, Predicted AQI: 51.8444709777832
Image: img26.jpg, Predicted AQI: 67.11910247802734
Image: img27.jpg, Predicted AQI: 70.30950164794922
Image: img28.jpg, Predicted AQI: 68.4752197265625
Image: img29.jpg, Predicted AQI: 65.47904968261719
Image: img3.jpg, Predicted AQI: 72.95126342773438
Image: img30.jpg, Predicted AQI: 67.08615112304688
Image: img31.jpg, Predicted AQI: 52.67084884643555
Image: img32.jpg, Predicted AQI: 69.15911865234375
Image: img33.jpg, Predicted AQI: 34.904014587402344
Image: img34.jpg, Predicted AQI: 78.55274963378906


In [None]:
# Ground-truth values for the images inside the "test_images" folder.
# They must be listed in the same order as the entries of
# 'predicted_pm25_values', otherwise the error below compares the wrong pairs.
test_labels = [67, 46, 29, 29, 35, 41, 31, 43, 26, 34, 40, 37, 49, 46, 40, 52, 23]

# Calculate Mean Absolute Error (MAE) between true and predicted values
mae = mean_absolute_error(test_labels, predicted_pm25_values)
print(f'Mean Absolute Error (MAE): {mae}')