In [58]:
import torch
from torch.utils.data import DataLoader
import torch.nn as nn
import torchvision.models as models
from torch.utils.data import Dataset, DataLoader, random_split
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from torch.utils.data.dataset import Subset
import os
from PIL import Image
import joblib
from torchvision import transforms
import pandas as pd


In [48]:
#custom PyTorch Dataset class.
# It‚Äôs a data loader blueprint that tells PyTorch:
# üëâ ‚ÄúHere‚Äôs how to find my data‚Äù
# üëâ ‚ÄúHere‚Äôs how to read each image‚Äù
# üëâ ‚ÄúHere‚Äôs how to get the labels‚Äù
class AirQualityDataset(Dataset):
    def __init__(self, df, img_dir, transform=None):
        self.df = df.reset_index(drop=True)
        self.img_dir = img_dir
        self.transform = transform
        self.label_cols = ['AQI','PM2.5','PM10','O3','CO','SO2','NO2']

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        filename = row["Filename"].strip()  # remove leading/trailing spaces
        img_path = os.path.join(self.img_dir, filename)

        if not os.path.exists(img_path):
            print(f"File not found: {img_path}")
            return None  # optionally skip this sample

        img = Image.open(img_path).convert("RGB")
        if self.transform:
            img = self.transform(img)

        # Convert labels to float
        labels = torch.tensor(
            row[self.label_cols].astype(float).values,
            dtype=torch.float32
        )

        return img, labels


In [49]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

test_dataset = torch.load("/Users/avanigupta/pm-estimation-from-images/models/test_dataset.pt", weights_only=False)
test_loader = DataLoader(
    test_dataset,
    batch_size=32,
    shuffle=False
)

In [50]:
def get_model(model_name):
    if model_name == "resnet18":
        model = models.resnet18(
            weights=models.ResNet18_Weights.IMAGENET1K_V1
        )
        model.fc = nn.Linear(512, 7)

    elif model_name == "resnet34":
        model = models.resnet34(
            weights=models.ResNet34_Weights.IMAGENET1K_V1
        )
        model.fc = nn.Linear(512, 7)

    elif model_name == "mobilenet_v2":
        model = models.mobilenet_v2(
            weights=models.MobileNet_V2_Weights.IMAGENET1K_V1
        )
        model.classifier[1] = nn.Linear(
            model.classifier[1].in_features, 7
        )

    else:
        raise ValueError("Unknown model name")

    return model


In [51]:
def evaluate_model(model, dataloader):
    model.eval()
    preds_all = []
    labels_all = []

    with torch.no_grad():
        for imgs, labels in dataloader:
            imgs = imgs.to(device)
            labels = labels.to(device)

            preds = model(imgs)

            preds_all.append(preds.cpu().numpy())
            labels_all.append(labels.cpu().numpy())

    preds_all = np.vstack(preds_all)
    labels_all = np.vstack(labels_all)

    return preds_all, labels_all

In [37]:
def compute_metrics(y_true, y_pred):
    return {
        "MSE": mean_squared_error(y_true, y_pred),
        "RMSE": mean_squared_error(y_true, y_pred),
        "MAE": mean_absolute_error(y_true, y_pred),
        "R2": r2_score(y_true, y_pred)
    }

In [40]:
model_names = [
    "mobilenet_v2",
    "resnet18",
    "resnet34",

]
results = {}

for name in model_names:
    model = get_model(name).to(device)
    model.load_state_dict(
        torch.load(f"/Users/avanigupta/pm-estimation-from-images/models/{name}_aqi.pth", map_location=device)
    )


    y_pred, y_true = evaluate_model(model, test_loader)
    metrics = compute_metrics(y_true, y_pred)

    results[name] = metrics
    print(f"\n{name} results:")
    for k, v in metrics.items():
        print(f"{k}: {v:.4f}")


mobilenet_v2 results:
MSE: 0.0158
RMSE: 0.0158
MAE: 0.0779
R2: 0.9685

resnet18 results:
MSE: 0.0176
RMSE: 0.0176
MAE: 0.0864
R2: 0.9650

resnet34 results:
MSE: 0.0116
RMSE: 0.0116
MAE: 0.0610
R2: 0.9768


In [41]:
labels = ['AQI','PM2.5','PM10','O3','CO','SO2','NO2']

for i, label in enumerate(labels):
    print(f"\n{label}")
    print("MAE:", mean_absolute_error(y_true[:, i], y_pred[:, i]))


AQI
MAE: 0.07046648114919662

PM2.5
MAE: 0.05581687018275261

PM10
MAE: 0.0632089301943779

O3
MAE: 0.0635099709033966

CO
MAE: 0.051858607679605484

SO2
MAE: 0.061200570315122604

NO2
MAE: 0.06108921021223068


In [45]:
import torch

# Load the dataset
data = torch.load("/Users/avanigupta/pm-estimation-from-images/models/test_dataset.pt", weights_only=False)

# Check what type of object it is
print(type(data))

# If it's a list or dataset, iterate and print some examples
for i, item in enumerate(data):
    # Stop after 5 examples
    if i >= 5:
        break

    # Try to print filename or metadata
    if isinstance(item, dict):
        # common pattern: {'image': tensor, 'filename': 'xxx.jpg', ...}
        print(item.get("filename", "No filename key"))
    else:
        print(item)

<class 'torch.utils.data.dataset.Subset'>
(tensor([[[ 1.0502,  1.0502,  1.0502,  ...,  0.8104,  0.8104,  0.8104],
         [ 1.0502,  1.0502,  1.0502,  ...,  0.8104,  0.8104,  0.8104],
         [ 1.0502,  1.0502,  1.0502,  ...,  0.8104,  0.8104,  0.8104],
         ...,
         [-0.9192, -0.8849, -1.5014,  ...,  1.2043,  0.2111,  0.6563],
         [-0.5082, -0.7822, -1.7583,  ...,  0.8961,  0.8104,  0.5193],
         [-0.9020, -0.6452, -1.5357,  ...,  0.7248,  0.9303,  0.5878]],

        [[ 1.3782,  1.3782,  1.3782,  ...,  1.1155,  1.1155,  1.1155],
         [ 1.3782,  1.3782,  1.3782,  ...,  1.1155,  1.1155,  1.1155],
         [ 1.3782,  1.3782,  1.3782,  ...,  1.1155,  1.1155,  1.1155],
         ...,
         [-1.0903, -1.0203, -1.6331,  ...,  0.6604, -0.3550,  0.1001],
         [-0.6352, -0.9153, -1.8957,  ...,  0.3803,  0.2927, -0.0049],
         [-1.0378, -0.7752, -1.6681,  ...,  0.2052,  0.4153,  0.0651]],

        [[ 1.6291,  1.6291,  1.6291,  ...,  1.4548,  1.4548,  1.4548],
  

In [52]:
from torchvision import models, transforms

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_name = "resnet18"  # choose the trained model
model_path = f"/Users/avanigupta/pm-estimation-from-images/models/{model_name}_aqi.pth"
img_path = "/Users/avanigupta/pm-estimation-from-images/data/archive/Air Pollution Image Dataset/Air Pollution Image Dataset/Combined_Dataset/All_img/BENGR_Good_2023-02-19-08.30-1-1.jpg"
LABEL_COLS = ['AQI','PM2.5','PM10','O3','CO','SO2','NO2']
# ----------------------------
# Load model
# ----------------------------
model = get_model(model_name).to(device)
model.load_state_dict(torch.load(model_path, map_location=device))
model.eval()  # important!

# ----------------------------
# Preprocess image
# ----------------------------
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])

img = Image.open(img_path).convert("RGB")
img_tensor = transform(img).unsqueeze(0).to(device)  # add batch dimension

# ----------------------------
# Predict
# ----------------------------
with torch.no_grad():
    output = model(img_tensor)

# Convert to list
preds = output.squeeze(0).cpu().numpy()

# Map to labels
pred_dict = dict(zip(LABEL_COLS, [float(x) for x in preds]))

print(pred_dict)

{'AQI': -0.7380762100219727, 'PM2.5': -0.2215963751077652, 'PM10': -0.43026039004325867, 'O3': -0.2799919843673706, 'CO': 0.7382581830024719, 'SO2': -0.3708515167236328, 'NO2': -0.18658989667892456}


In [None]:

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_name = "resnet34"
model_path = f"/Users/avanigupta/pm-estimation-from-images/models/{model_name}_aqi.pth"
scaler_path = "/Users/avanigupta/pm-estimation-from-images/models/label_scaler.save"

LABEL_COLS = ['AQI','PM2.5','PM10','O3','CO','SO2','NO2']

# ----------------------------
# Load model
# ----------------------------
model = get_model(model_name).to(device)
model.load_state_dict(torch.load(model_path, map_location=device))
model.eval()

# ----------------------------
# Load scaler
# ----------------------------
scaler = joblib.load("/Users/avanigupta/pm-estimation-from-images/models/label_scaler.save")

# ----------------------------
# Preprocess image
# ----------------------------
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])

img = Image.open(img_path).convert("RGB")
img_tensor = transform(img).unsqueeze(0).to(device)

# ----------------------------
# Predict (scaled values)
# ----------------------------
with torch.no_grad():
    output = model(img_tensor)

scaled_preds = output.cpu().numpy()  # shape (1, 7)

# ----------------------------
# Convert back to real values
# ----------------------------
real_preds = scaler.inverse_transform(scaled_preds)[0]

pred_dict = {label: float(val) for label, val in zip(LABEL_COLS, real_preds)}
print(pred_dict)

{'AQI': 55.045692443847656, 'PM2.5': 25.150537490844727, 'PM10': 56.691078186035156, 'O3': 20.021852493286133, 'CO': 174.64341735839844, 'SO2': 4.274399757385254, 'NO2': 13.922660827636719}


In [55]:
filename = os.path.basename(img_path)
print(filename)

BENGR_Good_2023-02-19-08.30-1-1.jpg


In [59]:
df = pd.read_csv("/Users/avanigupta/pm-estimation-from-images/data/archive/Air Pollution Image Dataset/Air Pollution Image Dataset/Combined_Dataset/IND_and_Nep_AQI_Dataset.csv")
row = df[df["Filename"] == filename]

In [61]:
actual_values = row[LABEL_COLS].values[0]  # numpy array
actual_dict = dict(zip(LABEL_COLS, actual_values))
for label in LABEL_COLS:
    print(f"{label:6} | Pred: {pred_dict[label]:8.2f} | Actual: {actual_dict[label]:8.2f}")

AQI    | Pred:    55.05 | Actual:    41.00
PM2.5  | Pred:    25.15 | Actual:    20.00
PM10   | Pred:    56.69 | Actual:    36.00
O3     | Pred:    20.02 | Actual:     9.00
CO     | Pred:   174.64 | Actual:   165.00
SO2    | Pred:     4.27 | Actual:     3.00
NO2    | Pred:    13.92 | Actual:     8.00


In [None]:
# AQI range: 15 to 450
# PM2.5 range: 4.0 to 500.0
# PM10 range: 7.0 to 480.0
# O3 range: 1.0 to 225.0
# CO range: 0.0 to 410.0
# SO2 range: 2.0 to 57.0
# NO2 range: 0.67 to 169.0