In [1]:
import os
from glob import glob
from warnings import simplefilter

In [2]:
simplefilter(action='ignore', category=FutureWarning)
simplefilter(action='ignore', category=UserWarning)

#### Configure path parameters and read data files

In [None]:
MIDS_USER = os.getenv("MIDS_USER", None)  # this can be replaced, with e.g. `m25...`

if MIDS_USER is None:
  ROOT_DIR = os.path.abspath(os.path.join(os.getcwd(), *['..'] * 1)) 
else:
  ROOT_DIR = f"/home/mids/{MIDS_USER}/Capstone"

IMAGES_DIR = os.path.join(ROOT_DIR, "data", "datasets", "aircraft", "images")
if not os.path.exists(IMAGES_DIR):
  ROOT_DIR = os.path.join(ROOT_DIR, "lc-14")
  IMAGES_DIR = os.path.join(ROOT_DIR, "data", "datasets", "aircraft", "images")

RADOM_SEED = 2020
TRAIN_FRAC = 0.8
VAL_FRAC = 0.1

NUM_EPOCHS = 20  # This can be adjusted as needed 

In [4]:
image_dir = IMAGES_DIR
labels_fp = os.path.join(ROOT_DIR, "data", "datasets", "aircraft", "annotations.csv")

#### Load utility functions

In [5]:
os.chdir(ROOT_DIR)

In [6]:
import torch
import torch.nn as nn
import albumentations as A
from albumentations.pytorch import ToTensorV2

from src.utilities.data.aircraft_dataloader import get_dataloader

#### Define a minimal transformation pipeline

In [7]:
transformations = A.Compose([
  A.Resize(256, 256),  # This transform resizes each image before subsequent processing
  A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),  # This transformation normalizes from standard RGB to grayscale
  ToTensorV2(),  # This transform maps our image to a torch.Tensor object
])


#### Building a `torch.Dataloader`.

In [8]:
# Create the DataLoader
dataloader = get_dataloader(
  image_dir=image_dir,
  labels_fp=labels_fp,
  transformations=transformations,
  mode='train',
  train_frac=TRAIN_FRAC,
  val_frac=VAL_FRAC,
  seed=RADOM_SEED,
  batch_size=1,
  shuffle=True,
  num_workers=1,
)

#### Building a baseline MLP using `torch.nn.Module`

In [None]:
class BaselineMLP(nn.Module):
  def __init__(self):
    super(BaselineMLP, self).__init__()
    self.flatten = nn.Flatten()  # This layer flattens the input to a 1D tensor
    self.fc1 = nn.Linear(256 * 256 * 3, 128)  # 256 * 256 * 3 is the number of input features
    self.fc2 = nn.Linear(128, 64)  # 128 is the number of output features from the previous layer
    self.fc3 = nn.Linear(64, 1)  # 64 is the number of output features from the previous layer

  def forward(self, x):
    x = self.flatten(x)
    x = torch.relu(self.fc1(x))  # include a nonlinearity between each fully connected layer
    x = torch.relu(self.fc2(x))
    x = self.fc3(x)
    return x


#### Training the model

In [None]:
# Basic hyperparameters
learning_rate = 1e-3

In [None]:
baseline_mlp_model = BaselineMLP()
criterion = nn.MSELoss()  # Mean Squared Error for regression
optimizer = torch.optim.Adam(baseline_mlp_model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(NUM_EPOCHS):
  for i, (images, targets) in enumerate(dataloader):
    if i == len(dataloader) - 1: continue  # save the last batch for demonstration
    # Forward pass
    outputs = baseline_mlp_model(images)
    loss = criterion(outputs.squeeze(), targets)  # Ensure outputs are squeezed to match counts shape

    # Backward pass and optimization
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

  print(f"Epoch [{epoch+1}/{NUM_EPOCHS}], Loss: {loss.item():.4f}")


Epoch [1/10], Loss: 2428.9146
Epoch [2/10], Loss: 0.0791
Epoch [3/10], Loss: 308.8823
Epoch [4/10], Loss: 262.7151
Epoch [5/10], Loss: 12.2399
Epoch [6/10], Loss: 3.0773
Epoch [7/10], Loss: 39.0467
Epoch [8/10], Loss: 12.6623
Epoch [9/10], Loss: 64.6397
Epoch [10/10], Loss: 3.1391


Check the model's prediction on the validation set for a `torch.Dataset` with the same `train_frac`, `val_frac`, and `seed`.

In [12]:
# Create the DataLoader
val_dataloader = get_dataloader(
  image_dir,
  labels_fp,
  transformations=transformations,
  mode='val',
  train_frac=TRAIN_FRAC,
  val_frac=VAL_FRAC,
  seed=RADOM_SEED,
  batch_size=1,
  shuffle=False,
  num_workers=1,
)

In [13]:
losses = []

for i, (val_images, targets) in enumerate(val_dataloader):
  predicted_counts = baseline_mlp_model(val_images)
  # We validate based on the mean absolute error
  losses.append(torch.abs(predicted_counts - targets).item())
  print(f"val image {i+1}, predicted count: {predicted_counts.item():.4f}, true count: {targets.item():.4f}")

mean_loss = sum(losses) / len(losses)
print(f"Mean absolute error: {mean_loss:.4f}")

val image 1, predicted count: 14.8642, true count: 31.0000
val image 2, predicted count: 21.4533, true count: 24.0000
val image 3, predicted count: 14.4551, true count: 19.0000
val image 4, predicted count: 39.5331, true count: 49.0000
val image 5, predicted count: 28.1392, true count: 27.0000
val image 6, predicted count: 4.8535, true count: 15.0000
val image 7, predicted count: 18.2386, true count: 30.0000
val image 8, predicted count: 46.2416, true count: 52.0000
val image 9, predicted count: 20.0325, true count: 39.0000
val image 10, predicted count: 1.6681, true count: 26.0000
Mean absolute error: 10.4799
