# IPEO Project: Hurricane Damage Detection with Deep Learning

## 1. Setup

### 1.1 Install dependencies

In [None]:
!pip install -U -q torch torchvision matplotlib tqdm gdown 

### 1.2 Check if GPU is available

In [None]:
import torch
from torchvision import transforms, datasets
from torch.utils.data import WeightedRandomSampler, DataLoader
from collections import Counter

# Print whether PyTorch reports a usable CUDA device (True/False)
print(torch.cuda.is_available())

## 2. Data Loading

The data is already downloaded and stored in the local IPEO folder.

In [None]:
!unzip ipeo_hurricane_damage.zip -d ipeo_hurricane_for_students # Use only one time to unzip data, I now have it downloaded locally and on the cluster

### 2.1 Write a PyTorch Dataset class

In [None]:
# Import Hurricane dataset, so that it is not on the __main__
from dataset import Hurricane, PoissonNoise

Look at a random sample to understand what the pictures look like.

In [None]:
import matplotlib.pyplot as plt
from ipywidgets import interact

# Training split with no transform attached, used below to look at samples
dataset = Hurricane(root_dir="ipeo_hurricane_for_students", split="train", transforms=None)  # Change this path if needed

# Sanity checks on what was loaded
print(len(dataset))        # expected to be > 0
print(dataset.data[:5])    # expected to show (image_path, label) tuples

@interact(idx=range(len(dataset)))
def plot_sample(idx=10000):
    """Show the sample stored at position ``idx`` of ``dataset``, titled with its label.

    NOTE(review): the default index 10000 is only valid if the training split
    holds more than 10000 samples - confirm against the actual dataset size.
    """
    sample_image, sample_label = dataset[idx]

    _, ax = plt.subplots(figsize=(5, 5))
    ax.imshow(sample_image)
    ax.set_title(f"Label: {sample_label}")
    ax.axis("off")
    plt.show()

### 2.2 Define transforms

First, I calculate the per-channel mean and standard deviation of the training data, which are then used to normalize the images.

In [None]:
from torch.utils.data import DataLoader
from torchvision.transforms import v2
import numpy as np

# Temporary transform to load images as tensors (no normalization!)
# NOTE(review): torchvision documents v2.ToTensor as deprecated in favour of
# v2.Compose([v2.ToImage(), v2.ToDtype(torch.float32, scale=True)]) - consider migrating.
to_tensor = v2.ToTensor()

def compute_mean_std(dataset, batch_size=32, num_workers=2):
    """Compute the per-channel mean and standard deviation of an image dataset.

    The dataset must yield ``(image, label)`` pairs whose images are tensors of
    identical shape ``(C, H, W)`` so that they can be batched.

    Args:
        dataset: Dataset to iterate over (once, in order).
        batch_size: Number of images loaded per step.
        num_workers: Worker processes used by the DataLoader (0 = main process).

    Returns:
        Tuple ``(mean, std)`` of float32 tensors of shape ``(C,)``.

    Raises:
        ValueError: If the dataset yields no images.
    """
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

    n_pixels = 0            # pixels seen per channel
    channel_sum = None      # running sum of x, per channel
    channel_sq_sum = None   # running sum of x**2, per channel

    for images, _ in loader:
        # Accumulate in float64: float32 running sums lose precision once the
        # totals get large, and E[x^2] - E[x]^2 amplifies that error.
        imgs = images.to(torch.float64)

        if channel_sum is None:
            # Size the accumulators from the data instead of assuming RGB
            n_channels = imgs.shape[1]
            channel_sum = torch.zeros(n_channels, dtype=torch.float64)
            channel_sq_sum = torch.zeros(n_channels, dtype=torch.float64)

        n_pixels += imgs.numel() // imgs.shape[1]
        channel_sum += imgs.sum(dim=[0, 2, 3])
        channel_sq_sum += (imgs ** 2).sum(dim=[0, 2, 3])

    if n_pixels == 0:
        raise ValueError("cannot compute mean/std of an empty dataset")

    mean = channel_sum / n_pixels
    # Rounding can push the variance of a (near-)constant channel slightly
    # below zero, which would turn the square root into NaN.
    variance = (channel_sq_sum / n_pixels - mean ** 2).clamp(min=0.0)
    std = torch.sqrt(variance)
    return mean.float(), std.float()


# Statistics are computed on the training split; the only transform attached
# is the tensor conversion, so no augmentation or normalization is involved.
train_dataset_noaug = Hurricane(root_dir="ipeo_hurricane_for_students", split="train", transforms=to_tensor)

mean, std = compute_mean_std(train_dataset_noaug)
print(f"Mean: {mean}")
print(f"Std: {std}")


I now define the transformations applied to the training data for data augmentation.

In [None]:
from torch.utils.data import DataLoader
from torchvision.transforms import v2
import numpy as np

# Per-channel mean and standard deviation used for normalization.
# Presumably the (rounded) values printed by the statistics cell, hard-coded so
# that pass does not have to be re-run - re-check them if the data changes.
mean = torch.tensor([0.3518, 0.3832, 0.2849])
std = torch.tensor([0.1439, 0.1243, 0.1257])

# Normalize image channels to zero mean / unit standard deviation
normalize = v2.Normalize(mean, std)

# ImageNet normalization for the ResNet model, built with the v2 API like
# every other transform in this notebook.
# NOTE(review): the pipelines in this notebook use `normalize` (dataset
# statistics), not `normalize_resnet` - confirm that this is intended.
normalize_resnet = v2.Normalize(mean=[0.485, 0.456, 0.406],
                                std=[0.229, 0.224, 0.225])

# Inverse of `normalize`, used for plotting later. Normalize computes
# (x - m) / s, so m = -mean / std and s = 1 / std give back x * std + mean.
# The 1e-7 only guards against a division by zero.
std_inv = 1 / (std + 1e-7)
unnormalize = v2.Normalize(-mean * std_inv, std_inv)

# Training pipeline: random augmentations first, then resizing to 224x224,
# tensor conversion, noise injection and normalization with `normalize`.
# The order matters: everything before ToTensor receives the image as returned
# by the dataset, everything after it receives a tensor.
transforms_train = v2.Compose([
  # Random crop of the image, resized to 200x200
  v2.RandomResizedCrop((200, 200)),
  # Grayscale conversion / flips, each applied with the transform's default probability
  v2.RandomGrayscale(),
  v2.RandomHorizontalFlip(),
  v2.RandomVerticalFlip(),
  # NOTE(review): a tuple kernel_size is a fixed (non-square) kernel, not a
  # range to sample from; only sigma is sampled from (0.1, 5) - confirm intent.
  v2.GaussianBlur(kernel_size=(5, 9), sigma=(0.1, 5)),
  # Reduce each channel to 2 bits (applied with the transform's default probability)
  v2.RandomPosterize(bits=2),
  v2.ColorJitter(brightness=0.5, contrast=0.5, saturation=0.5, hue=0.5),
  v2.Resize((224, 224)),
  # NOTE(review): v2.ToTensor is documented as deprecated in torchvision
  v2.ToTensor(),
  # Project-defined transform imported from the `dataset` module; the exact
  # meaning of `lam` is not visible here - see its definition.
  PoissonNoise(lam=30.0),
  normalize
])

# I do not augment the validation dataset (aside from resizing and tensor casting)
transforms_val = v2.Compose([
  v2.Resize((224, 224)),
  v2.ToTensor(),
  normalize
])

I now visualize the data transformation

In [None]:
# Index of the sample used to compare both pipelines side by side
dataset_index = 500

img, label = dataset[dataset_index]

fig, axs = plt.subplots(1, 2, figsize=(12, 6))

# Undo the normalization and move channels last (C, H, W -> H, W, C) for imshow.
# Values are clamped to [0, 1]: imshow clips float images to that range anyway,
# clamping here just avoids the clipping warning.
axs[0].imshow(unnormalize(transforms_val(img)).permute(1, 2, 0).clamp(0, 1))
axs[0].set_title("validation transform (no augmentation)")

axs[1].imshow(unnormalize(transforms_train(img)).permute(1, 2, 0).clamp(0, 1))
axs[1].set_title("training transform")

# Plain loop instead of a list comprehension: the calls are made for their side
# effect only, and a trailing list expression would be echoed as cell output.
for ax in axs:
    ax.axis("off")

I now add the transform function to the dataset

In [None]:
# Training split: augmented pipeline
train_dataset = Hurricane(
    root_dir="ipeo_hurricane_for_students",
    split="train",
    transforms=transforms_train,
)

# Validation and test splits: deterministic pipeline (no augmentation)
val_dataset = Hurricane(
    root_dir="ipeo_hurricane_for_students",
    split='validation',
    transforms=transforms_val,
)
test_dataset = Hurricane(
    root_dir="ipeo_hurricane_for_students",
    split='test',
    transforms=transforms_val,
)

# Visualization
def _show_random_samples(ds, title, n_samples=5):
    """Plot ``n_samples`` randomly drawn samples of ``ds`` in one row.

    Args:
        ds: Dataset returning ``(normalized image tensor, label index)`` pairs.
        title: Figure title.
        n_samples: Number of samples (columns) to draw.
    """
    # Class names in the key order of Hurricane.LABEL_CLASSES, indexed by label
    class_names = list(Hurricane.LABEL_CLASSES.keys())

    fig, axs = plt.subplots(1, n_samples, figsize=(n_samples * 3, 3))
    # atleast_1d keeps the loop valid when a single column is requested
    for ax in np.atleast_1d(axs):
        idx = np.random.randint(len(ds))  # random sample
        image, label = ds[idx]
        # Undo the normalization, move channels last and clamp to the [0, 1]
        # range imshow expects for float images (avoids clipping warnings).
        ax.imshow(unnormalize(image).permute(1, 2, 0).clamp(0, 1))
        ax.set_title(f"idx {idx}, {class_names[label]}")
        ax.axis("off")

    fig.suptitle(title)
    plt.tight_layout()


_show_random_samples(train_dataset, "training samples")
_show_random_samples(val_dataset, "validation samples")

In [None]:
### ----------DataLoader creation---------

### Compute class weights for balanced sampling
train_labels = [label for _, label in train_dataset.data]
labels_tensor = torch.tensor(train_labels)

# Number of samples per class, e.g. [1300, 6000]
class_counts = torch.bincount(labels_tensor)
# Inverse class frequency, e.g. [1/1300, 1/6000]. The clamp only protects
# against a division by zero for a label index that never occurs.
class_weights = 1.0 / class_counts.clamp(min=1)
# One weight per sample, looked up from its class in a single indexing
# operation (instead of a Python list of 0-dim tensors).
sample_weights = class_weights[labels_tensor]

# Check the weights: total sampling mass contributed by each class
total_weight_class0 = class_weights[0] * class_counts[0]
total_weight_class1 = class_weights[1] * class_counts[1]

total = total_weight_class0 + total_weight_class1

print("Sampling probabilities (true):")
print("damage (0)    →", (total_weight_class0 / total).item())
print("no_damage (1) →", (total_weight_class1 / total).item())

# Training batches are now sampled equally from all classes:
# on average, each batch holds the same number of samples from each class.
# Sampling is done with replacement, so one "epoch" draws len(sample_weights)
# samples and minority-class images are repeated.
sampler = WeightedRandomSampler(sample_weights, num_samples=len(sample_weights), replacement=True)

### 4. Create DataLoaders
BATCH_SIZE = 32

# Settings shared by all three loaders. Pinned host memory only pays off when
# batches are copied to a CUDA device, so it is enabled exactly in that case
# and stays off on machines without CUDA (e.g. macOS).
loader_kwargs = dict(
    batch_size=BATCH_SIZE,
    num_workers=4,
    pin_memory=torch.cuda.is_available(),
)

# Training: the order is decided by the class-balancing sampler
train_loader = DataLoader(train_dataset, sampler=sampler, **loader_kwargs)

# Validation / test: fixed order, every sample exactly once
val_loader = DataLoader(val_dataset, shuffle=False, **loader_kwargs)
test_loader = DataLoader(test_dataset, shuffle=False, **loader_kwargs)

In [None]:
### ----------ResNet 18----------
# Import
import torchvision.models as models
import torch.nn as nn
from sklearn.metrics import accuracy_score, f1_score
from torchvision.models import resnet18, ResNet18_Weights



# Function to build a ResNet model with a custom number of output classes
def build_resnet(num_classes=2, pretrained=True):
    """Build a ResNet18 whose classification head has ``num_classes`` outputs.

    ResNet18 extracts generic visual features; only the last fully connected
    layer is replaced, so the network can learn to distinguish
    damage / no_damage.

    Args:
        num_classes: Number of output logits of the new head.
        pretrained: If True, load the default pretrained ResNet18 weights;
            if False, start from randomly initialized weights.

    Returns:
        The model with a freshly initialized ``fc`` layer.
    """
    # Honour the `pretrained` flag (weights=None means random initialization)
    weights = ResNet18_Weights.DEFAULT if pretrained else None
    model = resnet18(weights=weights)

    # Replace the last fully connected layer
    in_features = model.fc.in_features
    model.fc = nn.Linear(in_features, num_classes)

    return model

# Create the model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = build_resnet(num_classes=2, pretrained=True)
model = model.to(device)

# Sanity check: push one training batch through the network
images, labels = next(iter(train_loader))
images = images.to(device)

# Run the check in eval mode and without autograd, so it neither updates the
# BatchNorm running statistics nor builds a computation graph.
# train_one_epoch() switches the model back to train mode itself.
model.eval()
with torch.no_grad():
    logits = model(images)

print("Logits shape:", logits.shape) # Expected output:Logits shape: torch.Size([32, 2])

In [None]:
# Loss: cross-entropy computed on the logits of the classification head
criterion = nn.CrossEntropyLoss()

# Optimizer: AdamW, which is well-suited for training deep learning models
# and stable with ResNet
optimizer = torch.optim.AdamW(params=model.parameters(), lr=1e-4, weight_decay=1e-4)


# ----------Training loop----------
def train_one_epoch(model, loader, optimizer, criterion, device):
    """Train ``model`` for one pass over ``loader``.

    Args:
        model: Network to optimize (switched to train mode here).
        loader: Iterable of ``(images, targets)`` batches.
        optimizer: Optimizer stepped once per batch.
        criterion: Loss called as ``criterion(outputs, targets)``.
        device: Device the batches are moved to.

    Returns:
        Tuple ``(mean loss per sample, accuracy, F1 score)`` over all samples
        seen during the pass.
    """
    model.train()
    all_preds = []
    all_targets = []
    running_loss = 0.0
    n_seen = 0  # samples actually processed in this pass

    for images, targets in loader:
        images = images.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        # Weight the batch loss by the batch size; this assumes the criterion
        # averages over the batch (the nn.CrossEntropyLoss default).
        batch_size = images.size(0)
        running_loss += loss.item() * batch_size
        n_seen += batch_size

        preds = torch.argmax(outputs, dim=1)
        all_preds.append(preds.cpu())
        all_targets.append(targets.cpu())

    all_preds = torch.cat(all_preds).numpy()
    all_targets = torch.cat(all_targets).numpy()

    acc = accuracy_score(all_targets, all_preds)
    # NOTE(review): with its defaults f1_score reports the binary F1 of label 1
    # - confirm that this is the class of interest.
    f1 = f1_score(all_targets, all_preds)

    # Normalize by the samples seen, not by len(loader.dataset): the two differ
    # whenever the sampler draws another number of samples or batches are dropped.
    return running_loss / n_seen, acc, f1


# ----------Validation loop----------
def evaluate(model, loader, criterion, device):
    """Evaluate ``model`` on every batch of ``loader`` without updating it.

    Args:
        model: Network to evaluate (switched to eval mode here).
        loader: Iterable of ``(images, targets)`` batches.
        criterion: Loss called as ``criterion(outputs, targets)``.
        device: Device the batches are moved to.

    Returns:
        Tuple ``(mean loss per sample, accuracy, F1 score)`` over all samples
        seen.
    """
    model.eval()
    all_preds = []
    all_targets = []
    running_loss = 0.0
    n_seen = 0  # samples actually processed

    with torch.no_grad():
        for images, targets in loader:
            images = images.to(device)
            targets = targets.to(device)

            outputs = model(images)
            loss = criterion(outputs, targets)

            # Weight the batch loss by the batch size; this assumes the criterion
            # averages over the batch (the nn.CrossEntropyLoss default).
            batch_size = images.size(0)
            running_loss += loss.item() * batch_size
            n_seen += batch_size

            preds = torch.argmax(outputs, dim=1)
            all_preds.append(preds.cpu())
            all_targets.append(targets.cpu())

    all_preds = torch.cat(all_preds).numpy()
    all_targets = torch.cat(all_targets).numpy()

    acc = accuracy_score(all_targets, all_preds)
    # NOTE(review): with its defaults f1_score reports the binary F1 of label 1
    # - confirm that this is the class of interest.
    f1 = f1_score(all_targets, all_preds)

    # Normalize by the samples seen rather than len(loader.dataset), so the
    # value stays a per-sample mean for any sampler / drop_last setting.
    return running_loss / n_seen, acc, f1

In [None]:
#----------Main training loop----------
num_epochs = 10
# Start below any reachable F1 score so the first epoch always writes a
# checkpoint. Starting at 0.0 with a strict ">" would never save anything if
# the validation F1 stayed at 0.0, and the later load would fail or pick up a
# stale file from a previous run.
best_val_f1 = float("-inf")

for epoch in range(num_epochs):
    train_loss, train_acc, train_f1 = train_one_epoch(
        model, train_loader, optimizer, criterion, device
    )

    val_loss, val_acc, val_f1 = evaluate(
        model, val_loader, criterion, device
    )

    print(f"\nEpoch {epoch+1}/{num_epochs}")
    print(f"Train → loss={train_loss:.4f}, acc={train_acc:.4f}, f1={train_f1:.4f}")
    print(f"Val   → loss={val_loss:.4f}, acc={val_acc:.4f}, f1={val_f1:.4f}")

    # Keep the weights of the epoch with the best validation F1
    if val_f1 > best_val_f1:
        best_val_f1 = val_f1
        torch.save(model.state_dict(), "best_resnet18.pt")
        print("Best model saved")

# ----------Testing loop----------
# Load the best model.
# map_location places the tensors on the current device even if the checkpoint
# was written on another one (e.g. trained on GPU, evaluated on CPU);
# weights_only restricts unpickling to tensors and plain containers, which is
# all a state_dict holds.
model.load_state_dict(
    torch.load("best_resnet18.pt", map_location=device, weights_only=True)
)

# Evaluate on test set
test_loss, test_acc, test_f1 = evaluate(
    model, test_loader, criterion, device
)

# Print test results
print(f"\nTEST → loss={test_loss:.4f}, acc={test_acc:.4f}, f1={test_f1:.4f}")