In [1]:
import torch
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
import albumentations as A
from albumentations.pytorch import ToTensorV2
import numpy as np
import pandas as pd
import cv2
from PIL import Image
from sklearn.preprocessing import OneHotEncoder
from collections import Counter

import segmentation_models_pytorch as smp
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.cuda.amp import autocast, GradScaler

from tqdm import tqdm


  from .autonotebook import tqdm as notebook_tqdm


This model is using a different strategy:
- The metadata was encoded onto the images via one hot encoding 
- Based on the 2 classes and 2 origins, the class balancing was attempted for the 4 classes during the albumentations step (although officially there are only 2 classes still, solar and boiler)


In [10]:
# Function to create multi-class mask
def create_multi_class_mask(image_size, polygons_boil, polygons_pan):
    mask = np.full(image_size, 1, dtype=np.uint8)  # Default background is Photovoltaic (1)
    
    # Draw boiler panels (0)
    for polygon in polygons_boil:
        cv2.fillPoly(mask, np.array([polygon], dtype=np.int32), 0)

    return mask

# Load data
df_train = pd.read_pickle('Model_Train.pkl')
df_val = pd.read_pickle('Model_Val.pkl')

# One-hot encode metadata
encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
metadata_encoded = encoder.fit_transform(df_train[['img_placement', 'img_origin']])

# Define transformation pipelines
albumentations_transform = A.Compose([
    A.Resize(512, 512),
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.5),
    A.RandomRotate90(p=0.5),
    A.RandomBrightnessContrast(p=0.3),
    A.GaussianBlur(p=0.2),
    A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ToTensorV2(),
])

# Dataset class
class SolarPanelDataset(Dataset):
    def __init__(self, metadata_df, image_dir, transform=None, mask_size=(512, 512), balance=False):
        self.metadata = metadata_df
        self.image_dir = image_dir
        self.transform = transform
        self.mask_size = mask_size
        self.balance = balance
        
        # One-hot encode metadata
        self.encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
        self.encoded_metadata = self.encoder.fit_transform(self.metadata[['img_placement', 'img_origin']])
        
        # Create class labels for balancing
        self.class_labels = self.metadata.apply(lambda row: f"{row['img_origin']}_{'solar' if row['polygons_pan'] else 'boiler'}", axis=1)
        
        # Compute class weights for balancing
        if balance:
            class_counts = Counter(self.class_labels)
            self.weights = [1.0 / class_counts[label] for label in self.class_labels]
        else:
            self.weights = None

    def __len__(self):
        return len(self.metadata)

    def __getitem__(self, idx):
        row = self.metadata.iloc[idx]
        img_path = f"{self.image_dir}/{row['img_id']}.jpg"
        image = np.array(Image.open(img_path).convert("RGB"))

        # Create the mask
        mask = create_multi_class_mask(image.shape[:2], row['polygons_boil'], row['polygons_pan'])
        mask = np.array(mask, dtype=np.uint8)

        # Apply transformations
        augmented = self.transform(image=image, mask=mask)
        image, mask = augmented["image"], augmented["mask"]

        # Convert mask to long tensor
        mask = torch.tensor(mask, dtype=torch.long)

        # Get one-hot encoded metadata
        metadata_vector = torch.tensor(self.encoded_metadata[idx], dtype=torch.float32)

        return image, mask, metadata_vector  # Return metadata as additional input

# Define image directory
image_dir = "/Users/joaop.cardoso/MestradoCD/CAA/Project 1/images"

# Create train dataset with class balancing
train_dataset = SolarPanelDataset(df_train, image_dir, transform=albumentations_transform, balance=True)
val_dataset = SolarPanelDataset(df_val, image_dir, transform=A.Compose([
    A.Resize(512, 512),
    A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ToTensorV2()
]))

# Create Weighted Sampler for class balancing
if train_dataset.weights:
    sampler = WeightedRandomSampler(weights=train_dataset.weights, num_samples=len(train_dataset), replacement=True)
else:
    sampler = None

# Create DataLoaders
batch_size = 4
train_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=sampler if sampler else None, shuffle=sampler is None, num_workers=0)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)


In [11]:
# Load DeepLabV3+ with EfficientNet-B4 backbone
model = smp.DeepLabV3Plus(
    encoder_name="efficientnet-b4",  # EfficientNet-B4 as the encoder
    encoder_weights="imagenet",  # Pretrained weights
    in_channels=3,  # RGB images
    classes=2  # Boiler (0), Photovoltaic (1)
)

# Move model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Add dropout before the classifier correctly
model.segmentation_head = nn.Sequential(
    nn.Dropout(0.3),  # 30% dropout
    model.segmentation_head
)

# Define loss function (CrossEntropy + Dice Loss for better performance)
criterion = nn.CrossEntropyLoss()
dice_loss = smp.losses.DiceLoss(mode='multiclass')

# Adam optimizer with weight decay
optimizer = optim.Adam(model.parameters(), lr=1e-4, weight_decay=1e-4)

# Learning rate scheduler
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=3, verbose=True)

# Mixed precision scaler for faster GPU training
scaler = GradScaler()

  scaler = GradScaler()


In [12]:
# Function to calculate IoU
def iou_score(preds, labels, num_classes=2):
    """Compute IoU (Intersection over Union) for multi-class segmentation."""
    preds = torch.argmax(preds, dim=1)  # Convert logits to class predictions
    iou = []

    for cls in range(num_classes):
        intersection = ((preds == cls) & (labels == cls)).sum().item()
        union = ((preds == cls) | (labels == cls)).sum().item()
        if union == 0:
            iou.append(float('nan'))
        else:
            iou.append(intersection / union)

    return np.nanmean(iou)  # Ignore NaNs if a class is missing in batch


# 🔹 Model Setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = smp.DeepLabV3Plus(
    encoder_name="efficientnet-b4",
    encoder_weights="imagenet",
    in_channels=3,
    classes=2
).to(device)

# 🔹 Add Dropout Correctly
model.segmentation_head = nn.Sequential(
    nn.Dropout(0.3),
    model.segmentation_head
)

# 🔹 Loss Functions (CrossEntropy + Dice Loss)
criterion = nn.CrossEntropyLoss()
dice_loss = smp.losses.DiceLoss(mode='multiclass')

# 🔹 Optimizer & LR Scheduler
optimizer = optim.Adam(model.parameters(), lr=1e-4, weight_decay=1e-4)
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=3, verbose=True)

# 🔹 Mixed Precision (Speeds up Training)
scaler = GradScaler()

# Training Hyperparameters
num_epochs = 20
best_val_loss = float("inf")
accumulation_steps = 4  # Simulates larger batch size

# 🔹 Training Loop
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    total_iou = 0.0
    num_batches = 0

    optimizer.zero_grad()  # Initialize gradients before accumulation

    for i, (images, masks, _) in enumerate(tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs} Training")):
        images, masks = images.to(device), masks.to(device)

        with autocast():  # Enables mixed precision
            outputs = model(images)  # Forward pass
            loss = criterion(outputs, masks) + dice_loss(outputs, masks)  # Combined loss
        
        scaler.scale(loss).backward()  # Accumulate gradients

        # 🔹 Only update every `accumulation_steps`
        if (i + 1) % accumulation_steps == 0 or (i + 1) == len(train_loader):
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad()  # Reset gradients

        running_loss += loss.item()
        total_iou += iou_score(outputs, masks, num_classes=2)
        num_batches += 1

    avg_train_loss = running_loss / num_batches
    avg_train_iou = total_iou / num_batches

    # 🔹 Validation Loop
    model.eval()
    val_loss = 0.0
    val_iou = 0.0
    num_batches = 0

    with torch.no_grad():
        for images, masks, _ in tqdm(val_loader, desc="Validation"):
            images, masks = images.to(device), masks.to(device)

            with autocast():  # Use mixed precision in inference
                outputs = model(images)
                loss = criterion(outputs, masks) + dice_loss(outputs, masks)

            val_loss += loss.item()
            val_iou += iou_score(outputs, masks, num_classes=2)
            num_batches += 1

    avg_val_loss = val_loss / num_batches
    avg_val_iou = val_iou / num_batches

    # 🔥 Save Best Model
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        torch.save(model.state_dict(), f"best_model_effnet_deepl_epoch{epoch}.pth")
        print("🔥 Best Model Saved!")

    # 🔹 Logging
    print(f"\n🔹 Epoch {epoch+1}/{num_epochs}")
    print(f"   📉 Train Loss: {avg_train_loss:.4f} | 🏆 Train IoU: {avg_train_iou:.4f}")
    print(f"   📉 Val Loss: {avg_val_loss:.4f} | 🏆 Val IoU: {avg_val_iou:.4f}")

    # 🔹 Adjust LR based on Validation Loss
    scheduler.step(avg_val_loss)


  scaler = GradScaler()
  mask = torch.tensor(mask, dtype=torch.long)
  with autocast():  # Enables mixed precision
Epoch 1/20 Training:   0%|          | 2/610 [01:02<5:18:06, 31.39s/it]


KeyboardInterrupt: 