# primeiro modelo: modelo 02 com randomsearch


In [11]:
import torchvision.transforms as transforms
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader  # <-- Import Dataset
import albumentations as A
from albumentations.pytorch import ToTensorV2
import os
import numpy as np
import pandas as pd
from PIL import Image
import cv2
import tqdm as tqdm

import optuna
from optuna.pruners import MedianPruner

In [18]:

def create_multi_class_mask(image_size, polygons_boil, polygons_pan):
    """
    Rasterise boiler and photovoltaic polygons into a two-label mask.

    The mask starts out filled with 1 (the photovoltaic label), so every pixel
    that is not covered by a boiler polygon ends up labelled 1. Boiler polygons
    are painted first with label 0; photovoltaic polygons are painted afterwards
    with label 1, so they take precedence wherever the two kinds overlap.

    Args:
        image_size: Shape of the mask to allocate, handed straight to NumPy.
        polygons_boil: Iterable of boiler polygons. Each polygon is converted
            with ``np.array([polygon], dtype=np.int32)`` before being filled
            (presumably a sequence of (x, y) vertices -- confirm against the
            metadata tables).
        polygons_pan: Iterable of photovoltaic polygons, same format.

    Returns:
        A ``uint8`` NumPy array of shape ``image_size`` containing 0 and 1.
    """
    label_mask = np.ones(image_size, dtype=np.uint8)

    # Paint order matters: boilers (0) first, photovoltaics (1) on top.
    for class_label, class_polygons in ((0, polygons_boil), (1, polygons_pan)):
        for vertices in class_polygons:
            contour = np.array([vertices], dtype=np.int32)
            cv2.fillPoly(label_mask, contour, class_label)

    return label_mask


In [19]:
# Pre-split metadata tables for training and validation.
# NOTE(review): read_pickle unpickles arbitrary objects -- only load files you trust.
df_train = pd.read_pickle('Model_Train.pkl')
df_val = pd.read_pickle('Model_Val.pkl')

# Training-time Albumentations pipeline (the same pipeline receives image & mask).
train_transform_steps = [
    # Bring both image and mask to the 256x256 working resolution.
    A.Resize(256, 256),
    # Random geometric augmentations.
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.5),
    A.RandomRotate90(p=0.5),
    # Random photometric augmentations.
    A.RandomBrightnessContrast(p=0.3),
    A.GaussianBlur(p=0.2),
    # Channel-wise normalisation with the mean/std used for the DeepLabV3+ input.
    A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    # Hand the result over as torch tensors.
    ToTensorV2(),
]
albumentations_transform = A.Compose(train_transform_steps)

class SolarPanelDataset(Dataset):
    """Dataset yielding ``(image, mask)`` pairs for panel segmentation.

    Each item is built from one metadata row: the JPEG named after ``img_id``
    is loaded as RGB, and a label mask of the same height/width is rasterised
    from the row's ``polygons_boil`` and ``polygons_pan`` entries via
    ``create_multi_class_mask``.

    Args:
        metadata_df: Table indexed positionally with ``.iloc``; rows must expose
            ``img_id``, ``polygons_boil`` and ``polygons_pan``.
        image_dir: Directory containing ``<img_id>.jpg`` files.
        transform: Optional callable invoked as ``transform(image=..., mask=...)``
            and returning a mapping with ``"image"`` and ``"mask"`` entries.
            When ``None``, the raw RGB array is returned as a channels-first
            ``uint8`` tensor without resizing or normalisation.
        mask_size: Accepted and stored for backward compatibility only; nothing
            in this class reads it -- masks are always created at the source
            image's resolution.
    """

    def __init__(self, metadata_df, image_dir, transform=None, mask_size=(512, 512)):
        self.metadata = metadata_df
        self.image_dir = image_dir
        self.transform = transform
        self.mask_size = mask_size  # Unused by this class; kept so existing callers still work

    def __len__(self):
        """Number of metadata rows, i.e. number of samples."""
        return len(self.metadata)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, mask)`` with the mask as a long tensor."""
        row = self.metadata.iloc[idx]
        img_path = f"{self.image_dir}/{row['img_id']}.jpg"

        # Close the file handle deterministically instead of leaving it to the GC.
        with Image.open(img_path) as img:
            image = np.array(img.convert("RGB"))

        # Build the mask at the image's own resolution so both stay aligned
        # through any spatial transform applied below.
        img_height, img_width = image.shape[:2]
        mask = create_multi_class_mask((img_height, img_width), row['polygons_boil'], row['polygons_pan'])
        mask = np.asarray(mask, dtype=np.uint8)

        if self.transform is not None:
            augmented = self.transform(image=image, mask=mask)
            image, mask = augmented["image"], augmented["mask"]
        else:
            # The signature advertises transform=None, so honour it:
            # convert HWC -> CHW ourselves instead of calling None.
            image = torch.from_numpy(image).permute(2, 0, 1).contiguous()

        # as_tensor accepts both NumPy arrays and tensors; unlike torch.tensor it
        # does not raise the "copy construct from a tensor" UserWarning when the
        # transform has already produced a tensor mask.
        mask = torch.as_tensor(mask, dtype=torch.long)

        return image, mask

# Image directory. Overridable through the SOLAR_IMAGE_DIR environment variable so
# the notebook is not tied to a single machine; defaults to the original local path.
image_dir = os.environ.get(
    "SOLAR_IMAGE_DIR",
    "/Users/joaop.cardoso/MestradoCD/CAA/Project 1/images",
)

# Training dataset uses the augmenting pipeline defined above.
train_dataset = SolarPanelDataset(df_train, image_dir, transform=albumentations_transform)

# Validation dataset: deterministic preprocessing only (resize + normalise),
# no random augmentation, so validation metrics are comparable across epochs.
val_dataset = SolarPanelDataset(df_val, image_dir, transform=A.Compose([
    A.Resize(256, 256),
    A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),  # Normalize for DeepLabV3+
    ToTensorV2()
]))

# DataLoaders: shuffle the training samples each epoch, keep validation order fixed.
batch_size = 4
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)


In [20]:
import segmentation_models_pytorch as smp

# Pick the compute device up front: CUDA when available, CPU otherwise.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# DeepLabV3+ configuration.
deeplab_config = dict(
    encoder_name="efficientnet-b4",  # EfficientNet-B4 backbone
    encoder_weights="imagenet",      # start from pretrained ImageNet weights
    in_channels=3,                   # RGB input
    classes=2,                       # Boiler (0), Photovoltaic (1)
)
model = smp.DeepLabV3Plus(**deeplab_config)

# Move the weights to the chosen device; as the cell's last expression this
# also echoes the module structure.
model.to(device)


DeepLabV3Plus(
  (encoder): EfficientNetEncoder(
    (_conv_stem): Conv2dStaticSamePadding(
      3, 48, kernel_size=(3, 3), stride=(2, 2), bias=False
      (static_padding): ZeroPad2d((0, 1, 0, 1))
    )
    (_bn0): BatchNorm2d(48, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)
    (_blocks): ModuleList(
      (0): MBConvBlock(
        (_depthwise_conv): Conv2dStaticSamePadding(
          48, 48, kernel_size=(3, 3), stride=[1, 1], groups=48, bias=False
          (static_padding): ZeroPad2d((1, 1, 1, 1))
        )
        (_bn1): BatchNorm2d(48, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)
        (_se_reduce): Conv2dStaticSamePadding(
          48, 12, kernel_size=(1, 1), stride=(1, 1)
          (static_padding): Identity()
        )
        (_se_expand): Conv2dStaticSamePadding(
          12, 48, kernel_size=(1, 1), stride=(1, 1)
          (static_padding): Identity()
        )
        (_project_conv): Conv2dStatic

In [21]:
# Multi-class segmentation objective: cross-entropy over the class logits.
criterion = nn.CrossEntropyLoss()

# Adam at a learning rate of 1e-4; StepLR multiplies the rate by 0.5
# after every 5 calls to scheduler.step().
optimizer = optim.Adam(params=model.parameters(), lr=1e-4)
scheduler = optim.lr_scheduler.StepLR(optimizer, gamma=0.5, step_size=5)

Strong signs of overfitting

Next strategy:
- Use RandomSearch with *patience* to find the best hyperparameters
- Use early stopping to avoid overfitting

Second model
- Cancelled the hyperparameter fine-tuning because it would take far too long
- Dropped the third class (background); now working only with the two classes that contain panels

Third model
- Increase the polygon areas to train a YOLO model



In [None]:
from tqdm import tqdm
import numpy as np

def iou_score(preds, labels, num_classes=3):
    """Mean Intersection-over-Union across classes for one batch.

    ``preds`` holds per-class scores; an argmax over dim 1 turns them into
    class ids, which are compared element-wise against ``labels``. For every
    class id in ``range(num_classes)`` the IoU is intersection / union of the
    two boolean maps. A class present in neither tensor yields NaN and is
    excluded from the mean.

    Returns:
        The NaN-ignoring mean of the per-class IoU values (a NumPy float).
    """
    predicted = torch.argmax(preds, dim=1)

    def _class_iou(cls):
        # IoU of a single class id; NaN when the class is absent from both tensors.
        pred_hit = predicted == cls
        true_hit = labels == cls
        union = (pred_hit | true_hit).sum().item()
        if union == 0:
            return float('nan')
        return (pred_hit & true_hit).sum().item() / union

    per_class = [_class_iou(cls) for cls in range(num_classes)]
    return np.nanmean(per_class)

# Train for a fixed number of epochs, validating after each one and keeping the
# weights of the epoch with the lowest mean validation loss.
num_epochs = 20
best_val_loss = float("inf")

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    total_iou = 0.0
    num_batches = 0

    # Training Loop
    for images, masks in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs} Training"):
        images, masks = images.to(device), masks.to(device)

        optimizer.zero_grad()
        outputs = model(images)  # Class logits, class dimension at index 1

        loss = criterion(outputs, masks)  # Compute loss
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        # Score over exactly the classes the model emits (channel dim of the
        # logits) rather than a hard-coded count that can drift from the model;
        # detach so the metric never touches the autograd graph.
        total_iou += iou_score(outputs.detach(), masks, num_classes=outputs.shape[1])
        num_batches += 1

    # Per-batch averages for the epoch.
    avg_train_loss = running_loss / num_batches
    avg_train_iou = total_iou / num_batches

    # Validation Loop
    model.eval()
    val_loss = 0.0
    val_iou = 0.0
    num_batches = 0  # Reused as the validation batch counter

    with torch.no_grad():
        for images, masks in tqdm(val_loader, desc="Validation"):
            images, masks = images.to(device), masks.to(device)
            outputs = model(images)
            loss = criterion(outputs, masks)

            val_loss += loss.item()
            val_iou += iou_score(outputs, masks, num_classes=outputs.shape[1])
            num_batches += 1

    avg_val_loss = val_loss / num_batches
    avg_val_iou = val_iou / num_batches

    # Save Best Model (checkpoint only when validation loss improves)
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        torch.save(model.state_dict(), "best_model_effnet.pth")
        print("🔥 Best Model Saved!")

    print(f"\n🔹 Epoch {epoch+1}/{num_epochs}")
    print(f"   📉 Train Loss: {avg_train_loss:.4f} | 🏆 Train IoU: {avg_train_iou:.4f}")
    print(f"   📉 Val Loss: {avg_val_loss:.4f} | 🏆 Val IoU: {avg_val_iou:.4f}")

    # Advance the learning-rate schedule once per epoch.
    scheduler.step()


  mask = torch.tensor(mask, dtype=torch.long)
Epoch 1/20 Training: 100%|██████████| 610/610 [41:43<00:00,  4.10s/it]
Validation: 100%|██████████| 153/153 [04:07<00:00,  1.62s/it]


🔥 Best Model Saved!

🔹 Epoch 1/20
   📉 Train Loss: 0.0928 | 🏆 Train IoU: 0.6747
   📉 Val Loss: 0.0159 | 🏆 Val IoU: 0.7641


Epoch 2/20 Training: 100%|██████████| 610/610 [40:33<00:00,  3.99s/it]
Validation: 100%|██████████| 153/153 [04:03<00:00,  1.59s/it]


🔥 Best Model Saved!

🔹 Epoch 2/20
   📉 Train Loss: 0.0124 | 🏆 Train IoU: 0.7690
   📉 Val Loss: 0.0099 | 🏆 Val IoU: 0.7641


Epoch 3/20 Training:  47%|████▋     | 284/610 [20:30<22:21,  4.11s/it]