# First model: model 01

In [1]:
import torchvision.transforms as transforms
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader  # <-- Import Dataset
import albumentations as A
from albumentations.pytorch import ToTensorV2
import os
import numpy as np
import pandas as pd
from PIL import Image
import cv2
import tqdm as tqdm

  from .autonotebook import tqdm as notebook_tqdm


In [2]:

def create_multi_class_mask(image_size, polygons_boil, polygons_pan):
    """
    Rasterise panel polygons into one class-index mask.

    Pixel values in the returned mask:
    - 0: inside a boiler polygon (``polygons_boil``)
    - 1: inside a photovoltaic polygon (``polygons_pan``)
    - 2: background (not covered by any polygon)

    Photovoltaic polygons are painted after boiler polygons, so where the
    two overlap the pixel ends up labelled 1.

    Args:
        image_size: Shape handed to ``np.full`` for the mask, e.g.
            ``(height, width)``.
        polygons_boil: Iterable of boiler polygons. Each polygon is assumed
            to be a sequence of (x, y) vertices that ``cv2.fillPoly``
            accepts -- TODO confirm against the metadata schema.
        polygons_pan: Iterable of photovoltaic polygons, same format.

    Returns:
        A ``uint8`` NumPy array of shape ``image_size``.
    """
    # Start from an all-background canvas.
    mask = np.full(image_size, 2, dtype=np.uint8)

    # (class label, polygons) pairs in painting order; later layers overwrite
    # earlier ones where they overlap.
    layers = (
        (0, polygons_boil),
        (1, polygons_pan),
    )
    for class_id, polygons in layers:
        for polygon in polygons:
            vertices = np.array([polygon], dtype=np.int32)
            cv2.fillPoly(mask, vertices, class_id)

    return mask


In [6]:
# Load the train / validation metadata tables from their pickle files.
# NOTE(review): pickle executes arbitrary code on load -- only use files
# produced by a trusted step of this project.
df_train = pd.read_pickle('Model_Train.pkl')
df_val = pd.read_pickle('Model_Val.pkl')

# Training-time augmentation pipeline. Albumentations applies the spatial
# transforms to image and mask together when both are passed in one call.
albumentations_transform = A.Compose(
    [
        # Bring every sample to a fixed 512x512 resolution.
        A.Resize(height=512, width=512),
        # Random geometric augmentations.
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        A.RandomRotate90(p=0.5),
        # Random photometric augmentations.
        A.RandomBrightnessContrast(p=0.3),
        A.GaussianBlur(p=0.2),
        # Per-channel normalisation with the mean/std used by the pretrained backbone.
        A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        # Convert the NumPy arrays to torch tensors.
        ToTensorV2(),
    ]
)

class SolarPanelDataset(Dataset):
    """Dataset yielding ``(image, mask)`` pairs for panel segmentation.

    Each item is built from one metadata row: the JPEG named after the row's
    ``img_id`` is loaded from ``image_dir`` and a class-index mask is
    rasterised from the row's ``polygons_boil`` / ``polygons_pan`` columns
    with ``create_multi_class_mask``.

    Args:
        metadata_df: Table indexed positionally via ``.iloc``; rows must
            provide ``img_id``, ``polygons_boil`` and ``polygons_pan``.
        image_dir: Directory containing ``<img_id>.jpg`` files.
        transform: Optional callable invoked as
            ``transform(image=..., mask=...)`` and returning a mapping with
            ``"image"`` and ``"mask"`` entries (the Albumentations calling
            convention). When ``None`` the raw RGB array is returned.
        mask_size: Kept for backward compatibility; it is stored but not
            read anywhere in this class.
    """

    def __init__(self, metadata_df, image_dir, transform=None, mask_size=(512, 512)):
        self.metadata = metadata_df
        self.image_dir = image_dir
        self.transform = transform
        self.mask_size = mask_size  # Stored only; not used by __getitem__

    def __len__(self):
        """Return the number of metadata rows (one sample per row)."""
        return len(self.metadata)

    @staticmethod
    def _to_label_tensor(mask):
        """Return ``mask`` as an int64 tensor of class labels.

        ``torch.as_tensor`` accepts both NumPy arrays and tensors; unlike
        ``torch.tensor`` it does not emit the copy-construct ``UserWarning``
        when the transform has already produced a tensor.
        """
        return torch.as_tensor(mask, dtype=torch.long)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, mask)``.

        Returns:
            image: The transformed image, or the raw ``H x W x 3`` uint8
                array when no transform is configured.
            mask: int64 tensor of per-pixel class labels.
        """
        row = self.metadata.iloc[idx]
        img_path = f"{self.image_dir}/{row['img_id']}.jpg"

        # Close the file handle deterministically; convert() materialises the
        # pixel data before the context exits.
        with Image.open(img_path) as img:
            image = np.array(img.convert("RGB"))

        # Rasterise the mask at the image's native resolution so that image
        # and mask go through the transform with matching shapes.
        img_height, img_width = image.shape[:2]
        mask = create_multi_class_mask((img_height, img_width), row['polygons_boil'], row['polygons_pan'])

        # transform defaults to None, so only call it when one was supplied.
        if self.transform is not None:
            augmented = self.transform(image=image, mask=mask)
            image, mask = augmented["image"], augmented["mask"]

        return image, self._to_label_tensor(mask)

# Directory that holds the <img_id>.jpg files referenced by the metadata.
image_dir = "/Users/joaop.cardoso/MestradoCD/CAA/Project 1/images"

# Training split: uses the augmenting pipeline.
train_dataset = SolarPanelDataset(
    metadata_df=df_train,
    image_dir=image_dir,
    transform=albumentations_transform,
)

# Validation split: resize + normalise only, so evaluation has no random
# augmentation.
val_dataset = SolarPanelDataset(
    metadata_df=df_val,
    image_dir=image_dir,
    transform=A.Compose(
        [
            A.Resize(height=512, width=512),
            A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
            ToTensorV2(),
        ]
    ),
)

# Batch the datasets; only the training loader reshuffles.
batch_size = 8
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=batch_size, shuffle=False)


In [7]:
# Build torchvision's DeepLabV3 with a ResNet-50 backbone, requesting the
# pretrained weights.
model = torchvision.models.segmentation.deeplabv3_resnet50(pretrained=True)

# Swap classifier[4] for a 1x1 convolution with 3 output channels, one per
# class (Boiler=0, Photovoltaics=1, Background=2).
# NOTE(review): assumes classifier[4] is the head's final 256-channel conv -- confirm.
model.classifier[4] = nn.Conv2d(in_channels=256, out_channels=3, kernel_size=(1, 1))




In [8]:
# Loss: per-pixel cross-entropy over the class logits.
criterion = nn.CrossEntropyLoss()

# Optimizer: Adam over every model parameter.
optimizer = optim.Adam(model.parameters(), lr=0.0001)

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
# (Both branches previously selected "cpu", so the GPU was never used.)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

DeepLabV3(
  (backbone): IntermediateLayerGetter(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Se

In [None]:
def iou_score(preds, labels, num_classes=3):
    """Return the mean per-class IoU between predictions and labels.

    Args:
        preds: Score tensor whose dimension 1 indexes the classes; the
            predicted class is the argmax over that dimension.
        labels: Tensor of class indices, comparable element-wise with the
            argmax of ``preds``.
        num_classes: Number of class indices (``0 .. num_classes - 1``) to
            evaluate.

    Returns:
        The mean IoU over the classes that occur in either the predictions
        or the labels. Classes absent from both contribute NaN and are
        skipped by ``np.nanmean``.
    """
    predicted = torch.argmax(preds, dim=1)

    per_class = []
    for class_id in range(num_classes):
        in_pred = predicted == class_id
        in_true = labels == class_id

        union = (in_pred | in_true).sum().item()
        if union:
            overlap = (in_pred & in_true).sum().item()
            per_class.append(overlap / union)
        else:
            # Class appears nowhere: IoU is undefined for it.
            per_class.append(float('nan'))

    return np.nanmean(per_class)

# Train for a fixed number of epochs; after each epoch evaluate on the
# validation loader and checkpoint the weights whenever the mean validation
# loss improves.
num_epochs = 5
best_val_loss = float("inf")  # Track best validation loss

for epoch in range(num_epochs):
    # Switch the model to training mode and reset the per-epoch accumulators.
    model.train()
    running_loss = 0.0
    total_iou = 0.0
    num_batches = 0

    # Training Loop
    for images, masks in tqdm.tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs} Training"):

        images, masks = images.to(device), masks.to(device)

        # Clear gradients left over from the previous step before the forward pass.
        optimizer.zero_grad()
        outputs = model(images)['out']  # Model output (logits)

        loss = criterion(outputs, masks)  # Loss function
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        total_iou += iou_score(outputs, masks, num_classes=3)  # Compute IoU
        num_batches += 1

    # Epoch metrics are plain means over batches (each batch weighted equally).
    # NOTE: raises ZeroDivisionError if the loader produced no batches.
    avg_train_loss = running_loss / num_batches
    avg_train_iou = total_iou / num_batches

    # Validation Loop
    model.eval()
    val_loss = 0.0
    val_iou = 0.0
    num_batches = 0  # Reused as the validation batch counter

    # No gradients are needed for evaluation.
    with torch.no_grad():
        for images, masks in tqdm.tqdm(val_loader, desc="Validation"):

            images, masks = images.to(device), masks.to(device)
            outputs = model(images)['out']
            loss = criterion(outputs, masks)

            val_loss += loss.item()
            val_iou += iou_score(outputs, masks, num_classes=3)
            num_batches += 1

    # Same per-batch averaging as for the training metrics.
    avg_val_loss = val_loss / num_batches
    avg_val_iou = val_iou / num_batches

    # Save Best Model
    # Only the state_dict is written; the file is overwritten on each improvement.
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        torch.save(model.state_dict(), "best_model.pth")
        print("🔥 Best Model Saved!")

    # Print Epoch Results
    print(f"\n🔹 Epoch {epoch+1}/{num_epochs}")
    print(f"   📉 Train Loss: {avg_train_loss:.4f} | 🏆 Train IoU: {avg_train_iou:.4f}")
    print(f"   📉 Val Loss: {avg_val_loss:.4f} | 🏆 Val IoU: {avg_val_iou:.4f}")



  mask = torch.tensor(mask, dtype=torch.long)
