In [1]:
import os
import numpy as np
from PIL import Image
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torchvision
from torchvision import transforms
import matplotlib.pyplot as plt
from pathlib import Path

In [7]:
# Environment report: where the kernel is running and which device will be used.
print(f"Current working directory: {os.getcwd()}")

# Prefer the GPU whenever CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

# GPU details are only queried when CUDA was actually selected.
if device.type == "cuda":
    gpu_name = torch.cuda.get_device_name(0)
    allocated_gib = torch.cuda.memory_allocated(0) / 1024**3  # scaled by 1024**3 for the "GB" readout
    print(f"GPU Name: {gpu_name}")
    print(f"GPU Memory Allocated: {allocated_gib:.2f} GB")

Current working directory: c:\Users\alpay\Dev_projects\Nelissen Project\facade-analysis-ai\notebooks
Using device: cuda
GPU Name: NVIDIA GeForce RTX 3050 Laptop GPU
GPU Memory Allocated: 0.68 GB


In [2]:
# Show the directory that the relative paths below are resolved against (debugging aid)
print(f"Current working directory: {os.getcwd()}")

# --- Dataset locations, all relative to the notebook's working directory ---
root_dir = Path("..") / "data" / "cmp_facade_dataset"
image_root = root_dir.joinpath("images")
mask_root = root_dir.joinpath("annotations")

# Report the roots and confirm the training split is present on disk
print(f"Image root path: {image_root}")
print(f"Mask root path: {mask_root}")
print(f"Image train path exists: {image_root.joinpath('train').exists()}")
print(f"Mask train path exists: {mask_root.joinpath('train').exists()}")


Current working directory: c:\Users\alpay\Dev_projects\Nelissen Project\facade-analysis-ai\notebooks
Image root path: ..\data\cmp_facade_dataset\images
Mask root path: ..\data\cmp_facade_dataset\annotations
Image train path exists: True
Mask train path exists: True


In [3]:
# Custom Dataset Class with Resizing
class CMPFacadeDataset(Dataset):
    """Paired image / segmentation-mask dataset read from two directories of PNGs.

    An image named ``image_<idx>.png`` is matched with the mask named
    ``annotation_<idx>.png``. Images without a matching mask are skipped with a
    printed warning. Every sample is resized to ``resize_size`` when loaded.

    Args:
        image_dir: Directory containing the ``image_*.png`` files.
        annot_dir: Directory containing the ``annotation_*.png`` files.
        transform: Optional callable applied to the image tensor only; the mask
            is never passed through it.
        resize_size: Target size handed to ``PIL.Image.Image.resize``.

    Raises:
        ValueError: If no image has a matching annotation file.
    """

    def __init__(self, image_dir, annot_dir, transform=None, resize_size=(512, 512)):
        self.image_dir = image_dir
        self.annot_dir = annot_dir
        self.transform = transform
        self.resize_size = resize_size

        # List image and annotation files
        print(f"Listing files in {image_dir}")
        image_files = sorted(f for f in os.listdir(image_dir) if f.endswith(".png"))
        print(f"Image files: {image_files[:5]} (total: {len(image_files)})")

        print(f"Listing files in {annot_dir}")
        annot_files = sorted(f for f in os.listdir(annot_dir) if f.endswith(".png"))
        print(f"Annotation files: {annot_files[:5]} (total: {len(annot_files)})")

        # Set membership keeps the pairing pass linear instead of scanning the
        # annotation list once per image.
        annot_lookup = set(annot_files)

        # Pair files by extracting indices (e.g., image_0.png with annotation_0.png)
        self.image_files = []
        self.annot_files = []
        for img_file in image_files:
            img_idx = img_file.replace("image_", "").replace(".png", "")
            annot_file = f"annotation_{img_idx}.png"
            if annot_file in annot_lookup:
                self.image_files.append(img_file)
                self.annot_files.append(annot_file)
            else:
                print(f"Warning: No matching annotation for {img_file}")

        if not self.image_files:
            raise ValueError(f"No matching image-annotation pairs found in {image_dir} and {annot_dir}")
        print(f"Found {len(self.image_files)} image-annotation pairs in {image_dir}")

    def __len__(self):
        """Return the number of matched image/annotation pairs."""
        return len(self.image_files)

    def __getitem__(self, idx):
        """Load, resize and tensorise the pair at position ``idx``.

        Returns:
            A tuple ``(image, annotation)``: ``image`` is a float tensor with
            the channel axis moved to the front and values divided by 255;
            ``annotation`` is a long tensor of class IDs clipped to 0-11.

        Raises:
            Exception: Any loading/processing error is logged with the image
                file name and then re-raised unchanged.
        """
        try:
            img_path = os.path.join(self.image_dir, self.image_files[idx])
            annot_path = os.path.join(self.annot_dir, self.annot_files[idx])

            # Context managers close the underlying files deterministically;
            # convert()/resize() return independent in-memory copies, so the
            # results stay valid after the files are closed.
            with Image.open(img_path) as img_src:
                image = img_src.convert("RGB").resize(self.resize_size, Image.Resampling.LANCZOS)
            with Image.open(annot_path) as annot_src:
                # NEAREST for masks to avoid interpolation artifacts (blended class IDs)
                annotation = annot_src.resize(self.resize_size, Image.Resampling.NEAREST)

            image = np.array(image)
            annotation = np.array(annotation)
            # NOTE(review): assumes the mask decodes to a single-channel array of
            # class IDs (e.g. palette/greyscale PNG) — confirm for this dataset.

            # Ensure class IDs are in range 0-11 (background as 0, classes 1-11).
            # Any ID above 11 is folded into class 11 rather than rejected.
            annotation = np.clip(annotation, 0, 11)

            # Convert to tensor: HWC uint8 -> CHW float scaled by 1/255
            image = torch.from_numpy(image).permute(2, 0, 1).float() / 255.0
            annotation = torch.from_numpy(annotation).long()

            if self.transform:
                image = self.transform(image)

            return image, annotation
        except Exception as e:
            # Log which sample failed, then propagate so the caller still sees the error
            print(f"Error loading {self.image_files[idx]}: {e}")
            raise

In [4]:
# --- Run configuration: the values a reader is most likely to tune ---
SEED = 42
NUM_CLASSES = 12           # class IDs 0-11, including background
RESIZE_SIZE = (512, 512)   # every image/mask is resized to this on load
BATCH_SIZE = 4
LEARNING_RATE = 0.001

# Seed PyTorch's global RNG so the shuffle order and the freshly initialised
# classifier head are repeatable after a kernel restart.
torch.manual_seed(SEED)

# Create Datasets with resizing
train_dataset = CMPFacadeDataset(image_root / "train", mask_root / "train", transform=None, resize_size=RESIZE_SIZE)
eval_dataset = CMPFacadeDataset(image_root / "eval", mask_root / "eval", resize_size=RESIZE_SIZE)
test_dataset = CMPFacadeDataset(image_root / "test", mask_root / "test", resize_size=RESIZE_SIZE)

# Create Data Loaders (only the training split is shuffled)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=0)
eval_loader = DataLoader(eval_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=0)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=0)

# Load DeepLabv3 Model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# NOTE(review): torchvision >= 0.13 deprecates `pretrained=` in favour of
# `weights=`; switch once the minimum supported torchvision version is confirmed.
model = torchvision.models.segmentation.deeplabv3_resnet50(pretrained=True)

# Replace the final 1x1 classification conv so the head predicts NUM_CLASSES
# classes. The input width is read from the layer being replaced instead of
# being hard-coded.
pretrained_head = model.classifier[4]
model.classifier[4] = nn.Conv2d(pretrained_head.in_channels, NUM_CLASSES, kernel_size=(1, 1), stride=(1, 1))
model = model.to(device)

# Loss and Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

Listing files in ..\data\cmp_facade_dataset\images\train
Image files: ['image_0.png', 'image_1.png', 'image_10.png', 'image_100.png', 'image_101.png'] (total: 378)
Listing files in ..\data\cmp_facade_dataset\annotations\train
Annotation files: ['annotation_0.png', 'annotation_1.png', 'annotation_10.png', 'annotation_100.png', 'annotation_101.png'] (total: 378)
Found 378 image-annotation pairs in ..\data\cmp_facade_dataset\images\train
Listing files in ..\data\cmp_facade_dataset\images\eval
Image files: ['image_0.png', 'image_1.png', 'image_10.png', 'image_100.png', 'image_101.png'] (total: 114)
Listing files in ..\data\cmp_facade_dataset\annotations\eval
Annotation files: ['annotation_0.png', 'annotation_1.png', 'annotation_10.png', 'annotation_100.png', 'annotation_101.png'] (total: 114)
Found 114 image-annotation pairs in ..\data\cmp_facade_dataset\images\eval
Listing files in ..\data\cmp_facade_dataset\images\test
Image files: ['image_0.png', 'image_1.png', 'image_10.png', 'image_10



In [6]:
# Train for a fixed number of epochs. After each epoch the model is scored on
# the evaluation split and checkpointed whenever the validation loss improves.
num_epochs = 10
best_loss = float('inf')
for epoch in range(num_epochs):
    # ---- Training pass ----
    model.train()
    running_loss = 0.0
    total_batches = len(train_loader)
    print(f"Starting Epoch {epoch+1}/{num_epochs}, Total Batches: {total_batches}")

    for step, (images, annotations) in enumerate(train_loader, start=1):
        print(f"Processing Batch {step}/{total_batches}")
        images = images.to(device)
        annotations = annotations.to(device)

        optimizer.zero_grad()
        logits = model(images)['out']
        loss = criterion(logits, annotations)
        loss.backward()
        optimizer.step()

        # Weight by batch size so a smaller final batch does not skew the epoch mean
        batch_loss = loss.item()
        running_loss += batch_loss * images.size(0)
        print(f"Batch {step}/{total_batches}, Loss: {batch_loss:.4f}")

    epoch_loss = running_loss / len(train_loader.dataset)
    print(f"Epoch {epoch+1}/{num_epochs}, Training Loss: {epoch_loss:.4f}")

    # ---- Validation pass (no gradient tracking) ----
    model.eval()
    val_loss = 0.0
    total_val_batches = len(eval_loader)
    print(f"Starting Validation, Total Batches: {total_val_batches}")

    with torch.no_grad():
        for step, (images, annotations) in enumerate(eval_loader, start=1):
            print(f"Validation Batch {step}/{total_val_batches}")
            images = images.to(device)
            annotations = annotations.to(device)
            logits = model(images)['out']
            val_loss += criterion(logits, annotations).item() * images.size(0)

    val_loss = val_loss / len(eval_loader.dataset)
    print(f"Validation Loss: {val_loss:.4f}")

    # Keep only the weights with the lowest validation loss seen so far
    if val_loss < best_loss:
        best_loss = val_loss
        torch.save(model.state_dict(), "deeplabv3_best_model.pth")
        print("Saved best model")


Starting Epoch 1/10, Total Batches: 95
Processing Batch 1/95
Batch 1/95, Loss: 1.3733
Processing Batch 2/95
Batch 2/95, Loss: 1.1901
Processing Batch 3/95
Batch 3/95, Loss: 1.2441
Processing Batch 4/95
Batch 4/95, Loss: 1.1750
Processing Batch 5/95
Batch 5/95, Loss: 1.2539
Processing Batch 6/95
Batch 6/95, Loss: 0.9911
Processing Batch 7/95


KeyboardInterrupt: 

In [None]:
# Testing
# Restore the best checkpoint written by the training loop. map_location remaps
# the stored tensors onto the active device, so a checkpoint saved on a GPU
# still loads on a CPU-only machine.
# NOTE(review): torch.load unpickles the file — only load checkpoints you trust.
model.load_state_dict(torch.load("deeplabv3_best_model.pth", map_location=device))
model.eval()
test_loss = 0.0
with torch.no_grad():
    for images, annotations in test_loader:
        images, annotations = images.to(device), annotations.to(device)
        outputs = model(images)['out']
        loss = criterion(outputs, annotations)
        # Weight by batch size so the average below is per sample
        test_loss += loss.item() * images.size(0)

test_loss = test_loss / len(test_loader.dataset)
print(f"Test Loss: {test_loss:.4f}")

# Show the first test sample next to its ground-truth mask and the model's prediction
model.eval()
with torch.no_grad():
    images, annotations = next(iter(test_loader))
    images = images.to(device)
    annotations = annotations.to(device)
    outputs = model(images)['out']
    preds = outputs.argmax(dim=1)  # most likely class per pixel

    # Both label maps share one colour scale so identical classes get identical colours
    label_style = {"cmap": "tab20", "vmin": 0, "vmax": 11}
    panels = [
        ("Image", images[0].cpu().permute(1, 2, 0).numpy(), {}),  # CHW -> HWC for imshow
        ("Ground Truth", annotations[0].cpu().numpy(), label_style),
        ("Prediction", preds[0].cpu().numpy(), label_style),
    ]

    # Plot the three panels side by side
    plt.figure(figsize=(15, 5))
    for position, (title, pixels, style) in enumerate(panels, start=1):
        plt.subplot(1, 3, position)
        plt.imshow(pixels, **style)
        plt.title(title)
        plt.axis("off")

    plt.show()

if __name__ == "__main__":
    print("Script executed successfully")