In [None]:
# Install the AWS command-line client; the `aws ...` shell commands in the
# following cells depend on it being available on the shell PATH.
!pip install awscli

In [None]:
# Set up credentials/region for the AWS CLI used by the S3 commands below.
# NOTE(review): `aws configure` is interactive - confirm that the notebook
# front-end in use forwards its prompts, and clear this cell's output before
# sharing the notebook so that no credential text is left behind.
!aws configure

In [None]:
# List what is stored under the AOI_1_Rio prefix of the spacenet-dataset bucket
# (a sanity check that the bucket is reachable before downloading from it).
!aws s3 ls s3://spacenet-dataset/AOIs/AOI_1_Rio/

In [None]:
!mkdir datasets/AOIs/AOI_1_Rio && aws s3 cp s3://spacenet-dataset/AOIs/AOI_1_Rio/processedData/processedBuildingLabels.tar.gz datasets/AOIs/AOI_1_Rio/

In [None]:
# Extract the downloaded gzip-compressed tar archive (x = extract, v = verbose,
# z = gzip, f = archive file). `-C` makes tar unpack into datasets/AOIs/AOI_1_Rio/
# instead of the current working directory.
!tar -xvzf datasets/AOIs/AOI_1_Rio/processedBuildingLabels.tar.gz -C datasets/AOIs/AOI_1_Rio/

In [None]:
# Extract the 3-band satellite images.
# No `-C` is given, so the archive is unpacked relative to the current working
# directory. NOTE(review): the later cell reads a relative "3band" folder, so the
# archive presumably contains a top-level 3band/ directory - confirm.
!tar -xvzf datasets/AOIs/AOI_1_Rio/processedBuildingLabels/3band.tar.gz

In [None]:
# Extract the GeoJSON files that hold the labels for the satellite images.
# No `-C` is given, so the archive is unpacked relative to the current working
# directory. NOTE(review): the later cell reads a relative "geojson" folder, so the
# archive presumably contains a top-level geojson/ directory - confirm.
!tar -xvzf datasets/AOIs/AOI_1_Rio/processedBuildingLabels/vectordata/geojson.tar.gz

In [None]:
import os
from os.path import join

In [None]:
# Folder names are relative to the current working directory.
images_folder = "3band"
labels_folder = "geojson"

# Report the number of directory entries in each folder (every entry is
# counted, whatever its file type).
for folder, description in (
    (images_folder, "satellite images"),
    (labels_folder, "geojson labels"),
):
    print(len(os.listdir(folder)), description)

In [None]:
!pip install rasterio

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import albumentations as A
from albumentations.pytorch import ToTensorV2

In [None]:
# Define transformers
# Both pipelines resize to 448x448 (448 is a multiple of 32 = 2^5), normalise
# with the library defaults and convert to a tensor; only the training pipeline
# adds random flips and 90-degree rotations in between.
train_steps = [
    A.Resize(448, 448),
    A.HorizontalFlip(),
    A.VerticalFlip(),
    A.RandomRotate90(),
    A.Normalize(),
    ToTensorV2(),
]
train_transform = A.Compose(train_steps)

# Validation: deterministic preprocessing only, no augmentation.
val_steps = [
    A.Resize(448, 448),
    A.Normalize(),
    ToTensorV2(),
]
val_transform = A.Compose(val_steps)

In [None]:
# Create datasets
from torch.utils.data import Subset

from RioDataset import RioDataset

VAL_FRACTION = 0.2  # share of the samples held out for validation
SPLIT_SEED = 42     # fixed seed so the split is identical on every run

# Two views over the same files, one per transform pipeline: augmentations for
# training, deterministic preprocessing for validation.
train_view = RioDataset(
    tiff_dir=images_folder,
    geojson_dir=labels_folder,
    transform=train_transform
)
val_view = RioDataset(
    tiff_dir=images_folder,
    geojson_dir=labels_folder,
    transform=val_transform
)

# NOTE(review): the split below assumes index i refers to the same sample in
# both views (i.e. RioDataset orders its files deterministically) - confirm.
num_samples = len(train_view)
assert len(val_view) == num_samples, "both views must expose the same samples"

# Shuffle the indices once with a seeded generator, then cut them into two
# disjoint parts so that no validation sample is ever trained on.
split_generator = torch.Generator().manual_seed(SPLIT_SEED)
shuffled_indices = torch.randperm(num_samples, generator=split_generator).tolist()
num_val = int(num_samples * VAL_FRACTION)
if num_samples >= 2:
    # Keep at least one validation sample whenever a split is possible.
    num_val = max(1, num_val)

train_dataset = Subset(train_view, shuffled_indices[num_val:])
val_dataset = Subset(val_view, shuffled_indices[:num_val])
print(len(train_dataset), "training samples,", len(val_dataset), "validation samples")

In [None]:
# Create dataloaders
num_workers = 2
# Training samples are reshuffled every epoch so that the batches differ between
# epochs; validation keeps a fixed order because order does not affect the metrics.
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True, num_workers=num_workers)
val_loader = DataLoader(val_dataset, batch_size=4, shuffle=False, num_workers=num_workers)

In [None]:
# Initialize model
from models.unet.model import UNet

# Single output class; in_channels has to match the band count of the .tiff
# files (adjust it if the imagery is not 3-band).
model = UNet(in_channels=3, num_classes=1)

# Prefer the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print("Using device: ", device)

model = model.to(device)

In [None]:
from DiceBCELoss import DiceBCELoss

# Loss function used for both training and validation.
criterion = DiceBCELoss()

# Adam over all model parameters.
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

# Halve the learning rate once the monitored value (to be minimised) has not
# improved for more than 5 consecutive scheduler steps.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='min',
    factor=0.5,
    patience=5,
)

In [None]:
# Training loop
# Each epoch runs one optimisation pass over train_loader, then evaluates loss
# and Dice on val_loader, prints the per-batch averages and steps the scheduler.
num_epochs = 50
for epoch in range(num_epochs):
    # Training phase
    model.train()
    train_loss = 0.0

    for images, masks in train_loader:
        images = images.to(device)
        masks = masks.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, masks)

        # Backward pass and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_loss += loss.item()

    # Validation phase
    model.eval()
    val_loss = 0.0
    dice_score = 0.0

    with torch.no_grad():
        for images, masks in val_loader:
            images = images.to(device)
            masks = masks.to(device)

            outputs = model(images)
            loss = criterion(outputs, masks)
            val_loss += loss.item()

            # Calculate Dice coefficient on the thresholded predictions.
            # NOTE(review): assumes the model returns raw logits - confirm.
            # Both operands are cast to float explicitly: with boolean tensors
            # `+` would act as a logical OR and distort the denominator.
            pred = (torch.sigmoid(outputs) > 0.5).float()
            target = masks.float()
            batch_dice = (2 * (pred * target).sum()) / ((pred + target).sum() + 1e-8)
            # Accumulate a plain Python float rather than a device tensor.
            dice_score += batch_dice.item()

    # Print metrics (averaged over the number of batches)
    print(f'Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss/len(train_loader):.4f}, '
          f'Val Loss: {val_loss/len(val_loader):.4f}, Dice: {dice_score/len(val_loader):.4f}')

    # Update learning rate based on validation loss (the summed loss over all
    # validation batches is what gets monitored)
    scheduler.step(val_loss)

# Save the trained model (weights as they are after the final epoch)
torch.save(model.state_dict(), 'building_segmentation_model.pth')