In [None]:
# Install the AWS command-line client; the `aws` commands in the following cells depend on it.
!pip install awscli

In [None]:
# Set up credentials/region for the AWS CLI. `aws configure` asks for its values
# interactively.
# NOTE(review): confirm the prompts can be answered from this notebook front end;
# otherwise run the command once in a terminal. Clear this cell's output before
# sharing the notebook so no credential fragments are left behind.
!aws configure

In [None]:
# List the Rio (AOI_1) prefix of the SpaceNet bucket to see what can be downloaded.
!aws s3 ls s3://spacenet-dataset/AOIs/AOI_1_Rio/

In [None]:
# Create the local dataset folder (-p: no error if it already exists), then download
# the processed building-labels archive into it. Because of `&&`, the download only
# starts when the folder could be created.
!mkdir -p datasets/AOIs/AOI_1_Rio && aws s3 cp s3://spacenet-dataset/AOIs/AOI_1_Rio/processedData/processedBuildingLabels.tar.gz datasets/AOIs/AOI_1_Rio/

In [None]:
# Extract the downloaded top-level archive; -C places the extracted content inside
# the dataset folder instead of the current working directory.
!tar -xvzf datasets/AOIs/AOI_1_Rio/processedBuildingLabels.tar.gz -C datasets/AOIs/AOI_1_Rio/

In [None]:
# Extract the 3-band satellite images. No -C is given, so the archive is unpacked
# relative to the notebook's working directory — presumably producing ./3band, the
# folder name used later when the images are counted (TODO confirm archive layout).
!tar -xvzf datasets/AOIs/AOI_1_Rio/processedBuildingLabels/3band.tar.gz

In [None]:
# Extract the GeoJSON files that hold the labels for the satellite images. No -C is
# given, so the archive is unpacked relative to the notebook's working directory —
# presumably producing ./geojson, the folder name used later when the labels are
# counted (TODO confirm archive layout).
!tar -xvzf datasets/AOIs/AOI_1_Rio/processedBuildingLabels/vectordata/geojson.tar.gz

In [None]:
import os
from os.path import join

In [None]:
# Folder names (relative to the working directory) that hold the extracted data.
images_folder = "3band"
labels_folder = "geojson"

# Report how many directory entries each folder contains, images first.
for folder, description in (
    (images_folder, "satellite images"),
    (labels_folder, "geojson labels"),
):
    print(len(os.listdir(folder)), description)

In [None]:
!pip install rasterio

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import albumentations as A
from albumentations.pytorch import ToTensorV2
from create_data_loaders import create_data_loaders

In [None]:
import shutil

# Folder that will receive the merged images and labels. Built with os.path.join
# so the path separator is correct on every platform.
target_dir = os.path.join(os.getcwd(), "Rio")
os.makedirs(target_dir, exist_ok=True)

def merge_folders(source1, source2, destination, rename_duplicates=True):
    """Copy every regular file from two folders into one destination folder.

    Only files located directly inside ``source1`` and ``source2`` are copied
    (with their metadata, via ``shutil.copy2``); sub-directories are ignored.

    The function is safe to call repeatedly: when ``rename_duplicates`` is
    True, a file whose content is already present in ``destination`` (under
    its own name or under a previously generated ``<name>_<n><ext>`` name) is
    skipped instead of being copied again under yet another suffix.

    Args:
        source1: Path of the first folder; its files are copied first.
        source2: Path of the second folder; its files are copied second.
        destination: Path of the output folder; created if it does not exist.
        rename_duplicates: If True, a file whose name is already taken by a
            *different* file is stored as ``<name>_<n><ext>`` using the first
            free ``n``. If False, an existing file of the same name is
            overwritten.
    """
    import filecmp  # local import: only this function needs it

    os.makedirs(destination, exist_ok=True)

    def copy_files(source):
        for filename in os.listdir(source):
            src_path = os.path.join(source, filename)
            if not os.path.isfile(src_path):
                continue

            dest_path = os.path.join(destination, filename)

            if rename_duplicates:
                name, ext = os.path.splitext(filename)
                counter = 1
                already_merged = False
                while os.path.exists(dest_path):
                    # Same content already there (e.g. the cell was re-run):
                    # nothing to do. shallow=False compares file contents, not
                    # just size/mtime, so distinct files are never dropped.
                    if os.path.isfile(dest_path) and filecmp.cmp(
                        src_path, dest_path, shallow=False
                    ):
                        already_merged = True
                        break
                    # Name is taken by a different file: try the next suffix.
                    dest_path = os.path.join(destination, f"{name}_{counter}{ext}")
                    counter += 1
                if already_merged:
                    continue

            shutil.copy2(src_path, dest_path)

    copy_files(source1)
    copy_files(source2)

    print("Finish merging")

# Copy the satellite images and their GeoJSON labels into the single target_dir
# folder, which is the directory later handed to create_data_loaders.
merge_folders(images_folder, labels_folder, target_dir)

In [None]:
# Re-import the project modules so that edits made to their source files are
# picked up without restarting the kernel.
import importlib
import RioDataset
# Note: this binds the name `create_data_loaders` to the *module*, replacing the
# function of the same name imported earlier; the function has to be re-imported
# with `from create_data_loaders import create_data_loaders` before it is called.
import create_data_loaders
# RioDataset is reloaded first — presumably create_data_loaders depends on it
# (TODO confirm), in which case this order is required.
importlib.reload(RioDataset)
importlib.reload(create_data_loaders)

In [None]:
# Re-import the function (the name may currently refer to the module object).
from create_data_loaders import create_data_loaders

# Build the train/validation/test loaders over the merged folder with a
# 70/15/15 split and a fixed seed.
train_loader, val_loader, test_loader, full_dataset = create_data_loaders(
    target_dir,
    batch_size=32,
    train_ratio=0.7,
    val_ratio=0.15,
    test_ratio=0.15,
    random_seed=42,
)

# Show what was returned.
for created in (train_loader, val_loader, test_loader, full_dataset):
    print(created)

In [None]:
# Build the U-Net and place it on the GPU when one is available, else on the CPU.
from models.unet.model import UNet

# in_channels must match the number of bands in the .tiff files.
model = UNet(in_channels=3, num_classes=1)

if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device: ", device)

model = model.to(device)

In [None]:
from DiceBCELoss import DiceBCELoss

# Loss function used for both training and validation.
criterion = DiceBCELoss()

# Adam over all model parameters.
optimizer = optim.Adam(model.parameters(), lr=1e-3)

# Halve the learning rate once the monitored value has stopped decreasing
# for more than 5 consecutive scheduler steps.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='min',
    factor=0.5,
    patience=5,
)

In [None]:
from tqdm import tqdm

# Training loop: for every epoch run one pass over train_loader (with weight
# updates) and one pass over val_loader (no gradients), report the averaged
# metrics and let the scheduler react to the validation loss. The final weights
# are written to disk once all epochs are done.
num_epochs = 50
for epoch in range(num_epochs):
    # ---- Training phase ----
    model.train()
    train_loss = 0.0
    train_acc = 0.0

    loop = tqdm(train_loader, total=len(train_loader), desc=f"Epoch {epoch+1}/{num_epochs}")
    for images, masks in loop:
        images = images.to(device)
        masks = masks.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, masks)

        # Backward pass and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Pixel accuracy of the thresholded prediction (outputs are logits,
        # hence the sigmoid before the 0.5 cut-off).
        pred = torch.sigmoid(outputs) > 0.5
        acc = (pred == masks).float().mean()

        train_loss += loss.item()
        train_acc += acc.item()

        loop.set_postfix({
            "loss": loss.item(),
            "acc": acc.item()
        })

    # ---- Validation phase ----
    model.eval()
    val_loss = 0.0
    dice_score = 0.0

    with torch.no_grad():
        for images, masks in val_loader:
            images = images.to(device)
            masks = masks.to(device)

            outputs = model(images)
            loss = criterion(outputs, masks)
            val_loss += loss.item()

            # Dice coefficient of the thresholded prediction. The prediction
            # is cast to float so the product/sum below are real arithmetic
            # regardless of the mask dtype, and the result is stored as a
            # Python float instead of piling up device tensors.
            pred = (torch.sigmoid(outputs) > 0.5).float()
            intersection = (pred * masks).sum()
            dice_score += (2 * intersection / ((pred + masks).sum() + 1e-8)).item()

    # Average over the number of batches; max(..., 1) avoids a
    # ZeroDivisionError when a loader yields no batches.
    num_train_batches = max(len(train_loader), 1)
    num_val_batches = max(len(val_loader), 1)
    avg_train_loss = train_loss / num_train_batches
    avg_train_acc = train_acc / num_train_batches
    avg_val_loss = val_loss / num_val_batches
    avg_dice = dice_score / num_val_batches

    # Print metrics
    print(f'Epoch {epoch+1}/{num_epochs}, Train Loss: {avg_train_loss:.4f}, '
          f'Train Acc: {avg_train_acc:.4f}, '
          f'Val Loss: {avg_val_loss:.4f}, Dice: {avg_dice:.4f}')

    # Update learning rate based on the same averaged validation loss that is reported
    scheduler.step(avg_val_loss)

# Save the trained model
torch.save(model.state_dict(), 'building_segmentation_model.pth')

In [None]:
# Clean up: drop the training objects and hand cached GPU memory back.
import gc
import torch

# The optimizer/scheduler keep references to the model parameters, the progress
# bar keeps the training loader, and images/masks/pred/outputs/loss hold the last
# batch — all of them must go, otherwise the memory stays allocated even though
# `model` itself was deleted. pop(..., None) keeps the cell re-runnable (a plain
# `del` raises NameError the second time).
for _name in (
    "model", "optimizer", "scheduler",
    "train_loader", "val_loader", "loop",
    "images", "masks", "pred", "outputs", "loss",
):
    globals().pop(_name, None)

# Collect unreachable objects first so their tensors are really freed before the
# CUDA cache is emptied.
gc.collect()

# ipc_collect() initialises the CUDA runtime, which fails on a CPU-only machine,
# so the CUDA calls are only made when a GPU is actually available.
if torch.cuda.is_available():
    torch.cuda.empty_cache()
    torch.cuda.ipc_collect()