Kaggle Code Block

In [None]:
import os
from kaggle_secrets import UserSecretsClient

user_secrets = UserSecretsClient()
GITHUB_PAT = user_secrets.get_secret("GITHUB_PAT_CM")

!git clone "https://username:{GITHUB_PAT}@github.com/Noor-Nizar/ClosureMaster.git"
os.chdir("ClosureMaster")
# !pip install -r 'requirements.txt' -q ## TODO add requirements.txt

In [None]:
from models.PlaceNet import PlaceNet
from helpers import logger, visualize_segmentation
from datasets import SegmentationDataset
import logging
import torch

# Emit INFO-level (and more severe) messages from the project logger imported from `helpers`.
logger.setLevel(logging.INFO)

In [None]:
# Instantiate PlaceNet with its default constructor arguments; this is the model trained below.
model_pn = PlaceNet()

In [None]:
# Smoke test: push constant 7-channel tensors through PlaceNet at the three
# input resolutions (full, half, quarter) and report the output shapes.
dummy_shapes = [(1, 7, 480, 640), (1, 7, 240, 320), (1, 7, 120, 160)]
dummy_in_full, dummy_in_half, dummy_in_quarter = (
    torch.ones(shape) for shape in dummy_shapes
)

recon_full, recon_half, recon_quarter = model_pn(
    dummy_in_full, dummy_in_half, dummy_in_quarter
)

print("-" * 100)
for recon in (recon_full, recon_half, recon_quarter):
    print(recon.shape)

In [None]:
from helpers import WMSELoss

# Sanity-check the loss function: compare the reconstruction of the dummy
# full-resolution input against that same input and display the result.
loss = WMSELoss(recon_full, dummy_in_full)
loss

In [None]:
from transformers import AutoImageProcessor, UperNetForSemanticSegmentation
from torch.utils.data import DataLoader

# Load the pretrained UperNet segmentation model and its matching image
# processor from the same checkpoint name. `model` is used by the notebook only
# for inference (see `dflow`); `processor` is also handed to the datasets.
model_base = "openmmlab/upernet-swin-large"
processor = AutoImageProcessor.from_pretrained(model_base)
model = UperNetForSemanticSegmentation.from_pretrained(model_base)

In [None]:
# Replace this with the path to your dataset of images
image_dir = "/kaggle/input/city-center-visual-loop-detection/CityCentreImages/Images"
# image_dir = "/Users/noornizar/LocalDocuments/ClosureMaster/images"
import glob
import random

SPLIT_SEED = 42    # fixed seed so the train/val/test split is the same on every run
MAX_IMAGES = None  # set to a small integer (e.g. 16) for quick testing

val_ratio = 0.2
test_ratio = 0.1

# glob returns paths in filesystem order, which is not guaranteed to be stable
# across runs or machines; sort first so the seeded shuffle always starts from
# the same sequence.
all_images = sorted(glob.glob(image_dir + "/*.jpg"))
if not all_images:
    # Fail here with a clear message instead of an obscure error further down.
    raise FileNotFoundError(f"No .jpg images found in {image_dir!r}")

# Shuffle with a private RNG so the global `random` state is left untouched.
random.Random(SPLIT_SEED).shuffle(all_images)

# Optional quick-test mode: keep only a random subset. Truncating the list
# itself keeps every split (including the test split) within the subset.
if MAX_IMAGES is not None:
    all_images = all_images[:MAX_IMAGES]

# Calculate the number of images for each split; the test split takes the remainder.
num_images = len(all_images)
num_train = int(num_images * (1 - val_ratio - test_ratio))
num_val = int(num_images * val_ratio)
num_test = num_images - num_train - num_val

# Split the image paths
training_paths = all_images[:num_train]
val_paths = all_images[num_train:num_train + num_val]
test_paths = all_images[num_train + num_val:num_images]

print(f"Number of training images: {len(training_paths)}")
print(f"Number of validation images: {len(val_paths)}")
print(f"Number of test images: {len(test_paths)}")

# Create datasets
train_set = SegmentationDataset(training_paths, processor)
val_set = SegmentationDataset(val_paths, processor)
test_set = SegmentationDataset(test_paths, processor)

# Create dataloaders; only the training loader reshuffles between epochs.
train_loader = DataLoader(train_set, batch_size=4, shuffle=True)
val_loader = DataLoader(val_set, batch_size=8, shuffle=False)
test_loader = DataLoader(test_set, batch_size=8, shuffle=False)

In [None]:
from helpers.utils import classify_objects, classify_objects_tensor_batched
from helpers.visualization import convert_to_rgb, convert_to_rgb_batched

In [None]:
# Pull a single batch to confirm the loader yields three tensors per sample.
# The loader shuffles, so this is a random batch; a later cell draws a fresh
# batch into the same names before they are used.
s1, s2, s3 = next(iter(train_loader))

In [None]:
# Run on the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [None]:
# The segmentation model is only used for inference, so move it to the device
# and switch it to evaluation mode once here.
model = model.to(device)
model.eval()

# PlaceNet is moved to the same device; its train/eval mode is set by the training loop.
model_pn = model_pn.to(device)

In [None]:
import torchvision.transforms as T

# Per-channel normalization for the 3-channel RGB rendering of the segmentation.
# The mean/std values match the widely used ImageNet channel statistics.
normalizer = T.Compose([
                T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
            ])

# Normalization for the single-channel class map; it reuses the first-channel
# mean/std from `normalizer` above.
normalizer_cls = T.Compose([
                T.Normalize(mean=[0.485], std=[0.229])
            ])

@torch.no_grad()
def dflow(pixel_values):
    """Augment an image batch with segmentation-derived channels.

    The batch is passed through the global segmentation ``model``; the
    post-processed label maps are turned into (a) a class plane via
    ``classify_objects_tensor_batched`` and (b) an RGB rendering via
    ``convert_to_rgb_batched``. Both are normalized and concatenated to the
    input along dim 1, in the order ``[pixel_values, class plane, RGB planes]``.

    Gradients are disabled for the whole function.

    Args:
        pixel_values: Batched image tensor; dim 0 is the batch and dims 2+ are
            the spatial size used as the segmentation target size.
            (presumably (B, 3, H, W) from the image processor; TODO confirm)

    Returns:
        Tensor with the same batch and spatial dims as ``pixel_values`` and the
        extra segmentation channels appended on dim 1.
    """
    n_images = pixel_values.shape[0]
    spatial_size = pixel_values.shape[2:]

    # One label map per image, resized back to the input resolution.
    outputs = model(pixel_values)
    label_maps = processor.post_process_semantic_segmentation(
        outputs, target_sizes=[spatial_size for _ in range(n_images)]
    )
    seg = torch.stack(label_maps)

    # Single-channel class plane (channel axis inserted at dim 1), then normalized.
    class_plane = classify_objects_tensor_batched(seg).unsqueeze(1)
    # Colour rendering of the label map, rescaled from 0..255 to 0..1 before normalizing.
    colour_planes = convert_to_rgb_batched(seg)

    class_plane = normalizer_cls(class_plane)
    colour_planes = normalizer(colour_planes.float() / 255)

    return torch.cat([pixel_values, class_plane, colour_planes], dim=1)

In [None]:
# Draw one training batch, move its three tensors to the active device, and
# build the segmentation-augmented input for each of them.
s1, s2, s3 = (scale.to(device) for scale in next(iter(train_loader)))

combined_s1, combined_s2, combined_s3 = (dflow(scale) for scale in (s1, s2, s3))

In [None]:
# Shape of the augmented tensor passed to PlaceNet's first input slot; dim 1
# holds the image channels followed by the class plane and RGB segmentation planes.
combined_s1.shape

In [None]:
# Shape of the augmented tensor passed to PlaceNet's second input slot.
combined_s2.shape

In [None]:
# Shape of the augmented tensor passed to PlaceNet's third input slot.
combined_s3.shape

In [None]:
# Run PlaceNet on the real augmented batch and report the three output shapes.
reconstructions = model_pn(combined_s1, combined_s2, combined_s3)
recon_full, recon_half, recon_quarter = reconstructions

print("-" * 100)
print(recon_full.shape, recon_half.shape, recon_quarter.shape, sep="\n")

In [None]:
# ... existing imports ...
from torch.optim import Adam
from torch.optim.lr_scheduler import ReduceLROnPlateau
from tqdm import tqdm
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import clear_output


%matplotlib inline

def train_model(model_pn, train_loader, val_loader, num_epochs=20, patience=10, lr=0.3):
    """Train ``model_pn`` to reconstruct its three augmented inputs.

    Each batch is moved to the global ``device``, augmented with ``dflow`` and
    passed through ``model_pn``; the loss is the sum of ``WMSELoss`` between
    each reconstruction and its input. After every epoch the validation loss
    drives a ``ReduceLROnPlateau`` scheduler, best-checkpoint saving and early
    stopping, and the loss / learning-rate curves are re-plotted and written
    to ``training_metrics.png``.

    Relies on the notebook globals ``device``, ``dflow`` and ``WMSELoss``.

    Args:
        model_pn: Model to train; called as ``model_pn(x1, x2, x3)`` and must
            return three reconstructions.
        train_loader: Iterable of ``(s1, s2, s3)`` training batches.
        val_loader: Iterable of ``(s1, s2, s3)`` validation batches.
        num_epochs: Maximum number of epochs.
        patience: Number of consecutive non-improving epochs after which
            training stops early.
        lr: Initial Adam learning rate.
            NOTE(review): the default (0.3) is far above Adam's library
            default of 1e-3; confirm this is intentional.

    Returns:
        ``(model_pn, train_losses, val_losses, learning_rates)``. The model is
        returned with the weights from the last epoch that ran; the weights
        with the best validation loss are saved in ``best_placenet_model.pth``.

    Raises:
        ValueError: If either loader contains no batches.
    """
    # Guard against dividing by zero when averaging the per-batch losses.
    if len(train_loader) == 0 or len(val_loader) == 0:
        raise ValueError("train_loader and val_loader must each contain at least one batch")

    optimizer = Adam(model_pn.parameters(), lr=lr)
    # `verbose` is intentionally not passed: it is deprecated in recent PyTorch
    # releases, and its message was wiped by clear_output() straight away. The
    # current learning rate is printed with every epoch report instead.
    scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=4)
    criterion = WMSELoss

    def _multiscale_loss(s1, s2, s3):
        """Return the summed reconstruction loss over the three inputs of one batch."""
        s1, s2, s3 = s1.to(device), s2.to(device), s3.to(device)

        combined_s1 = dflow(s1)
        combined_s2 = dflow(s2)
        combined_s3 = dflow(s3)

        recon_full, recon_half, recon_quarter = model_pn(combined_s1, combined_s2, combined_s3)

        return (criterion(recon_full, combined_s1)
                + criterion(recon_half, combined_s2)
                + criterion(recon_quarter, combined_s3))

    best_val_loss = float('inf')
    early_stopping_counter = 0

    # Per-epoch history used for plotting and returned to the caller.
    train_losses = []
    val_losses = []
    learning_rates = []

    for epoch in range(num_epochs):
        # Training phase
        model_pn.train()
        train_loss = 0.0

        for s1, s2, s3 in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs} - Training"):
            optimizer.zero_grad()
            loss = _multiscale_loss(s1, s2, s3)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()

        avg_train_loss = train_loss / len(train_loader)
        train_losses.append(avg_train_loss)

        # Validation phase
        model_pn.eval()
        val_loss = 0.0

        with torch.no_grad():
            for s1, s2, s3 in tqdm(val_loader, desc=f"Epoch {epoch+1}/{num_epochs} - Validation"):
                val_loss += _multiscale_loss(s1, s2, s3).item()

        avg_val_loss = val_loss / len(val_loader)
        val_losses.append(avg_val_loss)

        # Record the learning rate used during this epoch, before the scheduler may lower it.
        current_lr = optimizer.param_groups[0]['lr']
        learning_rates.append(current_lr)

        scheduler.step(avg_val_loss)

        # Checkpoint on improvement; otherwise count towards early stopping.
        improved = avg_val_loss < best_val_loss
        if improved:
            best_val_loss = avg_val_loss
            early_stopping_counter = 0
            torch.save(model_pn.state_dict(), "best_placenet_model.pth")
        else:
            early_stopping_counter += 1

        # Report BEFORE deciding to stop so the final epoch is also shown and saved.
        # wait=True defers clearing until new output arrives, which avoids flicker.
        clear_output(wait=True)
        print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {avg_train_loss:.4f}, Val Loss: {avg_val_loss:.4f}, LR: {current_lr:.6f}")
        _plot_training_metrics(train_losses, val_losses, learning_rates)

        if not improved and early_stopping_counter >= patience:
            print(f"Early stopping triggered after {epoch+1} epochs")
            break

    return model_pn, train_losses, val_losses, learning_rates


def _plot_training_metrics(train_losses, val_losses, learning_rates):
    """Plot loss and learning-rate history, save it to training_metrics.png and show it."""
    fig, (ax_loss, ax_lr) = plt.subplots(1, 2, figsize=(10, 5))

    ax_loss.plot(train_losses, label='Training Loss')
    ax_loss.plot(val_losses, label='Validation Loss')
    ax_loss.set_xlabel('Epoch')
    ax_loss.set_ylabel('Loss')
    ax_loss.set_title('Training and Validation Loss')
    ax_loss.legend()

    ax_lr.plot(learning_rates)
    ax_lr.set_xlabel('Epoch')
    ax_lr.set_ylabel('Learning Rate')
    ax_lr.set_title('Learning Rate over Epochs')
    ax_lr.set_yscale('log')  # Use log scale for better visualization

    fig.tight_layout()
    fig.savefig('training_metrics.png')
    plt.show()
    # Close explicitly so one figure per epoch does not accumulate in memory.
    plt.close(fig)

# Usage
# model_pn = PlaceNet()
# train_model returns the model object it was given, so `trained_model` and
# `model_pn` refer to the same (now trained) network.
# NOTE(review): lr=0.1 is well above Adam's library default of 1e-3; confirm it is intentional.
trained_model, train_losses, val_losses, learning_rates = train_model(model_pn, train_loader, val_loader, num_epochs=50, patience=10, lr=0.1)

In [None]:
# Release unoccupied GPU memory cached by PyTorch's allocator after training.
torch.cuda.empty_cache()

In [None]:
# Reconstruct one (randomly drawn) training batch with the trained PlaceNet,
# with gradients disabled; the results are visualized in the cells below.
with torch.no_grad():
    batch = next(iter(train_loader))
    s1, s2, s3 = [tensor.to(device) for tensor in batch]

    combined_s1, combined_s2, combined_s3 = map(dflow, (s1, s2, s3))

    recon_full, recon_half, recon_quarter = model_pn(
        combined_s1, combined_s2, combined_s3
    )

In [None]:
# Detach the third reconstruction from the graph and copy it to a CPU NumPy array for plotting.
recon_quarter_np = recon_quarter.detach().cpu().numpy()

In [None]:
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# Show the first image of the `s3` batch: channels-first -> channels-last for imshow.
# NOTE(review): `s3` is presumably still normalized by the image processor, so
# imshow may clip values outside [0, 1]; confirm against the dataset.
vis_orig = s3[0].detach().cpu().numpy().transpose(1, 2, 0)
plt.imshow(vis_orig)

In [None]:
# Show the first three channels of the first reconstructed sample, rearranged
# to channels-last for imshow.
first_three_channels = recon_quarter_np[0, :3]
vis = first_three_channels.transpose(1, 2, 0)

plt.imshow(vis)

# NICE