### Training Vlad's model by first stacking the depths from the SOTA models

In [1]:
import sys, os
import torch, wandb
import torch.nn as nn
from torch.utils.data import DataLoader
sys.path.append(os.path.abspath(os.path.join(os.curdir, '..')))
from configs import extended_unet_config as config
from models.unet_convnextv2 import Unet
from datasets.combination_depth_dataset import CombDepthDataset
from utils.train_utils import train_model
import utils.train_utils as tu
from utils.train_utils import evaluate_model
import importlib

  from .autonotebook import tqdm as notebook_tqdm
  check_for_updates()


In [2]:
# Set a fixed random seed for reproducibility (this also drives random_split below)
torch.manual_seed(config.random_seed)

# Full training set: ground truth is loaded (has_gt=True) and the depth maps
# come from the models listed in config.depth_model_names. No uncertainty
# directory is passed, so uncertainty inputs are not used in this notebook.
train_full_dataset = CombDepthDataset(
    data_dir=os.path.join(config.dataset_path, 'train/train'),
    depths_dir=os.path.join(config.depth_maps_path, 'train'),
    list_file=os.path.join(config.dataset_path, 'train_list.txt'),
    transform=config.padded_transform,
    target_transform=config.target_transform,
    has_gt=True,
    depth_model_names=config.depth_model_names,
    uncertainty_dir=None,
    use_uncertainty=None)

# Create test dataset without ground truth
test_dataset = CombDepthDataset(
    data_dir=os.path.join(config.dataset_path, 'test/test'),
    depths_dir=os.path.join(config.depth_maps_path, 'test'),
    list_file=os.path.join(config.dataset_path, 'test_list.txt'),
    transform=config.padded_transform,
    has_gt=False,
    depth_model_names=config.depth_model_names,
    uncertainty_dir=None,
    use_uncertainty=None)  # Test set has no ground truth

# Split training dataset into train and validation
total_size = len(train_full_dataset)
train_size = int((1 - config.val_part) * total_size)
val_size = total_size - train_size

train_dataset, val_dataset = torch.utils.data.random_split(
    train_full_dataset, [train_size, val_size]
)
# NOTE: random_split returns Subset views over train_full_dataset, so the
# validation split goes through the same config.padded_transform as training.
# Assigning `val_dataset.transform` would not change that; a validation-only
# transform would need a second CombDepthDataset instance.

# persistent_workers=True makes DataLoader raise ValueError when
# num_workers == 0, so only keep workers alive when there are workers.
use_persistent_workers = config.num_workers > 0
# Pinned host memory only speeds up host-to-GPU copies; without CUDA it has
# no benefit and PyTorch emits a warning, so enable it only when CUDA exists.
use_pin_memory = torch.cuda.is_available()

# Create data loaders with memory optimizations
train_loader = DataLoader(
    train_dataset,
    batch_size=config.train_bs,
    shuffle=True,
    num_workers=config.num_workers,
    pin_memory=use_pin_memory,
    drop_last=True,  # drop the final incomplete batch during training
    persistent_workers=use_persistent_workers,
)

val_loader = DataLoader(
    val_dataset,
    batch_size=config.val_bs,
    shuffle=False,
    num_workers=config.num_workers,
    pin_memory=use_pin_memory,
)

test_loader = DataLoader(
    test_dataset,
    batch_size=config.val_bs,
    shuffle=False,
    num_workers=config.num_workers,
    pin_memory=use_pin_memory,
)

print(f"Train size: {len(train_dataset)}, Validation size: {len(val_dataset)}, Test size: {len(test_dataset)}")

Train size: 20375, Validation size: 3596, Test size: 650


In [3]:
# Build the network through the factory exposed by the config module.
model = config.model()

# The optimizer factory from the config receives the model's parameters.
trainable_params = model.parameters()
optimizer = config.optimizer(trainable_params)

# Report the device selected in the config.
print(f"Using device: {config.device}")

Num channels: 7
Using device: cpu


In [4]:
# Weights & Biases logging and the training run itself
exp_name = "extended_unet_sota_depths"
# Location passed to train_model as exp_path (under the dataset directory)
exp_path = os.path.join(config.dataset_path, exp_name)
# Sixth positional argument of train_model; presumably the number of epochs
# (the training log prints "Epoch 1/1") - confirm against utils/train_utils.py
num_epochs = 1

print("Starting training...")
# Using the run as a context manager so it is finished when the block exits.
with wandb.init(
    project="MonocularDepthEstimation",  # CIL
    save_code=True,
    notes=config.WANDB_NOTES,
):
    model = train_model(
        model,
        train_loader,
        val_loader,
        config.loss,
        optimizer,
        num_epochs,
        config.device,
        exp_path=exp_path,
        is_extended_model=True,
        use_uncertainty_map=False,
    )

Starting training...


[34m[1mwandb[0m: Currently logged in as: [33mltonkovic[0m ([33mltonkovic-eth-z-rich[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Epoch 1/1


Training:   0%|          | 0/2546 [00:00<?, ?it/s]

RGB shape: torch.Size([8, 3, 448, 576])
Depth stack shape: torch.Size([8, 4, 448, 576])
Num of inputs 2
Inputs shape: torch.Size([8, 7, 448, 576])
Inputs shape: torch.Size([8, 7, 448, 576])


Training:   0%|          | 1/2546 [00:17<12:23:18, 17.52s/it]

RGB shape: torch.Size([8, 3, 448, 576])
Depth stack shape: torch.Size([8, 4, 448, 576])
Num of inputs 2
Inputs shape: torch.Size([8, 7, 448, 576])
Inputs shape: torch.Size([8, 7, 448, 576])


Training:   0%|          | 2/2546 [00:34<12:00:09, 16.98s/it]

RGB shape: torch.Size([8, 3, 448, 576])
Depth stack shape: torch.Size([8, 4, 448, 576])
Num of inputs 2
Inputs shape: torch.Size([8, 7, 448, 576])
Inputs shape: torch.Size([8, 7, 448, 576])


Training:   0%|          | 3/2546 [00:50<11:43:36, 16.60s/it]

RGB shape: torch.Size([8, 3, 448, 576])
Depth stack shape: torch.Size([8, 4, 448, 576])
Num of inputs 2
Inputs shape: torch.Size([8, 7, 448, 576])
Inputs shape: torch.Size([8, 7, 448, 576])


Training:   0%|          | 4/2546 [01:06<11:31:33, 16.32s/it]

RGB shape: torch.Size([8, 3, 448, 576])
Depth stack shape: torch.Size([8, 4, 448, 576])
Num of inputs 2
Inputs shape: torch.Size([8, 7, 448, 576])
Inputs shape: torch.Size([8, 7, 448, 576])


Training:   0%|          | 5/2546 [01:22<11:25:33, 16.19s/it]

RGB shape: torch.Size([8, 3, 448, 576])
Depth stack shape: torch.Size([8, 4, 448, 576])
Num of inputs 2
Inputs shape: torch.Size([8, 7, 448, 576])
Inputs shape: torch.Size([8, 7, 448, 576])


Training:   0%|          | 6/2546 [01:38<11:23:01, 16.13s/it]

RGB shape: torch.Size([8, 3, 448, 576])
Depth stack shape: torch.Size([8, 4, 448, 576])
Num of inputs 2
Inputs shape: torch.Size([8, 7, 448, 576])
Inputs shape: torch.Size([8, 7, 448, 576])


Training:   0%|          | 7/2546 [01:54<11:30:26, 16.32s/it]

RGB shape: torch.Size([8, 3, 448, 576])
Depth stack shape: torch.Size([8, 4, 448, 576])
Num of inputs 2
Inputs shape: torch.Size([8, 7, 448, 576])
Inputs shape: torch.Size([8, 7, 448, 576])


Training:   0%|          | 8/2546 [02:17<12:49:45, 18.20s/it]

RGB shape: torch.Size([8, 3, 448, 576])
Depth stack shape: torch.Size([8, 4, 448, 576])
Num of inputs 2
Inputs shape: torch.Size([8, 7, 448, 576])
Inputs shape: torch.Size([8, 7, 448, 576])


Training:   0%|          | 9/2546 [02:47<15:33:54, 22.09s/it]

RGB shape: torch.Size([8, 3, 448, 576])
Depth stack shape: torch.Size([8, 4, 448, 576])
Num of inputs 2
Inputs shape: torch.Size([8, 7, 448, 576])
Inputs shape: torch.Size([8, 7, 448, 576])


Training:   0%|          | 10/2546 [03:05<14:39:45, 20.81s/it]

RGB shape: torch.Size([8, 3, 448, 576])
Depth stack shape: torch.Size([8, 4, 448, 576])
Num of inputs 2
Inputs shape: torch.Size([8, 7, 448, 576])
Inputs shape: torch.Size([8, 7, 448, 576])


Training:   0%|          | 11/2546 [03:22<13:53:31, 19.73s/it]

RGB shape: torch.Size([8, 3, 448, 576])
Depth stack shape: torch.Size([8, 4, 448, 576])
Num of inputs 2
Inputs shape: torch.Size([8, 7, 448, 576])
Inputs shape: torch.Size([8, 7, 448, 576])


Training:   0%|          | 12/2546 [03:40<13:26:45, 19.10s/it]

RGB shape: torch.Size([8, 3, 448, 576])
Depth stack shape: torch.Size([8, 4, 448, 576])
Num of inputs 2
Inputs shape: torch.Size([8, 7, 448, 576])
Inputs shape: torch.Size([8, 7, 448, 576])


Training:   0%|          | 12/2546 [03:58<14:01:02, 19.91s/it]
Traceback (most recent call last):
  File "/tmp/ipykernel_91391/386376533.py", line 8, in <module>
    model = train_model(model, train_loader, val_loader,
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/lucijatonkovic/Documents/Git/CIL_2025/utils/train_utils.py", line 42, in train_model
    loss.backward()
  File "/home/lucijatonkovic/miniforge3/envs/machine_perception/lib/python3.12/site-packages/torch/_tensor.py", line 648, in backward
    torch.autograd.backward(
  File "/home/lucijatonkovic/miniforge3/envs/machine_perception/lib/python3.12/site-packages/torch/autograd/__init__.py", line 353, in backward
    _engine_run_backward(
  File "/home/lucijatonkovic/miniforge3/envs/machine_perception/lib/python3.12/site-packages/torch/autograd/graph.py", line 824, in _engine_run_backward
    return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
           ^^

MailboxClosedError: 

Error in callback <bound method _WandbInit._post_run_cell_hook of <wandb.sdk.wandb_init._WandbInit object at 0x7f32cfe84e00>> (for post_run_cell), with arguments args (<ExecutionResult object at 7f32cfbdfd40, execution_count=4 error_before_exec=None error_in_exec= info=<ExecutionInfo object at 7f32cfbdfd10, raw_cell="# Wandb logging
exp_name = "extended_unet_sota_dep.." store_history=True silent=False shell_futures=True cell_id=vscode-notebook-cell:/home/lucijatonkovic/Documents/Git/CIL_2025/notebooks/train_extended_unet.ipynb#X14sZmlsZQ%3D%3D> result=None>,),kwargs {}:


BrokenPipeError: [Errno 32] Broken pipe