### Training Vlad's model by first stacking the depths from the SOTA models

In [1]:
import importlib
import os
import sys

import torch
import torch.nn as nn
import wandb
from torch.utils.data import DataLoader

# Put the parent of the current working directory on sys.path so the
# project-local packages imported below can be resolved.
sys.path.append(os.path.abspath(os.path.join(os.curdir, '..')))

from configs import extended_unet_fusion_config as config
from models.unet_mit import Unet
from datasets.combination_depth_dataset import CombDepthDataset
from models.fusion_models import CNNFusionModel
from models.combination_model import CombinedModel
from utils.train_utils import train_model
import utils.train_utils as tu

  from .autonotebook import tqdm as notebook_tqdm
  check_for_updates()


In [2]:
# Seed torch's global RNG from the config value for reproducibility.
torch.manual_seed(config.random_seed)

# Keyword arguments that the train and test datasets have in common: the same
# input transform and depth model names, with both uncertainty options unset.
_shared_dataset_kwargs = dict(
    transform=config.padded_transform,
    depth_model_names=config.depth_model_names,
    uncertainty_dir=None,
    use_uncertainty=None,
)

# Labelled training data (has ground truth, so a target transform is supplied).
train_full_dataset = CombDepthDataset(
    data_dir=os.path.join(config.dataset_path, 'train/train'),
    depths_dir=config.depth_maps_path,
    list_file=os.path.join(config.dataset_path, 'train_list.txt'),
    target_transform=config.target_transform,
    has_gt=True,
    **_shared_dataset_kwargs,
)

# Test data has no ground truth, hence has_gt=False and no target transform.
# Its depth maps are read from the 'test' subfolder of config.depth_maps_path.
test_dataset = CombDepthDataset(
    data_dir=os.path.join(config.dataset_path, 'test/test'),
    depths_dir=os.path.join(config.depth_maps_path, 'test'),
    list_file=os.path.join(config.dataset_path, 'test_list.txt'),
    has_gt=False,
    **_shared_dataset_kwargs,
)
    
# Carve a validation subset out of the labelled training data.
# The train share is the truncated (1 - val_part) fraction of the dataset and
# validation receives the remainder, so the two sizes always sum to the total.
total_size = len(train_full_dataset)
train_size = int((1-config.val_part) * total_size)
val_size = total_size - train_size

# random_split draws from torch's global RNG here (no generator is passed).
# Both resulting subsets wrap train_full_dataset, so validation samples go
# through the same config.padded_transform as training samples.
train_dataset, val_dataset = torch.utils.data.random_split(
    dataset=train_full_dataset,
    lengths=[train_size, val_size],
)
# Disabled on purpose: augmentations are not considered necessary for validation.
#val_dataset.transform = config.transform_val


# Create data loaders with memory optimizations (pinned host memory on all of
# them; worker processes kept alive between epochs for the training loader).
train_loader = DataLoader(
    train_dataset,
    batch_size=config.train_bs,
    shuffle=True,
    num_workers=config.num_workers,
    pin_memory=True,
    drop_last=True,
    # DataLoader raises ValueError for persistent_workers=True when
    # num_workers == 0, so only request persistence when workers exist.
    persistent_workers=config.num_workers > 0,
)

# Validation keeps a fixed sample order.
val_loader = DataLoader(
    val_dataset,
    batch_size=config.val_bs,
    shuffle=False,
    num_workers=config.num_workers,
    pin_memory=True
)

# Test loader reuses the validation batch size and also keeps a fixed order.
test_loader = DataLoader(
    test_dataset,
    batch_size=config.val_bs,
    shuffle=False,
    num_workers=config.num_workers,
    pin_memory=True
)

print(f"Train size: {len(train_dataset)}, Validation size: {len(val_dataset)}, Test size: {len(test_dataset)}")

Train size: 20375, Validation size: 3596, Test size: 650


In [3]:
# Build the base network by calling the model factory stored in the config
# (no arguments). The resulting object is what the next cell passes to
# CombinedModel as `unet_model`.
model = config.model()




Num channels: 4


In [4]:
# The fusion network gets one input channel per configured depth model.
num_depth_sources = len(test_dataset.depth_model_names)
fusion_model = CNNFusionModel(input_channels=num_depth_sources)
fusion_model = fusion_model.to(config.device)

# NOTE: `model` is rebound from the base network to the combined model here,
# so this cell is not idempotent. Re-run the model-construction cell above
# before re-running this one, otherwise an already-combined model gets wrapped.
model = CombinedModel(
    unet_model=model,
    fusion_model=fusion_model,
    use_uncertainty=False,
)
model = model.to(config.device)

In [5]:
# Hand the parameters of the current `model` to the optimizer factory from the
# config, then report which device the run is configured for.
trainable_params = model.parameters()
optimizer = config.optimizer(trainable_params)
print(f"Using device: {config.device}")

Using device: cuda:3


In [6]:
# Optional warm start (disabled): uncomment both lines to load the weights in
# best_model_43.pt from the "extended_unet_sota_depths" experiment directory
# into `model` before training.
#exp_name = "extended_unet_sota_depths"
#model.load_state_dict(torch.load(f'{os.path.join(config.dataset_path, exp_name)}/best_model_43.pt'))

In [None]:
# Experiment name; it also names the output directory under config.dataset_path
# and is reused by the prediction cells further down.
exp_name = "extended_unet_sota_depths_fusion_pretrain"
exp_dir = os.path.join(config.dataset_path, exp_name)

# Number of training epochs (the log prints "Epoch k/50").
num_epochs = 50

print("Starting training...")
# Weights & Biases logging: the run is opened as a context manager so it is
# closed when the block exits.
with wandb.init(
    project="MonocularDepthEstimation",  #CIL
    save_code=True,
    notes=config.WANDB_NOTES,
):
    model = train_model(
        model, train_loader, val_loader,
        config.loss, optimizer, num_epochs, config.device,
        exp_path=exp_dir,
        is_extended_model=True,
        use_uncertainty_map=False,
    )

Starting training...


[34m[1mwandb[0m: Currently logged in as: [33mnoloo[0m to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Epoch 1/50


Training: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2546/2546 [24:31<00:00,  1.73it/s]
Validation: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 225/225 [01:36<00:00,  2.33it/s]


Train Loss: 0.3048, Validation Loss: 0.1748
New best model saved at epoch 1 with validation loss: 0.1748
Epoch 2/50


Training: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2546/2546 [24:29<00:00,  1.73it/s]
Validation: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 225/225 [01:34<00:00,  2.37it/s]


Train Loss: 0.1910, Validation Loss: 0.1607
New best model saved at epoch 2 with validation loss: 0.1607
Epoch 3/50


Training: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2546/2546 [24:30<00:00,  1.73it/s]
Validation: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 225/225 [01:35<00:00,  2.36it/s]


Train Loss: 0.1724, Validation Loss: 0.1464
New best model saved at epoch 3 with validation loss: 0.1464
Epoch 4/50


Training:  22%|█████████████████████████████████▎                                                                                                                        | 551/2546 [05:19<19:10,  1.73it/s]IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)

Training: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2546/2546 [24:29<00:00,  1.73it/s]
Validation: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 225/225 [01:33<00:00,  2.40it/s]


Train Loss: 0.0988, Validation Loss: 0.1065
Epoch 28/50


Training:   6%|█████████                                                                                                                                                 | 149/2546 [01:27<23:08,  1.73it/s]

In [None]:
# Enable IPython's autoreload extension. Mode 2 re-imports all modules before
# each cell execution, so edits to project .py files are picked up without
# restarting the kernel. It only affects cells executed after this one.
%load_ext autoreload
%autoreload 2

In [None]:
# NOTE(review): bare literal with no side effects; looks like a leftover
# scratch / kernel-liveness check and is a candidate for removal.
1

In [None]:
import datasets
import datasets.combination_depth_dataset

# Reloading a package only re-executes its __init__; submodules that are already
# imported stay as they were. The next cell builds the dataset from
# datasets.combination_depth_dataset, so that submodule is reloaded explicitly.
importlib.reload(datasets)
importlib.reload(datasets.combination_depth_dataset)

In [None]:

# Rebuild the test dataset, looking the class up on the `datasets` module at
# call time rather than using the name imported at the top of the notebook.
test_images_dir = os.path.join(config.dataset_path, 'test/test')
test_depths_dir = os.path.join(config.depth_maps_path, 'test')
test_list_file = os.path.join(config.dataset_path, 'test_list.txt')

# Test set has no ground truth, so has_gt is False and no target transform is given.
test_dataset = datasets.combination_depth_dataset.CombDepthDataset(
    data_dir=test_images_dir,
    depths_dir=test_depths_dir,
    list_file=test_list_file,
    transform=config.padded_transform,
    has_gt=False,
    depth_model_names=config.depth_model_names,
    uncertainty_dir=None,
    use_uncertainty=None,
)

# Fresh loader over the rebuilt dataset: fixed order, validation batch size.
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=config.val_bs,
    shuffle=False,
    num_workers=config.num_workers,
    pin_memory=True,
)

In [None]:
import utils.train_utils as tu

# Predictions use the same experiment directory as the training run.
predictions_dir = os.path.join(config.dataset_path, exp_name)

# NOTE(review): the keyword is spelled `is_exntended_model` here, whereas
# train_model is called with `is_extended_model`. Confirm against the signature
# in utils/train_utils.py before changing either spelling.
tu.generate_test_predictions(
    model, test_loader, config.device,
    exp_path=predictions_dir,
    is_exntended_model=True,
)

In [None]:

# Pick up any edits made to utils/train_utils.py since it was last imported.
importlib.reload(tu)

# Visualisations use the same experiment directory as the training run.
visualization_dir = os.path.join(config.dataset_path, exp_name)

# NOTE(review): keyword spelled `is_exntended_model`; confirm it matches the
# function signature in utils/train_utils.py.
tu.visualize_test_predictions(
    model, test_loader, config.device,
    exp_path=visualization_dir,
    is_exntended_model=True,
)