In [1]:
# TODO: validate that a DataLoader over the augmented dataset produces differently augmented samples on each new iteration (i.e., it does not cache augmented samples)
#   Add a print statement to the augmentation part of the Dataset
#   Start training the model with a limited dataset length (e.g., 10 images)
#   Ensure that the debug print message is printed again on every new epoch

In [2]:
import os

# Both variables are set before `import torch` below -- presumably so the CUDA
# allocator config and device visibility are in place before CUDA is initialised
# (TODO confirm; keep this ordering when editing the cell).
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:512"
os.environ["CUDA_VISIBLE_DEVICES"] = '0'

import torch

# Selects the internal precision used for float32 matrix multiplications;
# per the PyTorch docs, lower settings may trade accuracy for speed.
torch.set_float32_matmul_precision("medium")  # Try "high" as well

import matplotlib.pyplot as plt

from torch.utils.data import DataLoader
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
import pytorch_lightning as pl
from pytorch_lightning.loggers import MLFlowLogger
import mlflow
from typing import Tuple, List
import numpy as np

import coremltools as ct

from src.dataset import SimpleWallADE20KDataset
from src.model import WallModel
from src import config
from src.transform import get_preprocessing_transform, get_train_augmentations, get_val_augmentations

scikit-learn version 1.2.2 is not supported. Minimum required version: 0.17. Maximum required version: 1.1.2. Disabling scikit-learn conversion API.


In [6]:
# Number of samples requested from each split for this small debug run.
TRAIN_SIZE = 8
VAL_SIZE = 8

# TODO: add option to not enforce specific image size, just pad to a multiple of 32,
#   as it was done for image segmentation sample
#   But first check if CoreML supports dynamic input size
train_augmentations = get_train_augmentations()
train_dataset = SimpleWallADE20KDataset(
    root=config.DATA_ROOT,
    mode='train',
    length=TRAIN_SIZE,
    augmentation_fn=train_augmentations,
    preprocessing_fn=get_preprocessing_transform(config.ENCODER)
)

val_augmentations = get_val_augmentations()
val_dataset = SimpleWallADE20KDataset(
    root=config.DATA_ROOT,
    mode='val',
    # The validation split is sized by VAL_SIZE. It previously reused TRAIN_SIZE,
    # which left VAL_SIZE unused and would silently mis-size the validation set
    # as soon as the two constants differ.
    length=VAL_SIZE,
    augmentation_fn=val_augmentations,
    preprocessing_fn=get_preprocessing_transform(config.ENCODER)
)

print(f"Train size: {len(train_dataset)}")
print(f"Validation size: {len(val_dataset)}")

# os.cpu_count() returns None when the CPU count cannot be determined;
# fall back to a single worker so DataLoader always receives an int.
n_cpu = os.cpu_count() or 1
print(f'Number of CPUs: {n_cpu}')
# NOTE(review): the training loader uses a hard-coded batch size of 2 and no
# shuffling -- presumably deliberate for this debug run; confirm before reusing
# this cell for real training.
train_dataloader = DataLoader(train_dataset, batch_size=2, shuffle=False, num_workers=n_cpu)
val_dataloader = DataLoader(val_dataset, batch_size=config.BATCH_SIZE, shuffle=False, num_workers=n_cpu)



Train size: 8
Validation size: 8
Number of CPUs: 20


In [7]:
# Constructor arguments for the model. Architecture, encoder and learning rate
# come from the project config; the channel/class counts are fixed here
# (presumably 3-channel input and a single output class -- TODO confirm).
model_kwargs = dict(
    architecture=config.ARCHITECTURE,
    encoder_name=config.ENCODER,
    in_channels=3,
    out_classes=1,
    learning_rate=config.LEARNING_RATE,
)
wall_model = WallModel(**model_kwargs)

In [8]:
# Short run on a single device, capped at 8 epochs.
trainer = pl.Trainer(max_epochs=8, devices=1)

# Only the training loader is passed, so no validation loop runs here.
# To re-enable it, add: val_dataloaders=val_dataloader
trainer.fit(wall_model, train_dataloaders=train_dataloader)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type          | Params
------------------------------------------
0 | model   | DeepLabV3Plus | 11.3 M
1 | loss_fn | DiceLoss      | 0     
------------------------------------------
11.3 M    Trainable params
0         Non-trainable params
11.3 M    Total params
45.140    Total estimated model params size (MB)


Augmenting sample 2
Augmenting sample 4
Augmenting sample 5
Augmenting sample 0
Augmenting sample 3
Augmenting sample 1


  rank_zero_warn(


Training: 0it [00:00, ?it/s]

Augmenting sample 6
Augmenting sample 2
Augmenting sample 0
Augmenting sample 7
Augmenting sample 4
Augmenting sample 1
Augmenting sample 5
Augmenting sample 3
Augmenting sample 6
Augmenting sample 2
Augmenting sample 4
Augmenting sample 0
Augmenting sample 5
Augmenting sample 3
Augmenting sample 7
Augmenting sample 1
Augmenting sample 2Augmenting sample 6

Augmenting sample 4
Augmenting sample 0
Augmenting sample 3
Augmenting sample 5
Augmenting sample 7
Augmenting sample 1
Augmenting sample 6
Augmenting sample 2
Augmenting sample 4
Augmenting sample 7
Augmenting sample 5Augmenting sample 0

Augmenting sample 1Augmenting sample 3

Augmenting sample 6
Augmenting sample 2
Augmenting sample 0
Augmenting sample 4
Augmenting sample 1
Augmenting sample 5
Augmenting sample 7
Augmenting sample 3
Augmenting sample 6Augmenting sample 2

Augmenting sample 0
Augmenting sample 4
Augmenting sample 5
Augmenting sample 1
Augmenting sample 3
Augmenting sample 7
Augmenting sample 6
Augmenting sample 2


`Trainer.fit` stopped: `max_epochs=8` reached.
