In [1]:
%reload_ext autoreload
%autoreload 2

import os
import sys
import cv2
import timm
import torch
import numpy as np
import pandas as pd
import pytorch_lightning as pl
import matplotlib.pyplot as plt

from PIL import Image
from pathlib import Path
from sklearn.model_selection import train_test_split
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset, WeightedRandomSampler
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping, LearningRateMonitor
from pytorch_lightning.loggers import TensorBoardLogger

from ship_detector.scripts.train_unet import (ShipSegmentationDataset, 
                                              DiceBCELoss, 
                                              UNetShipSegmentation, 
                                              get_augmentation_transforms, 
                                              handle_multi_ship_instances, 
                                              create_data_loaders)
from ship_detector.scripts.utils import load_config

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
config_path = 'configs/unet.yaml'
manifest_path = 'data/airbus-ship-detection/train_ship_segmentations_v2.csv'
output_dir = 'outputs/unet_experiments/'

In [3]:
config = load_config(config_path)

In [4]:
config

{'model': {'encoder': 'efficientnet-b3', 'encoder_weights': 'imagenet', 'freeze_encoder': True, 'freeze_strategy': 'full', 'unfreeze_epoch': 5, 'freeze_bn': True}, 'loss': {'dice_weight': 0.7, 'bce_weight': 0.3}, 'training': {'batch_size': 16, 'max_epochs': 10, 'early_stopping_patience': 7, 'precision': 16}, 'optimizer': {'name': 'adamw', 'lr': 0.001, 'encoder_lr_scale': 0.1, 'weight_decay': 0.01}, 'scheduler': {'name': 'cosine', 'T_max': 40, 'eta_min': 1e-06}, 'augmentation': {'strong_aug': True, 'color_aug': True, 'noise_aug': True}, 'data': {'val_split': 0.2, 'num_workers': 4, 'random_seed': 42}}

In [5]:
pl.seed_everything(42)

Seed set to 42


42

In [6]:
Path(output_dir).mkdir(parents=True, exist_ok=True)

In [7]:
train_loader, val_loader = create_data_loaders(manifest_path=manifest_path, config=config)

Found 81723 patches with ships
Training samples: 65378
Validation samples: 16345
Using 65378 ship patches for segmentation
Using 16345 ship patches for segmentation


  original_init(self, **validated_kwargs)
  A.ElasticTransform(p=1, alpha=120, sigma=120 * 0.05, alpha_affine=120 * 0.03),
  A.OpticalDistortion(p=1, distort_limit=2, shift_limit=0.5)
  A.GaussNoise(var_limit=(10.0, 50.0), p=1),


In [8]:
model = UNetShipSegmentation(config)
# torch.save(model.state_dict(), f"model_states/pretrained/efficientnet-b3__imagenet.pth")

In [9]:
model.to(torch.device('cuda' if torch.cuda.is_available() else 'cpu'))

UNetShipSegmentation(
  (model): Unet(
    (encoder): EfficientNetEncoder(
      (_conv_stem): Conv2dStaticSamePadding(
        3, 40, kernel_size=(3, 3), stride=(2, 2), bias=False
        (static_padding): ZeroPad2d((0, 1, 0, 1))
      )
      (_bn0): BatchNorm2d(40, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)
      (_blocks): ModuleList(
        (0): MBConvBlock(
          (_expand_conv): Identity()
          (_bn0): Identity()
          (_depthwise_conv): Conv2dStaticSamePadding(
            40, 40, kernel_size=(3, 3), stride=[1, 1], groups=40, bias=False
            (static_padding): ZeroPad2d((1, 1, 1, 1))
          )
          (_bn1): BatchNorm2d(40, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)
          (_se_reduce): Conv2dStaticSamePadding(
            40, 10, kernel_size=(1, 1), stride=(1, 1)
            (static_padding): Identity()
          )
          (_se_expand): Conv2dStaticSamePadding(
            

In [10]:
callbacks = [
    ModelCheckpoint(
        dirpath=os.path.join(output_dir, 'checkpoints'),
        filename='unet-{epoch:02d}-{val_iou:.3f}',
        monitor='val_iou',
        mode='max',
        save_top_k=3,
        save_last=True,
    ),
    EarlyStopping(
        monitor='val_loss',
        patience=config['training']['early_stopping_patience'],
        mode='min',
    ),
    LearningRateMonitor(logging_interval='epoch'),
]

logger = TensorBoardLogger(
    save_dir=output_dir,
    name='unet_log',
)

In [11]:
trainer = pl.Trainer(
    max_epochs=config['training']['max_epochs'],
    accelerator='gpu' if torch.cuda.is_available() else 'cpu',
    devices=1,
    callbacks=callbacks,
    logger=logger,
    log_every_n_steps=10,
    precision=config['training'].get('precision', 32)
)

e:\Computer Vision\ship-detector\.venv\Lib\site-packages\lightning_fabric\connector.py:571: `precision=16` is supported for historical reasons but its usage is discouraged. Please set your precision to 16-mixed instead!
Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


In [None]:
trainer.fit(model, train_loader, val_loader)

You are using a CUDA device ('NVIDIA GeForce RTX 5060 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
e:\Computer Vision\ship-detector\.venv\Lib\site-packages\pytorch_lightning\utilities\model_summary\model_summary.py:231: Precision 16-mixed is not supported by the model summary.  Estimated model size in MB will not be accurate. Using 32 bits instead.

  | Name      | Type        | Params | Mode 
--------------------------------------------------
0 | model     | Unet        | 13.2 M | train
1 | criterion | DiceBCELoss | 0      | train
--------------------------------------------------
2.5 M     Trainable params
10.7 M    Non-trainable params
13.2 M    Total params
52.636    Total estimated model params size (MB)
397       Modules in train mode
78        Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

e:\Computer Vision\ship-detector\.venv\Lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:428: Consider setting `persistent_workers=True` in 'val_dataloader' to speed up the dataloader worker initialization.


                                                                           

e:\Computer Vision\ship-detector\.venv\Lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:428: Consider setting `persistent_workers=True` in 'train_dataloader' to speed up the dataloader worker initialization.


Epoch 1:   0%|          | 10/4086 [00:46<5:12:49,  0.22it/s, v_num=1, val_loss=0.700, val_iou=1.000, train_loss=0.702, train_iou=0.991]