In [2]:
%reload_ext autoreload
%autoreload 2

import os
import sys
import cv2
import timm
import torch
import transformers
import numpy as np
import pandas as pd
import ruamel.yaml as yaml
import pytorch_lightning as pl
import matplotlib.pyplot as plt

from PIL import Image
from pathlib import Path
from sklearn.model_selection import train_test_split
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping, LearningRateMonitor
from pytorch_lightning.loggers import TensorBoardLogger

from scripts.prepare_data import rle_decode, tile_geotiff, process_masks, create_synthetic_test
from scripts.train_vit import ShipPatchDataset, ViTShipClassifier, get_augmentation_transforms, create_data_loader

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
config_path = "configs/vit.yaml"
manifest_path = "data/airbus-ship-detection/train_ship_segmentations_v2.csv"

output_dir = "outputs"

In [4]:
with open(config_path, 'rb') as f:
    config = yaml.YAML(typ='rt').load(f)
pl.seed_everything(config['data']['random_seed'])
Path(output_dir).mkdir(parents=True, exist_ok=True)

Seed set to 42


In [5]:
# train_loader, val_loader = create_data_loader(manifest_path, config)
df_mani = pd.read_csv(manifest_path)
df_mani['has_ship'] = df_mani['EncodedPixels'].apply(lambda x: 0 if pd.isna(x) else 1)
df_mani['patch_path'] = df_mani['ImageId'].apply(lambda x: f"data/airbus-ship-detection/train_v2/{x}")
train_df, val_df = train_test_split(
    df_mani,
    test_size=config['data']['val_split'],
    random_state=config['data']['random_seed'],
    stratify=df_mani['has_ship']
)

In [6]:
df_mani

Unnamed: 0,ImageId,EncodedPixels,has_ship,patch_path
0,00003e153.jpg,,0,data/airbus-ship-detection/train_v2/00003e153.jpg
1,0001124c7.jpg,,0,data/airbus-ship-detection/train_v2/0001124c7.jpg
2,000155de5.jpg,264661 17 265429 33 266197 33 266965 33 267733...,1,data/airbus-ship-detection/train_v2/000155de5.jpg
3,000194a2d.jpg,360486 1 361252 4 362019 5 362785 8 363552 10 ...,1,data/airbus-ship-detection/train_v2/000194a2d.jpg
4,000194a2d.jpg,51834 9 52602 9 53370 9 54138 9 54906 9 55674 ...,1,data/airbus-ship-detection/train_v2/000194a2d.jpg
...,...,...,...,...
231718,fffedbb6b.jpg,,0,data/airbus-ship-detection/train_v2/fffedbb6b.jpg
231719,ffff2aa57.jpg,,0,data/airbus-ship-detection/train_v2/ffff2aa57.jpg
231720,ffff6e525.jpg,,0,data/airbus-ship-detection/train_v2/ffff6e525.jpg
231721,ffffc50b4.jpg,,0,data/airbus-ship-detection/train_v2/ffffc50b4.jpg


In [9]:
# Get transform
aug_config = config['augmentation']

train_transforms = [
    transforms.RandomHorizontalFlip(p=aug_config.get('hflip_prob', 0.5)),
    transforms.RandomVerticalFlip(p=aug_config.get('vflip_prob', 0.5)),
]

if aug_config.get('rotation', False):
    train_transforms.append(transforms.RandomRotation(degrees=90))

if aug_config.get('color_jitter', False):
    train_transforms.append(
        transforms.ColorJitter(
            brightness=0.2,
            contrast=0.2,
            saturation=0.2,
            hue=0.1
        )
    )
normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225]
)

train_transforms.extend([
    transforms.ToTensor(),
    normalize
])

val_transforms = transforms.Compose([
    transforms.ToTensor(),
    normalize
])

# train_transforms, val_transforms = transforms.Compose(train_transforms), transforms.Compose(val_transforms)

In [10]:
# Create datasets
train_dataset = ShipPatchDataset(train_df, transform=train_transforms)
val_dataset = ShipPatchDataset(val_df, transform=val_transforms)

In [11]:
# handling imbalance class with weighted sampling
if config['training'].get('use_weighted_sampler', False):
    train_labels = train_df['has_ship'].values
    class_counts = np.bincount(train_labels)
    class_weights = 1.0 / class_counts
    sample_weights = class_weights[train_labels]
    
    sampler =WeightedRandomSampler(
        weights=sample_weights,
        num_samples=len(train_dataset),
        replacement=True
    )
    shuffle = False
else:
    sampler = None
    shuffle = True

In [12]:
train_loader = DataLoader(
    train_dataset,
    batch_size=config['training']['batch_size'],
    shuffle=shuffle,
    sampler=sampler,
    num_workers=config['data']['num_workers'],
    pin_memory=True
)
val_loader = DataLoader(
    val_dataset,
    batch_size=config['training']['batch_size'],
    shuffle=False,
    num_workers=config['data']['num_workers'],
    pin_memory=True,
)

In [14]:
# model = ViTShipClassifier(config)
# torch.save(model.state_dict(), f'{output_dir}/pretrained/vit_base_patch16_224.pth')
config['model']['pretrained'] = False
model = ViTShipClassifier(config)
if config['model']['pretrained'] == False:
    state_dict = torch.load(f"model_states/pretrained/{config['model']['name']}.pth", map_location='cpu')
    model.load_state_dict(state_dict)

In [15]:
model.to('cuda')

ViTShipClassifier(
  (model): VisionTransformer(
    (patch_embed): PatchEmbed(
      (proj): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
      (norm): Identity()
    )
    (pos_drop): Dropout(p=0.0, inplace=False)
    (patch_drop): Identity()
    (norm_pre): Identity()
    (blocks): Sequential(
      (0): Block(
        (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (attn): Attention(
          (qkv): Linear(in_features=768, out_features=2304, bias=True)
          (q_norm): Identity()
          (k_norm): Identity()
          (attn_drop): Dropout(p=0.0, inplace=False)
          (norm): Identity()
          (proj): Linear(in_features=768, out_features=768, bias=True)
          (proj_drop): Dropout(p=0.0, inplace=False)
        )
        (ls1): Identity()
        (drop_path1): Identity()
        (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (mlp): Mlp(
          (fc1): Linear(in_features=768, out_features=3072, bias=True)
       

In [16]:
callbacks = [
    ModelCheckpoint(
        dirpath=os.path.join(output_dir, 'checkpoints'),
        filename='vit-{epoch:02d}-{val_acc:.3f}',
        monitor='val_acc',
        mode='max',
        save_top_k=3,
        save_last=True,
    ),
    EarlyStopping(
        monitor='val_loss',
        patience=config['training']['early_stopping_patience'],
        mode='min'
    ),
    LearningRateMonitor(logging_interval='epoch')
]
logger = TensorBoardLogger(
    save_dir=output_dir,
    name='vit_logs'
)

In [17]:
trainer = pl.Trainer(
    max_epochs=config['training']['max_epochs'],
    accelerator='gpu' if torch.cuda.is_available() else 'cpu',
    devices=1,
    callbacks=callbacks,
    logger=logger,
    log_every_n_steps=10,
    deterministic=True,
    precision=config['training'].get('precision', 32)
)

e:\Computer Vision\ship-detector\.venv\Lib\site-packages\lightning_fabric\connector.py:571: `precision=16` is supported for historical reasons but its usage is discouraged. Please set your precision to 16-mixed instead!
Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


In [20]:
trainer.fit(model, train_loader, val_loader)

You are using a CUDA device ('NVIDIA GeForce RTX 5060 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
e:\Computer Vision\ship-detector\.venv\Lib\site-packages\pytorch_lightning\utilities\model_summary\model_summary.py:231: Precision 16-mixed is not supported by the model summary.  Estimated model size in MB will not be accurate. Using 32 bits instead.

  | Name      | Type              | Params | Mode 
--------------------------------------------------------
0 | model     | VisionTransformer | 85.8 M | train
1 | criterion | BCEWithLogitsLoss | 0      | train
--------------------------------------------------------
85.8 M    Trainable params
0         Non-trainable params
85.8 M    Tot

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

e:\Computer Vision\ship-detector\.venv\Lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:428: Consider setting `persistent_workers=True` in 'val_dataloader' to speed up the dataloader worker initialization.


Sanity Checking DataLoader 0:   0%|          | 0/2 [00:00<?, ?it/s]

AssertionError: Input height (768) doesn't match model (224).