# SegFormer MiT-b5 architecture training

In [1]:
# Mount Google Drive at /content/drive so files stored on Drive are
# reachable from this Colab runtime.
from google.colab import drive

drive.mount("/content/drive")

Mounted at /content/drive


In [2]:
# Start from the Drive mount point so this cell can be re-run safely
# (the second %cd below is relative and would fail from any other directory).
%cd /content/drive
# Move into the project root; later cells use paths relative to this directory
# (e.g. ./data/... and ./src/...).
%cd MyDrive/github_repos/K1702_clover_analysis
# List the project root as a quick check that we landed in the expected place.
!ls

/content/drive
/content/drive/MyDrive/github_repos/K1702_clover_analysis
assets	data  LICENSE  lightning_logs  output  README.md  src


In [3]:
!pip install lightning

Collecting lightning
  Downloading lightning-2.1.2-py3-none-any.whl (2.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m15.4 MB/s[0m eta [36m0:00:00[0m
Collecting lightning-utilities<2.0,>=0.8.0 (from lightning)
  Downloading lightning_utilities-0.10.0-py3-none-any.whl (24 kB)
Collecting torchmetrics<3.0,>=0.7.0 (from lightning)
  Downloading torchmetrics-1.2.1-py3-none-any.whl (806 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m806.1/806.1 kB[0m [31m28.5 MB/s[0m eta [36m0:00:00[0m
Collecting pytorch-lightning (from lightning)
  Downloading pytorch_lightning-2.1.2-py3-none-any.whl (776 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m776.9/776.9 kB[0m [31m32.7 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: lightning-utilities, torchmetrics, pytorch-lightning, lightning
Successfully installed lightning-2.1.2 lightning-utilities-0.10.0 pytorch-lightning-2.1.2 torchmetrics-1.2.1


In [11]:
# Import statements
import sys

# Make the project's own modules importable by bare name. The local imports at
# the bottom of this cell (transforms, datasets, data_module, model_module,
# model) are presumably resolved from this directory — the path is relative,
# so the working directory must already be the project root.
# NOTE(review): append() puts this directory LAST on sys.path, so an installed
# package with the same name (e.g. a pip-installed `datasets`) would take
# precedence over the local module — confirm this cannot happen in the runtime.
sys.path.append("./src/image_segmentation/src/")


# Standard library / numeric helpers (not referenced in the cells visible here)
import os
import numpy as np

# Training loop, logging and callbacks
from pytorch_lightning import Trainer
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning.callbacks import ModelCheckpoint, LearningRateMonitor, EarlyStopping

# Hugging Face image processor handed to each dataset
from transformers import SegformerImageProcessor

# Project-local modules (found via the sys.path entry added above)
from transforms import get_train_transforms, get_val_transforms
from datasets import CloverDataset
from data_module import SegformerDataModule
from model_module import SegformerModel
from model import create_model


In [5]:
# Run configuration — the values a reader is most likely to tune.
NUM_CLASSES = 3          # passed to the model module as num_classes
IMG_SIZE = (800, 800)    # passed to the transforms and the model module as img_size
DEVICE = "cuda"
BATCH_SIZE = 4           # passed to the data module as batch_size
NUM_WORKERS = 2          # passed to the data module as num_workers

In [6]:
# Image / mask folders, one pair per split. Paths are relative to the
# project root, so the working directory must be set before using them.
TRAIN_IMG_DIR, TRAIN_MASK_DIR = "./data/images/train", "./data/annotations/train"
VAL_IMG_DIR, VAL_MASK_DIR = "./data/images/val", "./data/annotations/val"
TEST_IMG_DIR, TEST_MASK_DIR = "./data/images/test", "./data/annotations/test"

In [7]:
# Build the train / val / test datasets in one pass.
# Every split gets its own SegformerImageProcessor(do_reduce_labels=True);
# only the training split uses the training transforms, while the validation
# and test splits both use the validation transforms.
train_dataset, val_dataset, test_dataset = (
    CloverDataset(
        img_dir=split_img_dir,
        mask_dir=split_mask_dir,
        img_processor=SegformerImageProcessor(do_reduce_labels=True),
        transforms=make_transforms(img_size=IMG_SIZE),
    )
    for split_img_dir, split_mask_dir, make_transforms in (
        (TRAIN_IMG_DIR, TRAIN_MASK_DIR, get_train_transforms),
        (VAL_IMG_DIR, VAL_MASK_DIR, get_val_transforms),
        (TEST_IMG_DIR, TEST_MASK_DIR, get_val_transforms),
    )
)

In [8]:
# Wrap the three datasets in the SegFormer data module, configured with the
# batch size and worker count from the configuration cell.
sdm = SegformerDataModule(
    batch_size=BATCH_SIZE,
    num_workers=NUM_WORKERS,
    train_dataset=train_dataset,
    val_dataset=val_dataset,
    test_dataset=test_dataset,
)

In [9]:
# Instantiate the SegFormer model module from the nvidia/mit-b5 checkpoint,
# using the class count and image size from the configuration cell.
seg_mod = SegformerModel(
    model_variant="nvidia/mit-b5",
    img_size=IMG_SIZE,
    num_classes=NUM_CLASSES,
)

config.json:   0%|          | 0.00/70.0k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/328M [00:00<?, ?B/s]

Some weights of SegformerForSemanticSegmentation were not initialized from the model checkpoint at nvidia/mit-b5 and are newly initialized: ['decode_head.batch_norm.num_batches_tracked', 'decode_head.linear_c.3.proj.weight', 'decode_head.batch_norm.running_var', 'decode_head.linear_fuse.weight', 'decode_head.linear_c.0.proj.weight', 'decode_head.batch_norm.weight', 'decode_head.batch_norm.running_mean', 'decode_head.linear_c.1.proj.weight', 'decode_head.linear_c.0.proj.bias', 'decode_head.classifier.bias', 'decode_head.linear_c.3.proj.bias', 'decode_head.linear_c.2.proj.weight', 'decode_head.batch_norm.bias', 'decode_head.linear_c.1.proj.bias', 'decode_head.classifier.weight', 'decode_head.linear_c.2.proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [12]:
# TensorBoard logger. The run name matches the backbone trained in this
# notebook (nvidia/mit-b5) so the curves are not filed under the MiT-b0 runs.
# version="no_val" labels this run as trained without a validation loop.
logger = TensorBoardLogger("./src/image_segmentation/runs", name="colab_mit_b5", version="no_val")

# Callbacks
# Log the learning rate once per epoch.
lr_monitor = LearningRateMonitor(logging_interval='epoch')

# Keep the 5 checkpoints with the lowest "train_loss", evaluated at the end of
# each training epoch.
# NOTE(review): assumes the model module logs a metric named "train_loss" — confirm.
checkpoint_callback = ModelCheckpoint(
    save_top_k=5,
    monitor="train_loss",
    mode="min",
    filename="segformer-{epoch:02d}-{train_loss:.3f}",
    save_on_train_epoch_end=True
)

# Stop training once "train_loss" has failed to improve for 5 consecutive checks.
early_stopping = EarlyStopping(
    monitor="train_loss",
    min_delta=0.00,
    patience=5,
    mode='min'
)


In [13]:
# Build the Lightning trainer: a single GPU, capped at 20 epochs, reporting to
# the TensorBoard logger and using the checkpoint, LR-monitor and
# early-stopping callbacks.
trainer = Trainer(
    max_epochs=20,
    accelerator="gpu",
    devices=1,
    callbacks=[checkpoint_callback, lr_monitor, early_stopping],
    logger=logger,
)

# Start training the model module on the data module.
trainer.fit(seg_mod, sdm)

INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
/usr/local/lib/python3.10/dist-packages/pytorch_lightning/trainer/configuration_validator.py:72: You passed in a `val_dataloader` but have no `validation_step`. Skipping val loop.
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name  | Type                             | Params
-----------------------------------------------------------
0 | model | SegformerForSemanticSegmentation | 84.6 M
-----------------------------------------------------------
84.6 M    Trainable params
0         Non-trainable params
84.6 M    Total params
338.383   Total estimated model params size (MB)


Training: |          | 0/? [00:00<?, ?it/s]