In [1]:
import os
import torch 

import numpy as np
import matplotlib.pyplot as plt
import albumentations as A
from PIL import Image

from datasets import load_metric
from transformers import SegformerImageProcessor
from transformers import SegformerForSemanticSegmentation

from torch.utils.tensorboard import SummaryWriter
from torch.utils.data import DataLoader, Dataset
from training.dataset import SemanticSegmentationDataset
from peft import LoraConfig, get_peft_model

  from .autonotebook import tqdm as notebook_tqdm
  check_for_updates()


In [4]:
augs = A.Compose([
              A.RandomSizedCrop( (1000, 1000) , 1000, 1000,  ratio=(0.75, 1.33), p = 0.7),
             A.RandomRain(p=0.05),
             A.ShiftScaleRotate(p =0.4),
             A.RGBShift(p =0.1),
             A.Blur(p =0.2),
             A.GaussNoise(p =0.2),
             A.ElasticTransform(p =0.2),
             A.MaskDropout((10,15), p =0.03),
             A.MotionBlur(p=0.3), 
             A.RandomFog(p=0.3)
        ], p=0.7)

  A.RandomSizedCrop( (1000, 1000) , 1000, 1000,  ratio=(0.75, 1.33), p = 0.7),


In [5]:
image_processor = SegformerImageProcessor(reduce_labels=True)
img_dir = './data/imgs'
masks_dir = './data/masks'

train_ds, eval_ds = SemanticSegmentationDataset.get_train_and_eval_datasets(
    processor, img_dir, masks_dir
)



In [6]:
train_dataloader = DataLoader(train_ds, batch_size=8, shuffle=True)
eval_dataloader = DataLoader(eval_ds, batch_size=8, shuffle=True)

In [7]:
model = SegformerForSemanticSegmentation.from_pretrained("nvidia/mit-b0",
                                                         num_labels=6,
                                                       #  id2label=id2label,
                                                       #  label2id=label2id,
)
config = LoraConfig(
    r=32,
    lora_alpha=32,
    target_modules=["query", "value"],
    lora_dropout=0.1,
    bias="lora_only",
    modules_to_save=["decode_head"],
)

model = get_peft_model(model, config)

model.to('cuda')

Some weights of SegformerForSemanticSegmentation were not initialized from the model checkpoint at nvidia/mit-b0 and are newly initialized: ['decode_head.batch_norm.bias', 'decode_head.batch_norm.num_batches_tracked', 'decode_head.batch_norm.running_mean', 'decode_head.batch_norm.running_var', 'decode_head.batch_norm.weight', 'decode_head.classifier.bias', 'decode_head.classifier.weight', 'decode_head.linear_c.0.proj.bias', 'decode_head.linear_c.0.proj.weight', 'decode_head.linear_c.1.proj.bias', 'decode_head.linear_c.1.proj.weight', 'decode_head.linear_c.2.proj.bias', 'decode_head.linear_c.2.proj.weight', 'decode_head.linear_c.3.proj.bias', 'decode_head.linear_c.3.proj.weight', 'decode_head.linear_fuse.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


PeftModel(
  (base_model): LoraModel(
    (model): SegformerForSemanticSegmentation(
      (segformer): SegformerModel(
        (encoder): SegformerEncoder(
          (patch_embeddings): ModuleList(
            (0): SegformerOverlapPatchEmbeddings(
              (proj): Conv2d(3, 32, kernel_size=(7, 7), stride=(4, 4), padding=(3, 3))
              (layer_norm): LayerNorm((32,), eps=1e-05, elementwise_affine=True)
            )
            (1): SegformerOverlapPatchEmbeddings(
              (proj): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
              (layer_norm): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
            )
            (2): SegformerOverlapPatchEmbeddings(
              (proj): Conv2d(64, 160, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
              (layer_norm): LayerNorm((160,), eps=1e-05, elementwise_affine=True)
            )
            (3): SegformerOverlapPatchEmbeddings(
              (proj): Conv2d(160, 256, kernel_size=

In [9]:
log_path = './lora_segformer'
writer = SummaryWriter(log_path)
metric = load_metric("mean_iou")

In [None]:
import torch
from tqdm import tqdm

optimizer = torch.optim.AdamW(model.parameters(), lr=0.0005)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

train_loss_iter = []
train_loss_epoch = []
eval_iou = []
eval_acc = []
eval_loss = []
model.train()
best_iou = 0 
best_acc = 0
for epoch in tqdm(range(200)):  # loop over the dataset multiple times
    print("Epoch:", epoch)
    curr_epoch_loss = []
    curr_epoch_eval_loss = []
    for idx, batch in enumerate(train_dataloader):
        
        pixel_values = batch["pixel_values"].to(device)
        labels = batch["labels"].to(device)
        optimizer.zero_grad()
        outputs = model(pixel_values=pixel_values, labels=labels)
        loss, logits = outputs.loss, outputs.logits

        loss.backward()
        optimizer.step()
        #print()
        train_loss_iter.append(loss.item())
        curr_epoch_loss.append(loss.item())
        writer.add_scalar("Train/loss_step", train_loss_iter[-1], idx + epoch * len(train_dataloader))
        writer.add_scalar("Train/epoch", epoch + 1, idx + epoch * len(train_dataloader))
    train_loss_epoch.append(sum(curr_epoch_loss) / len(curr_epoch_loss))   
    writer.add_scalar("Train/loss_epoch", train_loss_epoch[-1], epoch + 1)
    with torch.no_grad():
        
        for batch in eval_dataloader:
            pixel_values = batch["pixel_values"].to(device)
            labels = batch["labels"].to(device)
            outputs = model(pixel_values=pixel_values, labels=labels)
            loss, logits = outputs.loss, outputs.logits
            curr_epoch_eval_loss.append(outputs.loss.item())
            upsampled_logits = torch.nn.functional.interpolate(logits, size=labels.shape[-2:], mode="bilinear", align_corners=False)
            predicted = upsampled_logits.argmax(dim=1)
            metric.add_batch(predictions=predicted.detach().cpu().numpy(), references=labels.detach().cpu().numpy())
            
        metrics = metric.compute(num_labels=6, 
                                       ignore_index=255,
                                       reduce_labels=False, # we've already reduced the labels before)
            )
        eval_iou.append( metrics["mean_iou"])
        eval_acc.append(metrics["mean_accuracy"])
        eval_loss.append(sum(curr_epoch_eval_loss) / len(eval_dataloader))
        
        writer.add_scalar("Eval/loss",eval_loss[-1], epoch + 1)
        writer.add_scalar("Eval/Accuracy", metrics["mean_accuracy"], epoch + 1)
        writer.add_scalar("Eval/IoU", metrics["mean_iou"], epoch + 1)
        
        print("Mean_iou:", metrics["mean_iou"])
        print("Loss:", train_loss_epoch[-1])
        print("Mean accuracy:", metrics["mean_accuracy"])
        
        if metrics["mean_iou"] > best_iou: 
            torch.save({"model" : model.state_dict() }, 'bect_iou_segformer_lora.ckpt') 
            best_iou = metrics["mean_iou"]
        if metrics["mean_accuracy"] > best_acc: 
            torch.save({"model" : model.state_dict() }, 'bect_acc_segformer_lora.ckpt') 
            best_acc = metrics["mean_accuracy"]

  0%|                                                   | 0/200 [00:00<?, ?it/s]

Epoch: 0
Mean_iou: 0.1528387801737399
Loss: 1.5240730709499783
Mean accuracy: 0.27104480108992396


  0%|▏                                      | 1/200 [03:23<11:14:48, 203.46s/it]

Epoch: 1


In [None]:
!nvidia-smi

In [2]:
!kill -9 14330

/bin/bash: line 1: kill: (14330) - No such process
