<a href="https://colab.research.google.com/github/HindA12/SegformerForSemanticSegmentation/blob/main/SegFormer_Tuning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install -q transformers

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.0/7.0 MB[0m [31m45.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m224.2/224.2 kB[0m [31m23.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m99.4 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
import numpy as np
import os
import pandas as pd
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from PIL import Image
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms as T
import torchvision
import torch.nn.functional as F
from torch.autograd import Variable
import cv2
from transformers import SegformerImageProcessor
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2

In [3]:
!unzip -q /content/drive/MyDrive/Tumor_Segmentation/archive.zip

In [4]:
image_path = '/content/images'
mask_path = '/content/masks'

In [5]:
def create_df():
    name = []
    for dirname, _, filenames in os.walk(image_path):
        for filename in filenames:
            name.append(filename.split('.')[0])
    
    return pd.DataFrame({'id': name}, index = np.arange(0, len(name)))

df = create_df()
print('Total Images: ', len(df))

Total Images:  3064


In [6]:
X_trainval, X_test = train_test_split(df['id'].values, test_size=0.1, random_state=19)
X_train, X_val = train_test_split(X_trainval, test_size=0.15, random_state=19)

print('Train Size   : ', len(X_train))
print('Val Size     : ', len(X_val))
print('Test Size    : ', len(X_test))

Train Size   :  2343
Val Size     :  414
Test Size    :  307


In [7]:
extension = '.png'
X_train = [image + extension for image in X_train]
X_val = [image + extension for image in X_val]
X_test = [image + extension for image in X_test]

In [8]:
!mkdir /content/Train
!mkdir /content/Test 
!mkdir /content/Val
!mkdir /content/Train/image
!mkdir /content/Train/mask
!mkdir /content/Val/image
!mkdir /content/Val/mask
!mkdir /content/Test/image
!mkdir /content/Test/mask

In [9]:
import shutil

for i in range(len(X_train)) :
  old_path = '/content/images/' + X_train[i]
  new_path = '/content/Train/image/' + X_train[i]
  shutil.move(old_path, new_path)
  old_path = '/content/masks/' + X_train[i]
  new_path = '/content/Train/mask/' + X_train[i]
  shutil.move(old_path, new_path)

for i in range(len(X_val)) :
  old_path = '/content/images/' + X_val[i]
  new_path = '/content/Val/image/' + X_val[i]
  shutil.move(old_path, new_path)
  old_path = '/content/masks/' + X_val[i]
  new_path = '/content/Val/mask/' + X_val[i]
  shutil.move(old_path, new_path)

for i in range(len(X_test)) :
  old_path = '/content/images/' + X_test[i]
  new_path = '/content/Test/image/' + X_test[i]
  shutil.move(old_path, new_path)
  old_path = '/content/masks/' + X_test[i]
  new_path = '/content/Test/mask/' + X_test[i]
  shutil.move(old_path, new_path)

In [10]:
class ImageSegmentationDataset(Dataset):
    """Image segmentation dataset."""

    def __init__(self, root_dir, feature_extractor, transforms=None, train=True):
        """
        Args:
            root_dir (string): Root directory of the dataset containing the images + annotations.
            feature_extractor (SegFormerFeatureExtractor): feature extractor to prepare images + segmentation maps.
            train (bool): Whether to load "training" or "validation" images + annotations.
        """
        self.root_dir = root_dir
        self.feature_extractor = feature_extractor
        self.train = train
        self.transforms = transforms

        self.img_dir = os.path.join(self.root_dir, "image")
        self.ann_dir = os.path.join(self.root_dir, "mask")
        
        # read images
        image_file_names = []
        for root, dirs, files in os.walk(self.img_dir):
            image_file_names.extend(files)
        self.images = sorted(image_file_names)
        
        # read annotations
        annotation_file_names = []
        for root, dirs, files in os.walk(self.ann_dir):
            annotation_file_names.extend(files)
        self.annotations = sorted(annotation_file_names)

        assert len(self.images) == len(self.annotations), "There must be as many images as there are segmentation maps"

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        
        image = cv2.imread(os.path.join(self.img_dir, self.images[idx]))
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        
        segmentation_map = cv2.imread(os.path.join(self.ann_dir, self.annotations[idx]))
        segmentation_map = cv2.cvtColor(segmentation_map, cv2.COLOR_BGR2GRAY)

        if self.transforms is not None:
            augmented = self.transforms(image=image, mask=segmentation_map)

            encoded_inputs = self.feature_extractor(augmented['image'], augmented['mask'], return_tensors="pt")
        else:
            encoded_inputs = self.feature_extractor(image, segmentation_map, return_tensors="pt")

        for k,v in encoded_inputs.items():
            encoded_inputs[k].squeeze_() 

        return encoded_inputs

In [11]:
transform = A.Compose([
    A.Flip(p=0.5)
])

In [12]:
root_dir_train = ('/content/Train')
root_dir_val = ('/content/Val')
root_dir_test = ('/content/Test')

feature_extractor = SegformerImageProcessor.from_pretrained("nvidia/segformer-b0-finetuned-ade-512-512")
feature_extractor.do_reduce_labels = False
feature_extractor.size = 128


train_dataset = ImageSegmentationDataset(root_dir_train, feature_extractor=feature_extractor, transforms=transform)
val_dataset = ImageSegmentationDataset(root_dir_val, feature_extractor=feature_extractor, train=False)
test_dataset = ImageSegmentationDataset(root_dir_test, feature_extractor=feature_extractor, train=False)

Downloading (…)rocessor_config.json:   0%|          | 0.00/271 [00:00<?, ?B/s]



In [13]:
batch_size = 8
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=batch_size)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size)

In [14]:
id2label = {0: 'background', 255: 'tumor'}
label2id = {'background' : 0, 'tumor': 255}

In [15]:
!pip -q install pytorch_lightning datasets

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m718.6/718.6 kB[0m [31m45.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m468.7/468.7 kB[0m [31m46.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m519.2/519.2 kB[0m [31m50.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m212.2/212.2 kB[0m [31m25.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m71.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m110.5/110.5 kB[0m [31m16.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m132.9/132.9 kB[0m [31m18.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m269.3/269.3 kB[0m [31m34.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━

In [16]:
import pytorch_lightning as pl
from transformers import SegformerForSemanticSegmentation
from datasets import load_metric

In [17]:
model = SegformerForSemanticSegmentation.from_pretrained("nvidia/mit-b0",
                                                         num_labels=2, 
                                                         id2label=id2label, 
                                                         label2id=label2id,)

Downloading (…)lve/main/config.json:   0%|          | 0.00/70.0k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/14.4M [00:00<?, ?B/s]

Some weights of the model checkpoint at nvidia/mit-b0 were not used when initializing SegformerForSemanticSegmentation: ['classifier.weight', 'classifier.bias']
- This IS expected if you are initializing SegformerForSemanticSegmentation from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing SegformerForSemanticSegmentation from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of SegformerForSemanticSegmentation were not initialized from the model checkpoint at nvidia/mit-b0 and are newly initialized: ['decode_head.batch_norm.num_batches_tracked', 'decode_head.linear_c.3.proj.bias', 'decode_head.linear_c.1.proj.weight', 'decode_head.classifier.weight', 'decode_head.batch_norm.running_mean', 'decode

In [21]:
metric = load_metric("mean_iou")

  metric = load_metric("mean_iou")


Downloading builder script:   0%|          | 0.00/3.15k [00:00<?, ?B/s]

In [23]:
from tqdm.notebook import tqdm

# define optimizer
optimizer = torch.optim.AdamW(model.parameters(), lr=0.00006)


model.train()
for epoch in range(10):  # loop over the dataset multiple times
   print("Epoch:", epoch)
   for idx, batch in enumerate(tqdm(train_dataloader)):
        # get the inputs;
        pixel_values = batch["pixel_values"]
        labels = batch["labels"]

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(pixel_values=pixel_values, labels=labels)
        loss, logits = outputs.loss, outputs.logits
        
        loss.backward()
        optimizer.step()

        # evaluate
        with torch.no_grad():
          upsampled_logits = nn.functional.interpolate(logits, size=labels.shape[-2:], mode="bilinear", align_corners=False)
          predicted = upsampled_logits.argmax(dim=1)
          
          # note that the metric expects predictions + labels as numpy arrays
          metric.add_batch(predictions=predicted.detach().cpu().numpy(), references=labels.detach().cpu().numpy())

        # let's print loss and metrics every 100 batches
        if idx % 100 == 0:
          metrics = metric._compute(references = labels.cpu(), 
                                    predictions = predicted.cpu(),
                                   num_labels=len(id2label), 
                                   ignore_index=255,
                                   reduce_labels=False, # we've already reduced the labels before)
          )
          print("Loss:", loss.item())
          print("Mean_iou:", metrics["mean_iou"])
          print("Mean accuracy:", metrics["mean_accuracy"])

Epoch: 0


  0%|          | 0/293 [00:00<?, ?it/s]

  acc = total_area_intersect / total_area_label


Loss: 0.6346340179443359
Mean_iou: 0.3635854385323385
Mean accuracy: 0.727170877064677


  iou = total_area_intersect / total_area_union


Loss: 0.195694699883461
Mean_iou: 1.0
Mean accuracy: 1.0
Loss: 0.09836321324110031
Mean_iou: 1.0
Mean accuracy: 1.0
Epoch: 1


  0%|          | 0/293 [00:00<?, ?it/s]

Loss: 0.059809017926454544
Mean_iou: 1.0
Mean accuracy: 1.0
Loss: 0.03959131985902786
Mean_iou: 1.0
Mean accuracy: 1.0
Loss: 0.028968550264835358
Mean_iou: 1.0
Mean accuracy: 1.0
Epoch: 2


  0%|          | 0/293 [00:00<?, ?it/s]

Loss: 0.021443286910653114
Mean_iou: 1.0
Mean accuracy: 1.0
Loss: 0.01672101765871048
Mean_iou: 1.0
Mean accuracy: 1.0
Loss: 0.013660438358783722
Mean_iou: 1.0
Mean accuracy: 1.0
Epoch: 3


  0%|          | 0/293 [00:00<?, ?it/s]

Loss: 0.011439944617450237
Mean_iou: 1.0
Mean accuracy: 1.0
Loss: 0.009074041619896889
Mean_iou: 1.0
Mean accuracy: 1.0
Loss: 0.007647866848856211
Mean_iou: 1.0
Mean accuracy: 1.0
Epoch: 4


  0%|          | 0/293 [00:00<?, ?it/s]

Loss: 0.007193980272859335
Mean_iou: 1.0
Mean accuracy: 1.0
Loss: 0.005639246199280024
Mean_iou: 1.0
Mean accuracy: 1.0
Loss: 0.004914330318570137
Mean_iou: 1.0
Mean accuracy: 1.0
Epoch: 5


  0%|          | 0/293 [00:00<?, ?it/s]

Loss: 0.004248024895787239
Mean_iou: 1.0
Mean accuracy: 1.0
Loss: 0.003669541561976075
Mean_iou: 1.0
Mean accuracy: 1.0
Loss: 0.0033214790746569633
Mean_iou: 1.0
Mean accuracy: 1.0
Epoch: 6


  0%|          | 0/293 [00:00<?, ?it/s]

Loss: 0.0029903159011155367
Mean_iou: 1.0
Mean accuracy: 1.0
Loss: 0.002783241681754589
Mean_iou: 1.0
Mean accuracy: 1.0
Loss: 0.0024072793312370777
Mean_iou: 1.0
Mean accuracy: 1.0
Epoch: 7


  0%|          | 0/293 [00:00<?, ?it/s]

Loss: 0.002300811931490898
Mean_iou: 1.0
Mean accuracy: 1.0
Loss: 0.002077128505334258
Mean_iou: 1.0
Mean accuracy: 1.0
Loss: 0.0017679280135780573
Mean_iou: 1.0
Mean accuracy: 1.0
Epoch: 8


  0%|          | 0/293 [00:00<?, ?it/s]

Loss: 0.0016386853531002998
Mean_iou: 1.0
Mean accuracy: 1.0
Loss: 0.0017031700117513537
Mean_iou: 1.0
Mean accuracy: 1.0
Loss: 0.0014218949945643544
Mean_iou: 1.0
Mean accuracy: 1.0
Epoch: 9


  0%|          | 0/293 [00:00<?, ?it/s]

Loss: 0.001243520062416792
Mean_iou: 1.0
Mean accuracy: 1.0
Loss: 0.0012025650357827544
Mean_iou: 1.0
Mean accuracy: 1.0
Loss: 0.001095765852369368
Mean_iou: 1.0
Mean accuracy: 1.0
