**요약**
- Mask2Former를 미세조정합니다.

<br>

**Inputs:**
- `dir_data`: 데이터가 있는 디렉토리
- `dir_ckpt`: 학습된 모델을 저장할 디렉토리

<br>

**Outputs**:
- f`{dir_ckpt}/1696079822`: 미세조정된 Mask2Former 모델 체크포인트

In [1]:
dir_data = '../data'
dir_ckpt = '../ckpt'

In [2]:
import sys
sys.path.append('../')

import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

import cv2
import pandas as pd
import albumentations as A

from datetime import datetime
from torch.utils.data import Dataset, DataLoader
from transformers import Mask2FormerImageProcessor

from segformers.utils import seed_all, print_env
from segformers.networks import Mask2Former
from segformers.transforms import augmentation

  from .autonotebook import tqdm as notebook_tqdm
Some weights of SegformerForSemanticSegmentation were not initialized from the model checkpoint at nvidia/segformer-b5-finetuned-cityscapes-1024-1024 and are newly initialized because the shapes did not match:
- decode_head.classifier.weight: found shape torch.Size([19, 768, 1, 1]) in the checkpoint and torch.Size([13, 768, 1, 1]) in the model instantiated
- decode_head.classifier.bias: found shape torch.Size([19]) in the checkpoint and torch.Size([13]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of Mask2FormerForUniversalSegmentation were not initialized from the model checkpoint at facebook/mask2former-swin-large-cityscapes-semantic and are newly initialized because the shapes did not match:
- class_predictor.bias: found shape torch.Size([20]) in the checkpoint and torch.Size([14]) in the model instantiated
- class_predictor.weigh

In [3]:
print_env()

DATE : 2023-10-04
Pyton Version : 3.8.17
PyTorch Version : 1.13.0
OS : Linux 5.4.0-155-generic
CPU spec : x86_64
RAM spec : 503.73 GB
Device 0:
Name: NVIDIA A100-SXM4-40GB
Total Memory: 40536.1875 MB
Driver Version: 470.199.02
Device 1:
Name: NVIDIA A100-SXM4-40GB
Total Memory: 40536.1875 MB
Driver Version: 470.199.02
Device 2:
Name: NVIDIA A100-SXM4-40GB
Total Memory: 40536.1875 MB
Driver Version: 470.199.02
Device 3:
Name: NVIDIA DGX Display
Total Memory: 3911.875 MB
Driver Version: 470.199.02
Device 4:
Name: NVIDIA A100-SXM4-40GB
Total Memory: 40536.1875 MB
Driver Version: 470.199.02


In [3]:
image_processor = Mask2FormerImageProcessor.from_pretrained("facebook/mask2former-swin-large-cityscapes-semantic")
image_processor.do_resize = False
model = Mask2Former

In [4]:
class SourceDataset(Dataset):
    def __init__(self, root, csv_file, transform=None):
        self.root = root
        self.data = pd.read_csv(os.path.join(self.root, csv_file))
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        img_path = os.path.join(self.root, self.data.loc[idx, 'img_path'])
        image = cv2.imread(img_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        mask_path = os.path.join(self.root, self.data.loc[idx, 'gt_path'])
        mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
        mask[mask == 255] = 12  # Considering pixel value 12 as background

        if self.transform:
            augmented = self.transform(image=image, mask=mask)
            augmented_image = augmented['image']
            augmented_mask = augmented['mask']

        return augmented_image, augmented_mask, augmented_image, augmented_mask,
    
def collate_fn(batch):
    inputs = list(zip(*batch))
    images = inputs[0]
    segmentation_maps = inputs[1]
    # this function pads the inputs to the same size,
    # and creates a pixel mask
    # actually padding isn't required here since we are cropping
    batch = image_processor(
        images,
        segmentation_maps=segmentation_maps,
        return_tensors="pt",
    )

    batch["original_images"] = inputs[2]
    batch["original_segmentation_maps"] = inputs[3]
    
    return batch

train_dataset = SourceDataset(root=dir_data, csv_file='full.csv', transform=augmentation)
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True, collate_fn=collate_fn)

valid_dataset = SourceDataset(root=dir_data, csv_file='val_source.csv', transform=A.Compose([A.Resize(512, 512)]))
valid_loader = DataLoader(valid_dataset, batch_size=8, shuffle=True, collate_fn=collate_fn)


In [5]:
import math
import os
from glob import glob

import torch
import torch.nn as nn
import wandb
from torch.optim import AdamW
from torch.optim.lr_scheduler import _LRScheduler

from segformers.utils import compute_mIoU


class Trainer:
    def __init__(
        self,
        model,
        config,
    ):
        self.model = model
        self.n_epochs = config['n_epochs']
        self.dir_ckpt = config['dir_ckpt']

        param_optimizer = list(self.model.named_parameters())
        no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
        optimizer_grouped_parameters = [
            {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
             'weight_decay': 0.001},
            {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
             'weight_decay': 0.0}
        ]
        self.optimizer = AdamW(optimizer_grouped_parameters, **config['optimizer'])

        self.scheduler = CosineAnnealingWarmUpRestarts(self.optimizer, **config['scheduler'])
        self.criterion = nn.CrossEntropyLoss()
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.model.to(self.device)
        self.best_metric = 0.0
        wandb.init(**config['wandb'], config=config)

    def fit(self, train_loader, valid_loader):
        for e in range(self.n_epochs):
            train_scores = self.train(train_loader)
            valid_scores = self.evaluate(valid_loader)

            log = {'Epoch': e + 1, 'LR': self.scheduler.get_lr()[0]}
            for k, v in train_scores.items():
                log[f'train_{k}'] = v

            for k, v in valid_scores.items():
                log[f'valid_{k}'] = v

            msg = ''
            for k, v in log.items():
                msg += f'{k}: {v:.4f} | '
            print(msg[:-1])
            wandb.log(log)

            self.save(f'{self.dir_ckpt}/last_ckpt.bin')
            if valid_scores['mIoU'] > self.best_metric:
                self.best_metric = valid_scores['mIoU']
                self.save(f'{self.dir_ckpt}/best_ckpt_{str(e+1).zfill(4)}.bin')
                # Keep top 3 models
                for path in sorted(glob(f'{self.dir_ckpt}/best_ckpt_*.bin'))[:-3]:
                    os.remove(path)

            self.scheduler.step()
        wandb.finish()

    def train(self, loader):
        self.model.train()
        n = 0
        scores = {'Loss': 0.0, 'mIoU': 0.0}
        for it, inputs in enumerate(loader):
            print(f"{it} / {len(loader)}", end='\r')
            outputs = self.model(
                pixel_values=inputs["pixel_values"].to(self.device),
                mask_labels=[labels.to(self.device) for labels in inputs["mask_labels"]],
                class_labels=[labels.to(self.device) for labels in inputs["class_labels"]],
            )

            self.optimizer.zero_grad()
            loss = outputs.loss
            loss.backward()
            self.optimizer.step()
            
             # get original images
            original_images = inputs["original_images"]
            target_sizes = [(image.shape[0], image.shape[1]) for image in original_images]
            # predict segmentation maps
            predicted = image_processor.post_process_semantic_segmentation(outputs, target_sizes=target_sizes)
            batch_size = len(original_images)
            n += batch_size
            scores['Loss'] += batch_size * loss.item()
            for pred, gt in zip(predicted, inputs['original_segmentation_maps']):
                scores['mIoU'] += compute_mIoU(pred, torch.as_tensor(gt, device=self.device))

        for k, v in scores.items():
            scores[k] = v / n

        return scores

    @torch.no_grad()
    def evaluate(self, loader):
        self.model.eval()
        n = 0
        scores = {'Loss': 0.0, 'mIoU': 0.0}
        for inputs in loader:
            outputs = self.model(
                pixel_values=inputs["pixel_values"].to(self.device),
                mask_labels=[labels.to(self.device) for labels in inputs["mask_labels"]],
                class_labels=[labels.to(self.device) for labels in inputs["class_labels"]],
            )

            loss = outputs.loss          
             # get original images
            original_images = inputs["original_images"]
            target_sizes = [(image.shape[0], image.shape[1]) for image in original_images]
            # predict segmentation maps
            predicted = image_processor.post_process_semantic_segmentation(outputs, target_sizes=target_sizes)
            batch_size = len(original_images)
            n += batch_size
            scores['Loss'] += batch_size * loss.item()
            for pred, gt in zip(predicted, inputs['original_segmentation_maps']):
                scores['mIoU'] += compute_mIoU(pred, torch.as_tensor(gt, device=self.device))

        for k, v in scores.items():
            scores[k] = v / n

        return scores

    def save(self, path):
        self.model.eval()
        torch.save({
            'model_state_dict': self.model.state_dict(),
            'optimizer_state_dict': self.optimizer.state_dict(),
        }, path)

    def load(self, path):
        checkpoint = torch.load(path)
        self.model.load_state_dict(checkpoint['model_state_dict'])
        self.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])


class CosineAnnealingWarmUpRestarts(_LRScheduler):
    """
    https://gaussian37.github.io/dl-pytorch-lr_scheduler/
    """
    def __init__(self, optimizer, T_0, T_mult=1, eta_max=0.1, T_up=0, gamma=1., last_epoch=-1):
        if T_0 <= 0 or not isinstance(T_0, int):
            raise ValueError("Expected positive integer T_0, but got {}".format(T_0))
        if T_mult < 1 or not isinstance(T_mult, int):
            raise ValueError("Expected integer T_mult >= 1, but got {}".format(T_mult))
        if T_up < 0 or not isinstance(T_up, int):
            raise ValueError("Expected positive integer T_up, but got {}".format(T_up))
        self.T_0 = T_0
        self.T_mult = T_mult
        self.base_eta_max = eta_max
        self.eta_max = eta_max
        self.T_up = T_up
        self.T_i = T_0
        self.gamma = gamma
        self.cycle = 0
        self.T_cur = last_epoch
        super(CosineAnnealingWarmUpRestarts, self).__init__(optimizer, last_epoch)

    def get_lr(self):
        if self.T_cur == -1:
            return self.base_lrs
        elif self.T_cur < self.T_up:
            return [(self.eta_max - base_lr) * self.T_cur / self.T_up + base_lr for base_lr in self.base_lrs]
        else:
            return [base_lr + (self.eta_max - base_lr) *
                    (1 + math.cos(math.pi * (self.T_cur - self.T_up) / (self.T_i - self.T_up))) / 2
                    for base_lr in self.base_lrs]

    def step(self, epoch=None):
        if epoch is None:
            epoch = self.last_epoch + 1
            self.T_cur = self.T_cur + 1
            if self.T_cur >= self.T_i:
                self.cycle += 1
                self.T_cur = self.T_cur - self.T_i
                self.T_i = (self.T_i - self.T_up) * self.T_mult + self.T_up
        else:
            if epoch >= self.T_0:
                if self.T_mult == 1:
                    self.T_cur = epoch % self.T_0
                    self.cycle = epoch // self.T_0
                else:
                    n = int(math.log((epoch / self.T_0 * (self.T_mult - 1) + 1), self.T_mult))
                    self.cycle = n
                    self.T_cur = epoch - self.T_0 * (self.T_mult ** n - 1) / (self.T_mult - 1)
                    self.T_i = self.T_0 * self.T_mult ** (n)
            else:
                self.T_i = self.T_0
                self.T_cur = epoch

        self.eta_max = self.base_eta_max * (self.gamma**self.cycle)
        self.last_epoch = math.floor(epoch)
        for param_group, lr in zip(self.optimizer.param_groups, self.get_lr()):
            param_group['lr'] = lr


In [6]:
config = {
    'dir_data': './data',
    'dir_ckpt': './ckpt',
    'seed': 0,
    'n_epochs': 50,
    'optimizer': {
        'lr': 0.0,
    },

    'scheduler': {
        'T_0': 50,
        'T_mult': 1,
        'eta_max': 0.00005,
        'T_up': 5,
        'gamma': 0.5,
    },

    'wandb': {
        'project': 'DA',
        'name': 'Mask2Former'
    }
}

In [7]:
seed_all(config['seed'])
run_id = int(datetime.timestamp(datetime.now()))
config['run_id'] = run_id
config['dir_ckpt'] = os.path.join(dir_ckpt, str(run_id))
os.makedirs(config['dir_ckpt'])


In [8]:
trainer = Trainer(model, config)
trainer.load('/home/dongjin/projects/da/ckpt/1695982337/last_ckpt.bin')

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mdongjinlee[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [9]:
trainer.fit(train_loader, valid_loader)

Epoch: 1.0000 | LR: 0.0000 | train_Loss: 16.7380 | train_mIoU: 0.6635 | valid_Loss: 18.8804 | valid_mIoU: 0.6510 |
Epoch: 2.0000 | LR: 0.0000 | train_Loss: 15.8349 | train_mIoU: 0.6786 | valid_Loss: 18.3618 | valid_mIoU: 0.6506 |
Epoch: 3.0000 | LR: 0.0000 | train_Loss: 15.4554 | train_mIoU: 0.6837 | valid_Loss: 18.2946 | valid_mIoU: 0.6503 |
Epoch: 4.0000 | LR: 0.0000 | train_Loss: 15.2329 | train_mIoU: 0.6853 | valid_Loss: 19.0883 | valid_mIoU: 0.6540 |
Epoch: 5.0000 | LR: 0.0000 | train_Loss: 15.0496 | train_mIoU: 0.6906 | valid_Loss: 17.9825 | valid_mIoU: 0.6685 |
Epoch: 6.0000 | LR: 0.0001 | train_Loss: 14.8931 | train_mIoU: 0.6922 | valid_Loss: 18.3842 | valid_mIoU: 0.6464 |
Epoch: 7.0000 | LR: 0.0000 | train_Loss: 14.5293 | train_mIoU: 0.6999 | valid_Loss: 17.9689 | valid_mIoU: 0.6535 |
Epoch: 8.0000 | LR: 0.0000 | train_Loss: 14.0966 | train_mIoU: 0.7116 | valid_Loss: 18.2261 | valid_mIoU: 0.6613 |
Epoch: 9.0000 | LR: 0.0000 | train_Loss: 13.9868 | train_mIoU: 0.7139 | valid_Lo

KeyboardInterrupt: 