In [None]:
!pip install gdown -q

In [None]:
!pip install transformers[torch] accelerate evaluate datasets peft gdown -q
!git lfs install

In [None]:
# Authenticate with the Hugging Face Hub. The training arguments further down
# set push_to_hub=True, so Hub credentials are needed before training starts.
from huggingface_hub import notebook_login

notebook_login()

In [None]:
import gdown

# Google Drive share link for each dataset split, keyed by the local archive name.
archive_urls = {
    "train.rar": "https://drive.google.com/file/d/135goeAyfKzasCtME-gZoEY6A9f9a9Xm5/view?usp=sharing",
    "test.rar": "https://drive.google.com/file/d/1W4EdUSKG-txm0wY2gTh6wFgVXX2iK6b7/view?usp=sharing",
    "hold.rar": "https://drive.google.com/file/d/1aXnuT8I3QVkJcnqV4RL3gtgsgRr-ZrKC/view?usp=sharing",
}

# fuzzy=True is passed because the URLs are full share links, not bare file ids.
for archive_name, share_url in archive_urls.items():
    gdown.download(share_url, output=archive_name, quiet=False, fuzzy=True)

In [None]:
# Mount Google Drive at /content/drive (only works inside Google Colab).
# NOTE(review): no cell visible in this notebook reads from or writes to
# /content/drive — confirm whether this mount is still needed.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Delete /content/data.zip if it exists (-f keeps this quiet when it does not).
# NOTE(review): nothing visible in this notebook creates data.zip — confirm
# this cleanup is still required.
!rm -rf /content/data.zip

In [None]:
# Unpack the three downloaded archives (train / test / hold splits).
# NOTE(review): unrar appears to treat the last argument as a destination
# directory only when it ends with a path separator or already exists;
# otherwise it is read as a file mask inside the archive. Confirm the layout
# this produces — later cells read from /content/data/<split>/..., while the
# mask-binarisation cell globs /content/*/targets/*.
!unrar x 'train.rar' 'train'
!unrar x 'test.rar' 'test'
!unrar x 'hold.rar' 'hold'
# !rm *.rar

## Building Damage Image Masking: Image Segmentation with LoRA

#### Import Necessary Libraries

In [None]:
import os, sys, glob, shutil
os.environ['TOKENIZERS_PARALLELISM']='false'

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
from torch import nn
from torch.utils.data import Dataset

import transformers
from transformers import Trainer, TrainingArguments, EarlyStoppingCallback
from transformers import AutoImageProcessor
from transformers import AutoModelForSemanticSegmentation, TrainingArguments, Trainer

import torchvision

import evaluate

from PIL import Image

import peft
from peft import LoraConfig, get_peft_model

import cv2
import glob
from tqdm.contrib.concurrent import process_map

#### Display Library Versions

In [None]:
# Report interpreter and library versions so a run can be reproduced later.
# sys.version starts with the version number followed by build details, so the
# first whitespace-separated token is used; a fixed six-character slice would
# truncate versions such as "3.10.12".
print("Python :".rjust(18), sys.version.split()[0])
print("NumPy :".rjust(18), np.__version__)
print("Pandas :".rjust(18), pd.__version__)
print("Torch :".rjust(18), torch.__version__)
print("Torch Vision :".rjust(18), torchvision.__version__)
print("Transformers :".rjust(18), transformers.__version__)
print("Evaluate :".rjust(18), evaluate.__version__)
print("PEFT :".rjust(18), peft.__version__)

#### Basic Values/Constants

In [None]:
# Hugging Face Hub id of the pretrained checkpoint; the image processor and the
# segmentation model below are both loaded from it.
checkpoint = "nvidia/mit-b0"

#### Create Dataset Class

In [None]:
def img_rewrite(image_path):
  """Binarize a mask image in place: every non-zero pixel becomes 1.

  The file is read as a single-channel grayscale image and written back to the
  same path, so the original pixel values are overwritten.

  Args:
    image_path: Path of the mask image to rewrite.

  Raises:
    IOError: If the file cannot be decoded, or the result cannot be written.
  """
  image = cv2.imread(image_path, 0)  # flag 0 == grayscale
  # cv2.imread signals failure by returning None instead of raising.
  if image is None:
    raise IOError(f"Could not read image: {image_path}")
  image[image>0] = 1
  # cv2.imwrite reports failure through its boolean return value.
  if not cv2.imwrite(image_path, image):
    raise IOError(f"Could not write image: {image_path}")

import warnings

# Every file inside a "targets" folder exactly one level below /content.
# NOTE(review): the datasets below are loaded from /content/data/<split>/targets,
# which this pattern would not match — confirm the extracted layout.
images = glob.glob("/content/*/targets/*")

# An empty match would otherwise pass silently and leave the masks unbinarized.
if not images:
    warnings.warn("No mask files matched '/content/*/targets/*'; no masks were binarized.")

# Rewrite the masks in parallel. chunksize batches the many tiny tasks, and the
# trailing semicolon stops the notebook from echoing the list of None results.
process_map(
        img_rewrite,
        images,
        chunksize=16,
    );

In [None]:
class ImageSegmentationDataset(Dataset):
    """
    Image (semantic) segmentation dataset backed by two parallel folders.

    Images are read from ``<parent_dir>/<sub_path>/images`` and segmentation
    maps from ``<parent_dir>/<sub_path>/targets``. Item ``i`` pairs the i-th
    image with the i-th annotation in sorted order of their relative paths;
    only the counts are verified, so both folders must sort consistently.
    """

    def __init__(self, parent_dir, sub_path, image_processor):
        """
        Args:
            parent_dir (string): Root directory of the dataset containing the train, dev, test.
            sub_path (string): sub directory containing images + annotations
            image_processor: image processor used to prepare images + segmentation maps.

        Raises:
            AssertionError: If the number of images and segmentation maps differ.
        """
        self.parent_dir = parent_dir
        self.image_processor = image_processor

        self.img_dir = os.path.join(self.parent_dir, sub_path, "images")
        self.ann_dir = os.path.join(self.parent_dir, sub_path, "targets")

        # Paths are stored relative to their folder so that files found in
        # nested sub-directories can still be opened in __getitem__.
        self.images = self._list_files(self.img_dir)
        self.annotations = self._list_files(self.ann_dir)

        assert len(self.images) == len(self.annotations), \
            "There must be as many images as there are segmentation maps"

    @staticmethod
    def _list_files(directory):
        """Return the sorted paths of all files under `directory`, relative to it."""
        relative_paths = []
        for root, _dirs, files in os.walk(directory):
            for file_name in files:
                full_path = os.path.join(root, file_name)
                relative_paths.append(os.path.relpath(full_path, directory))
        return sorted(relative_paths)

    def __len__(self):
        """Number of (image, segmentation map) pairs."""
        return len(self.images)

    def __getitem__(self, idx):
        """
        Load and preprocess the idx-th (image, segmentation map) pair.

        Returns:
            The image processor's output with the leading batch dimension of
            every tensor removed.
        """
        image_path = os.path.join(self.img_dir, self.images[idx])
        annotation_path = os.path.join(self.ann_dir, self.annotations[idx])

        # Decode inside `with` so the file handles are released promptly.
        # The image is forced to RGB, as the inference cell also does.
        with Image.open(image_path) as image_file:
            image = image_file.convert("RGB")
        with Image.open(annotation_path) as annotation_file:
            segmentation_map = annotation_file.copy()

        # The processor prepares the image and its segmentation map together.
        encoded_inputs = self.image_processor(image, segmentation_map, return_tensors="pt")

        # Drop only the batch dimension added by return_tensors="pt".
        for key in list(encoded_inputs.keys()):
            encoded_inputs[key] = encoded_inputs[key].squeeze(0)

        return encoded_inputs

#### Define Image Processor & Ingest Training & Testing Datasets

In [None]:
# Processor that belongs to the checkpoint, loaded with label reduction disabled.
image_processor = AutoImageProcessor.from_pretrained(checkpoint, do_reduce_labels=False)

# NOTE(review): confirm the archives really unpack to /content/data/<split>/...
data_folder = "/content/data"

# One dataset per split, all sharing the same processor.
train_ds, test_ds, hold_ds = (
    ImageSegmentationDataset(
        parent_dir=data_folder,
        sub_path=split_name,
        image_processor=image_processor,
    )
    for split_name in ("train", "test", "hold")
)

In [None]:
# test_ds is the split handed to the Trainer for evaluation; hold_ds is a
# separate hold-out split, so it gets its own label.
print("Number of training examples:", len(train_ds))
print("Number of validation examples:", len(test_ds))
print("Number of hold-out examples:", len(hold_ds))

#### Shape of Sample (Pixel Values Feature)

In [None]:
# Pull one arbitrary training example to sanity-check what the dataset yields.
sample = train_ds[12]

# Shape of the preprocessed image tensor.
sample['pixel_values'].shape

#### Shape of Sample (Labels Feature)

In [None]:
# Shape of the preprocessed segmentation map for the same example.
sample['labels'].shape

#### Sample Tensors Output

In [None]:
# Display the preprocessed image tensor itself.
sample['pixel_values']

In [None]:
# Display the label tensor itself.
sample['labels']

#### Unique Label Values

In [None]:
# Distinct class ids in this mask. If the binarisation step above ran on these
# files, only 0 and/or 1 should appear.
sample['labels'].squeeze().unique()

#### Create Conversions Between String & Integer Values For Label Values

In [None]:
# Class ids as stored in the masks, mapped to human-readable names.
id2label = {0: "background", 1: "building"}
# Inverse lookup: class name -> class id.
label2id = dict((name, class_id) for class_id, name in id2label.items())

num_labels = len(id2label)

for message in (
    f"List of Unique Label Values: {id2label}",
    f"Number of Unique Label Values: {num_labels}",
    f"label2id: {label2id}",
    f"id2label: {id2label}",
):
    print(message)

#### Define Compute Metrics Function

In [None]:
metric = evaluate.load("mean_iou")


def compute_metrics(eval_pred):
    """
    Compute mean-IoU style metrics for one evaluation pass.

    Args:
        eval_pred: Pair ``(logits, labels)`` of NumPy arrays as passed by the
            Trainer. The last two axes of ``labels`` are taken as the target
            spatial size for the logits.

    Returns:
        dict: The metric's aggregate scores plus one ``accuracy_<label>`` and
        one ``iou_<label>`` entry per class in ``id2label``.
    """
    with torch.no_grad():
        logits, labels = eval_pred
        logits_tensor = torch.from_numpy(logits)
        # Upsample the logits to the label resolution, then take the
        # highest-scoring class for every pixel.
        logits_tensor = nn.functional.interpolate(
            logits_tensor,
            size=labels.shape[-2:],
            mode="bilinear",
            align_corners=False,
        ).argmax(dim=1)

        pred_labels = logits_tensor.detach().cpu().numpy()
        # currently using _compute instead of compute
        # see this issue for more info: https://github.com/huggingface/evaluate/pull/328#issuecomment-1286866576
        metrics = metric._compute(
            predictions=pred_labels,
            references=labels,
            num_labels=len(id2label),
            # Pixels whose reference label equals ignore_index are dropped
            # before scoring. Class 0 ("background") is a real class here, so
            # ignoring it would hide every false positive on background and
            # reduce building IoU to recall. 255 is not a class id in
            # id2label, so no class is excluded.
            ignore_index=255,
            reduce_labels=image_processor.do_reduce_labels,
        )

        # Per-class arrays are flattened into scalar entries so that the
        # returned dict only holds plain numbers.
        per_category_accuracy = metrics.pop("per_category_accuracy").tolist()
        per_category_iou = metrics.pop("per_category_iou").tolist()

        metrics.update({f"accuracy_{id2label[i]}": v for i, v in enumerate(per_category_accuracy)})
        metrics.update({f"iou_{id2label[i]}": v for i, v in enumerate(per_category_iou)})

        return metrics

#### Define Function to Display Number of Trainable Parameters

In [None]:
def print_trainable_parameters(model):
    """
    Report how many of the model's parameters will receive gradient updates.

    Prints the trainable element count, the total element count, and the
    trainable share as a percentage with two decimals.
    """
    # (element count, is trainable) for every parameter tensor in the model.
    sizes = [(param.numel(), param.requires_grad) for param in model.parameters()]
    all_param = sum(count for count, _ in sizes)
    trainable_params = sum(count for count, trainable in sizes if trainable)
    share = 100 * trainable_params / all_param
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {share:.2f}"
    )

#### Define Base Model

In [None]:
# Load the pretrained checkpoint as a semantic-segmentation model configured
# with this notebook's label maps.
# NOTE(review): ignore_mismatched_sizes=True presumably allows weights whose
# shape differs from the checkpoint to be freshly initialised — confirm.
model = AutoModelForSemanticSegmentation.from_pretrained(
    checkpoint, id2label=id2label, label2id=label2id, ignore_mismatched_sizes=True
)
# Baseline count before LoRA is applied.
print_trainable_parameters(model)

#### Define LoRA Configuration

In [None]:
# LoRA adapter settings (see the PEFT LoraConfig documentation for exact semantics):
#   r / lora_alpha  - adapter rank and scaling factor
#   target_modules  - names of the sub-modules that receive LoRA adapters
#   lora_dropout    - dropout applied inside the adapters
#   bias            - which bias terms are trained
#   modules_to_save - extra modules kept fully trainable and saved with the adapter
config = LoraConfig(
    r=32,
    lora_alpha=32,
    target_modules=["query", "value"],
    lora_dropout=0.1,
    bias="lora_only",
    modules_to_save=["decode_head"],
)

#### Wrap Base Model With LoRA Configuration

In [None]:
# Wrap the base model with the LoRA configuration defined above.
lora_model = get_peft_model(model, config)

# Compare against the count printed for the unwrapped base model.
print_trainable_parameters(lora_model)

#### Define Early Stopping Callback

In [None]:
# Callback list handed to the Trainer. With patience 4, training is expected to
# stop after four evaluations in a row without improvement — see the
# EarlyStoppingCallback documentation for the exact criterion.
early_stopping_callback = [EarlyStoppingCallback(early_stopping_patience=4)]

#### Define Training Arguments

In [None]:
import inspect

# Last path component of the checkpoint id, e.g. "org/name" -> "name".
model_name = checkpoint.split("/")[-1]

# Newer transformers releases renamed `evaluation_strategy` to `eval_strategy`.
# The install cell does not pin a version, so pick whichever keyword the
# installed TrainingArguments actually accepts.
accepted_args = inspect.signature(TrainingArguments.__init__).parameters
eval_strategy_arg = (
    "eval_strategy" if "eval_strategy" in accepted_args else "evaluation_strategy"
)

training_args = TrainingArguments(
    output_dir=f"{model_name}-building-damage-lora",
    learning_rate=5e-4,
    num_train_epochs=25,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=2,
    save_total_limit=3,
    # Evaluate and checkpoint on the same schedule (once per epoch).
    save_strategy="epoch",
    logging_steps=5,
    # The dataset yields ready-made tensors; keep every key it returns.
    remove_unused_columns=False,
    load_best_model_at_end=True,
    push_to_hub=True,
    label_names=["labels"],
    **{eval_strategy_arg: "epoch"},
)

#### Define Trainer

In [None]:
# Train the LoRA-wrapped model on train_ds and evaluate on test_ds.
# NOTE(review): hold_ds is built earlier but not used by any visible cell —
# confirm whether a final hold-out evaluation is intended.
trainer = Trainer(
    model=lora_model,
    args=training_args,
    train_dataset=train_ds,
    eval_dataset=test_ds,
    compute_metrics=compute_metrics,
    callbacks=early_stopping_callback,
)

#### Train Model

In [None]:
# Run fine-tuning with the arguments and early-stopping callback configured above.
trainer.train()

#### Save Model

In [None]:
# Local directory the adapter is saved to; the inference cell loads it back
# from this same path.
model_id = "segformer-building-damage-lora"
lora_model.save_pretrained(model_id)

#### Push Model to Hub

In [None]:
# Upload to the Hub; the string argument is passed as the commit message.
# (Plain string: the previous f-prefix had no placeholders to format.)
trainer.push_to_hub("Full data model!!!")

# Let’s now prepare an inference_model and run inference.

In [None]:
from peft import PeftConfig, PeftModel

# NOTE(review): this rebinds `config` (previously the LoraConfig) and `model`
# (previously the base model passed to get_peft_model); `config` is not read
# again in the visible cells — confirm it is needed.
config = PeftConfig.from_pretrained(model_id)
# Fresh copy of the base checkpoint to attach the saved adapter to.
model = AutoModelForSemanticSegmentation.from_pretrained(
    checkpoint, id2label=id2label, label2id=label2id, ignore_mismatched_sizes=True
)

# Load the adapter saved under `model_id` on top of the base model.
inference_model = PeftModel.from_pretrained(model, model_id)

Get an image:

In [None]:
# Open one image from the test split and display it as the cell output.
# NOTE(review): hard-coded path under /content/data — confirm it matches the
# layout the archives were extracted to.
image = Image.open("/content/data/test/images/socal-fire_00001387_post_disaster.png")
image

Preprocess the image to prepare for inference.

In [None]:
# Force RGB before preprocessing; no segmentation map is passed at inference time.
encoding = image_processor(image.convert("RGB"), return_tensors="pt")

Run inference with the encoded image.

In [None]:
# Forward pass without gradient tracking.
with torch.no_grad():
    outputs = inference_model(pixel_values=encoding.pixel_values)
    logits = outputs.logits

# Resize the logits back to the original image size. PIL's image.size is
# (width, height), so it is reversed to the (height, width) order that
# interpolate expects.
upsampled_logits = nn.functional.interpolate(
    logits,
    size=image.size[::-1],
    mode="bilinear",
    align_corners=False,
)

# Highest-scoring class per pixel; [0] selects the single image in the batch.
pred_seg = upsampled_logits.argmax(dim=1)[0]

In [None]:
# Side-by-side view: input image (left) and predicted class map (right).
f, (ax1, ax2) = plt.subplots(1, 2, sharey=True)
ax1.imshow(image)
ax1.set_title('Image')
ax2.imshow(pred_seg)
ax2.set_title('Segmentation')
plt.show()



Next, visualize the results. We need a color palette for this; here we use the ADE20K label colormap, defined in `create_ade20k_label_colormap()` below (the array is taken from the TensorFlow Model Garden repository).


In [None]:
def create_ade20k_label_colormap():
  """Creates a label colormap used in ADE20K segmentation benchmark.

  Each row is one [R, G, B] triple; the row index is the label id it colours.
  Row 0 is black. A new array is built on every call.

  Returns:
    A colormap for visualizing segmentation results.
  """
  return np.asarray([[0, 0, 0],[120, 120, 120],[180, 120, 120],[6, 230, 230],[80, 50, 50],[4, 200, 3],[120, 120, 80],[140, 140, 140],[204, 5, 255],
    [230, 230, 230],[4, 250, 7],[224, 5, 255],[235, 255, 7],[150, 5, 61],[120, 120, 70],[8, 255, 51],[255, 6, 82],[143, 255, 140],[204, 255, 4],
    [255, 51, 7],[204, 70, 3],[0, 102, 200],[61, 230, 250],[255, 6, 51],[11, 102, 255],[255, 7, 71],[255, 9, 224],[9, 7, 230],[220, 220, 220],
    [255, 9, 92],[112, 9, 255],[8, 255, 214],[7, 255, 224],[255, 184, 6],[10, 255, 71],[255, 41, 10],[7, 255, 255],[224, 255, 8],[102, 8, 255],
    [255, 61, 6],[255, 194, 7],[255, 122, 8],[0, 255, 20],[255, 8, 41],[255, 5, 153],[6, 51, 255],[235, 12, 255],[160, 150, 20],[0, 163, 255],
    [140, 140, 140],[250, 10, 15],[20, 255, 0],[31, 255, 0],[255, 31, 0],[255, 224, 0],[153, 255, 0],[0, 0, 255],[255, 71, 0],[0, 235, 255],
    [0, 173, 255],[31, 0, 255],[11, 200, 200],[255, 82, 0],[0, 255, 245],[0, 61, 255],[0, 255, 112],[0, 255, 133],[255, 0, 0],[255, 163, 0],
    [255, 102, 0],[194, 255, 0],[0, 143, 255],[51, 255, 0],[0, 82, 255],[0, 255, 41],[0, 255, 173],[10, 0, 255],[173, 255, 0],[0, 255, 153],
    [255, 92, 0],[255, 0, 255],[255, 0, 245],[255, 0, 102],[255, 173, 0],[255, 0, 20],[255, 184, 184],[0, 31, 255],[0, 255, 61],[0, 71, 255],
    [255, 0, 204],[0, 255, 194],[0, 255, 82],[0, 10, 255],[0, 112, 255],[51, 0, 255],[0, 194, 255],[0, 122, 255],[0, 255, 163],[255, 153, 0],
    [0, 255, 10],[255, 112, 0],[143, 255, 0],[82, 0, 255],[163, 255, 0],[255, 235, 0],[8, 184, 170],[133, 0, 255],[0, 255, 92],[184, 0, 255],
    [255, 0, 31],[0, 184, 255],[0, 214, 255],[255, 0, 112],[92, 255, 0],[0, 224, 255],[112, 224, 255],[70, 184, 160],[163, 0, 255],[153, 0, 255],
    [71, 255, 0],[255, 0, 163],[255, 204, 0],[255, 0, 143],[0, 255, 235],[133, 255, 0],[255, 0, 235],[245, 0, 255],[255, 0, 122],[255, 245, 0],
    [10, 190, 212],[214, 255, 0],[0, 204, 255],[20, 0, 255],[255, 255, 0],[0, 153, 255],[0, 41, 255],[0, 255, 204],[41, 0, 255],[41, 255, 0],
    [173, 0, 255],[0, 245, 255],[71, 0, 255],[122, 0, 255],[0, 255, 184],[0, 92, 255],[184, 255, 0],[0, 133, 255],[255, 214, 0],[25, 194, 194],
    [102, 255, 0],[92, 0, 255],
  ])

In [None]:
import matplotlib.pyplot as plt

# Predicted class ids as a NumPy array so they can index the palette directly.
seg_ids = pred_seg.detach().cpu().numpy()
palette = np.asarray(create_ade20k_label_colormap(), dtype=np.uint8)

# Look up every pixel's colour in one vectorised step -> (H, W, 3).
# The palette rows are RGB and matplotlib also expects RGB, so the channels
# are NOT flipped to BGR (that would swap red and blue in the overlay).
color_seg = palette[seg_ids]

# Blend image and colour map 50/50. The image is forced to three channels so
# its shape matches color_seg even if the file has an alpha channel or is
# grayscale.
img = np.array(image.convert("RGB")) * 0.5 + color_seg * 0.5  # plot the image with the segmentation map
img = img.astype(np.uint8)

plt.figure(figsize=(15, 10))
plt.imshow(img)
plt.show()