In [1]:
from datasets import load_dataset
from PIL import Image



In [2]:
model_checkpoint = "nvidia/mit-b0"  # pre-trained model from which to fine-tune
batch_size = 4  # batch size for training and evaluation

In [3]:
from datasets import load_dataset

hf_dataset_identifier = "segments/sidewalk-semantic"
ds = load_dataset(hf_dataset_identifier)


In [4]:
from huggingface_hub import hf_hub_download
import json

filename = "id2label.json"
id2label = json.load(
    open(hf_hub_download(hf_dataset_identifier, filename, repo_type="dataset"), "r")
)
id2label = {int(k): v for k, v in id2label.items()}
label2id = {v: k for k, v in id2label.items()}

num_labels = len(id2label)

In [5]:
num_labels, list(label2id.keys())

(35,
 ['unlabeled',
  'flat-road',
  'flat-sidewalk',
  'flat-crosswalk',
  'flat-cyclinglane',
  'flat-parkingdriveway',
  'flat-railtrack',
  'flat-curb',
  'human-person',
  'human-rider',
  'vehicle-car',
  'vehicle-truck',
  'vehicle-bus',
  'vehicle-tramtrain',
  'vehicle-motorcycle',
  'vehicle-bicycle',
  'vehicle-caravan',
  'vehicle-cartrailer',
  'construction-building',
  'construction-door',
  'construction-wall',
  'construction-fenceguardrail',
  'construction-bridge',
  'construction-tunnel',
  'construction-stairs',
  'object-pole',
  'object-trafficsign',
  'object-trafficlight',
  'nature-vegetation',
  'nature-terrain',
  'sky',
  'void-ground',
  'void-dynamic',
  'void-static',
  'void-unclear'])

In [6]:
from transformers import AutoImageProcessor

image_processor = AutoImageProcessor.from_pretrained(model_checkpoint)
image_processor

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.48, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


SegformerImageProcessor {
  "do_normalize": true,
  "do_reduce_labels": false,
  "do_rescale": true,
  "do_resize": true,
  "image_mean": [
    0.485,
    0.456,
    0.406
  ],
  "image_processor_type": "SegformerImageProcessor",
  "image_std": [
    0.229,
    0.224,
    0.225
  ],
  "resample": 2,
  "rescale_factor": 0.00392156862745098,
  "size": {
    "height": 512,
    "width": 512
  }
}

In [7]:
import torchvision.transforms as t

def transforms(image):
    image = t.ToTensor(image)
    image = image.permute(
        (2, 0, 1)
    )  # Since vision models in transformers are channels-first layout
    return image


def preprocess(example_batch):
    images = [transforms(x.convert("RGB")) for x in example_batch["pixel_values"]]
    labels = [x for x in example_batch["label"]]
    inputs = image_processor(images, labels)
    return inputs

In [8]:
# split up training into training + validation
splits = ds["train"].train_test_split(test_size=0.1)
train_ds = splits["train"]
val_ds = splits["test"]

In [9]:
train_ds.set_transform(preprocess)
val_ds.set_transform(preprocess)

In [10]:
epochs = 50
lr = 0.00006

In [11]:
from torchvision.transforms import ColorJitter
from transformers import SegformerImageProcessor

processor = SegformerImageProcessor()
jitter = ColorJitter(brightness=0.25, contrast=0.25, saturation=0.25, hue=0.1) 

def train_transforms(example_batch):
    images = [jitter(x) for x in example_batch['pixel_values']]
    labels = [x for x in example_batch['label']]
    inputs = processor(images, labels)
    return inputs


def val_transforms(example_batch):
    images = [x for x in example_batch['pixel_values']]
    labels = [x for x in example_batch['label']]
    inputs = processor(images, labels)
    return inputs


# Set transforms
train_ds.set_transform(train_transforms)
val_ds.set_transform(val_transforms)


In [12]:
from transformers import SegformerForSemanticSegmentation

pretrained_model_name = "nvidia/mit-b2" 
model = SegformerForSemanticSegmentation.from_pretrained(
    pretrained_model_name,
    id2label=id2label,
    label2id=label2id
)

config.json:   0%|          | 0.00/70.0k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/99.0M [00:00<?, ?B/s]

Some weights of SegformerForSemanticSegmentation were not initialized from the model checkpoint at nvidia/mit-b2 and are newly initialized: ['decode_head.batch_norm.bias', 'decode_head.batch_norm.num_batches_tracked', 'decode_head.batch_norm.running_mean', 'decode_head.batch_norm.running_var', 'decode_head.batch_norm.weight', 'decode_head.classifier.bias', 'decode_head.classifier.weight', 'decode_head.linear_c.0.proj.bias', 'decode_head.linear_c.0.proj.weight', 'decode_head.linear_c.1.proj.bias', 'decode_head.linear_c.1.proj.weight', 'decode_head.linear_c.2.proj.bias', 'decode_head.linear_c.2.proj.weight', 'decode_head.linear_c.3.proj.bias', 'decode_head.linear_c.3.proj.weight', 'decode_head.linear_fuse.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [13]:
from transformers import TrainingArguments

epochs = 50
lr = 0.00006
batch_size = 5

hub_model_id = "nvidia/segformer-b2-finetuned-cityscapes-1024-1024"

training_args = TrainingArguments(
    "nvidia/segformer-b2-finetuned-cityscapes-1024-1024",
    learning_rate=lr,
    num_train_epochs=epochs,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    save_total_limit=3,
    evaluation_strategy="steps",
    save_strategy="steps",
    save_steps=20,
    eval_steps=20,
    logging_steps=1,
    eval_accumulation_steps=5,
    load_best_model_at_end=True,
    push_to_hub=True,
    hub_model_id=hub_model_id,
    hub_strategy="end",
)




In [14]:
import torch
from torch import nn
import evaluate

metric = evaluate.load("mean_iou")

def compute_metrics(eval_pred):
  with torch.no_grad():
    logits, labels = eval_pred
    logits_tensor = torch.from_numpy(logits)
    # scale the logits to the size of the label
    logits_tensor = nn.functional.interpolate(
        logits_tensor,
        size=labels.shape[-2:],
        mode="bilinear",
        align_corners=False,
    ).argmax(dim=1)

    pred_labels = logits_tensor.detach().cpu().numpy()
    metrics = metric.compute(
        predictions=pred_labels,
        references=labels,
        num_labels=len(id2label),
        ignore_index=0,
        reduce_labels=processor.do_reduce_labels,
    )
    
    # add per category metrics as individual key-value pairs
    per_category_accuracy = metrics.pop("per_category_accuracy").tolist()
    per_category_iou = metrics.pop("per_category_iou").tolist()

    metrics.update({f"accuracy_{id2label[i]}": v for i, v in enumerate(per_category_accuracy)})
    metrics.update({f"iou_{id2label[i]}": v for i, v in enumerate(per_category_iou)})
    
    return metrics


In [15]:
from transformers import Trainer

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_ds,
    eval_dataset=val_ds,
    compute_metrics=compute_metrics,
)


model.safetensors:   0%|          | 0.00/98.9M [00:00<?, ?B/s]

[codecarbon ERROR @ 00:29:30] Error: Another instance of codecarbon is probably running as we find `/tmp/.codecarbon.lock`. Turn off the other instance to be able to run this one or use `allow_multiple_runs` or delete the file. Exiting.


In [None]:
trainer.train()



Step,Training Loss,Validation Loss,Mean Iou,Mean Accuracy,Overall Accuracy,Accuracy Unlabeled,Accuracy Flat-road,Accuracy Flat-sidewalk,Accuracy Flat-crosswalk,Accuracy Flat-cyclinglane,Accuracy Flat-parkingdriveway,Accuracy Flat-railtrack,Accuracy Flat-curb,Accuracy Human-person,Accuracy Human-rider,Accuracy Vehicle-car,Accuracy Vehicle-truck,Accuracy Vehicle-bus,Accuracy Vehicle-tramtrain,Accuracy Vehicle-motorcycle,Accuracy Vehicle-bicycle,Accuracy Vehicle-caravan,Accuracy Vehicle-cartrailer,Accuracy Construction-building,Accuracy Construction-door,Accuracy Construction-wall,Accuracy Construction-fenceguardrail,Accuracy Construction-bridge,Accuracy Construction-tunnel,Accuracy Construction-stairs,Accuracy Object-pole,Accuracy Object-trafficsign,Accuracy Object-trafficlight,Accuracy Nature-vegetation,Accuracy Nature-terrain,Accuracy Sky,Accuracy Void-ground,Accuracy Void-dynamic,Accuracy Void-static,Accuracy Void-unclear,Iou Unlabeled,Iou Flat-road,Iou Flat-sidewalk,Iou Flat-crosswalk,Iou Flat-cyclinglane,Iou Flat-parkingdriveway,Iou Flat-railtrack,Iou Flat-curb,Iou Human-person,Iou Human-rider,Iou Vehicle-car,Iou Vehicle-truck,Iou Vehicle-bus,Iou Vehicle-tramtrain,Iou Vehicle-motorcycle,Iou Vehicle-bicycle,Iou Vehicle-caravan,Iou Vehicle-cartrailer,Iou Construction-building,Iou Construction-door,Iou Construction-wall,Iou Construction-fenceguardrail,Iou Construction-bridge,Iou Construction-tunnel,Iou Construction-stairs,Iou Object-pole,Iou Object-trafficsign,Iou Object-trafficlight,Iou Nature-vegetation,Iou Nature-terrain,Iou Sky,Iou Void-ground,Iou Void-dynamic,Iou Void-static,Iou Void-unclear
20,1.7906,1.669401,0.099478,0.141029,0.618717,,0.440766,0.961564,0.0,0.002169,0.008576,0.0,0.000152,0.000289,0.0,0.615067,0.0,0.000197,0.0,0.0,0.092345,0.0,0.0,0.892944,0.0,0.0,0.0,0.0,,0.0,0.006827,0.0,,0.91612,0.372272,0.203118,0.0,0.0,0.000533,0.0,,0.353745,0.617638,0.0,0.002137,0.008192,0.0,0.000151,0.000289,0.0,0.542232,0.0,0.000186,0.0,0.0,0.088428,0.0,0.0,0.445341,0.0,0.0,0.0,0.0,,0.0,0.006622,0.0,,0.607463,0.307333,0.202999,0.0,0.0,0.000527,0.0
40,0.9283,1.093371,0.152132,0.199429,0.723792,,0.80708,0.941333,0.0,0.222423,0.011774,0.0,8e-05,0.006814,0.0,0.914909,0.0,0.0,0.0,0.0,0.001234,0.0,0.0,0.906288,0.0,0.0,0.0,0.0,,0.0,0.000251,0.0,,0.865075,0.800774,0.903392,0.0,0.0,0.000295,0.0,,0.521124,0.736174,0.0,0.220834,0.01121,0.0,8e-05,0.006812,0.0,0.720187,0.0,0.0,0.0,0.0,0.001234,0.0,0.0,0.529424,0.0,0.0,0.0,0.0,,0.0,0.000251,0.0,,0.730398,0.545259,0.844956,0.0,0.0,0.000294,0.0


  iou = total_area_intersect / total_area_union
  acc = total_area_intersect / total_area_label
  iou = total_area_intersect / total_area_union
  acc = total_area_intersect / total_area_label


In [16]:
a=

SyntaxError: invalid syntax (62411553.py, line 1)

# STOP

In [3]:
#validation_ds = load_dataset("Chris1/cityscapes",split="validation")
#train_ds = load_dataset("Chris1/cityscapes",split="train")
#test_ds = load_dataset("Chris1/cityscapes",split="test")

In [10]:
print(validation_ds["train"][0])

{'pixel_values': <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=1920x1080 at 0x7468C55BB8B0>, 'label': <PIL.PngImagePlugin.PngImageFile image mode=L size=1920x1080 at 0x7468C1942BF0>}


In [5]:
#print(validation_ds)

In [11]:
validation_ds = validation_ds.rename_column('label', 'mask')
validation_ds = validation_ds.rename_column('pixel_values', 'image')

#train_ds = train_ds.rename_column('semantic_segmentation', 'mask')
#test_ds = test_ds.rename_column('semantic_segmentation', 'mask')

In [6]:
#print(train_ds[0])

{'image': <PIL.PngImagePlugin.PngImageFile image mode=RGB size=2048x1024 at 0x7D9297936920>, 'mask': <PIL.PngImagePlugin.PngImageFile image mode=RGB size=2048x1024 at 0x7D92E91C8F70>}


In [12]:
import matplotlib.pyplot as plt 

In [20]:
from torchvision.transforms import ColorJitter
from transformers import SegformerImageProcessor

processor = SegformerImageProcessor(size = {"height": 224, "width": 224})
jitter = ColorJitter(brightness=0.25, contrast=0.25, saturation=0.25, hue=0.1) 

def train_transforms(example_batch):
    images = [jitter(x.convert("RGB")) for x in example_batch["train"]['image']]
    labels = [x.convert("L") for x in example_batch["train"]['mask']]
    print[labels[0].shape]
    inputs = processor(images, labels)
    return inputs


def val_transforms(example_batch):
    images = [x.convert("RGB") for x in example_batch["train"]['image']]
    labels = [x.convert("L") for x in example_batch["train"]['mask']]
    inputs = processor(images, labels)
    return inputs




In [21]:
# Set transforms
#train_ds.set_transform(train_transforms)
#test_ds.set_transform(val_transforms)
validation_ds.set_transform(val_transforms)


In [22]:
from transformers import SegformerForSemanticSegmentation

pretrained_model_name = "nvidia/mit-b4" 
model = SegformerForSemanticSegmentation.from_pretrained(
    pretrained_model_name,
)


Some weights of SegformerForSemanticSegmentation were not initialized from the model checkpoint at nvidia/mit-b4 and are newly initialized: ['decode_head.batch_norm.bias', 'decode_head.batch_norm.num_batches_tracked', 'decode_head.batch_norm.running_mean', 'decode_head.batch_norm.running_var', 'decode_head.batch_norm.weight', 'decode_head.classifier.bias', 'decode_head.classifier.weight', 'decode_head.linear_c.0.proj.bias', 'decode_head.linear_c.0.proj.weight', 'decode_head.linear_c.1.proj.bias', 'decode_head.linear_c.1.proj.weight', 'decode_head.linear_c.2.proj.bias', 'decode_head.linear_c.2.proj.weight', 'decode_head.linear_c.3.proj.bias', 'decode_head.linear_c.3.proj.weight', 'decode_head.linear_fuse.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [23]:
from transformers import TrainingArguments

epochs = 50
lr = 0.00006
batch_size = 5


training_args = TrainingArguments(
    "./",
    learning_rate=lr,
    num_train_epochs=epochs,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    save_total_limit=1,
    evaluation_strategy="steps",
    save_strategy="steps",
    save_steps=1,
    eval_steps=1,
    logging_steps=1,
    eval_accumulation_steps=1,
    load_best_model_at_end=True,
    remove_unused_columns=False
)




In [24]:
import torch
from torch import nn
import evaluate

metric = evaluate.load("mean_iou")

def compute_metrics(eval_pred):
  with torch.no_grad():
    logits, labels = eval_pred
    print(logits.shape)
    print("ok")
    logits_tensor = torch.from_numpy(logits)
    # scale the logits to the size of the label
    logits_tensor = nn.functional.interpolate(
        logits_tensor,
        size=224,
        mode="bilinear",
        align_corners=False,
    ).argmax(dim=1)

    pred_labels = logits_tensor.detach().numpy()
    metrics = metric.compute(
        predictions=pred_labels,
        references=labels,
        num_labels=150,
        ignore_index=0,
        reduce_labels=processor.do_reduce_labels,
    )
    
    
    return metrics


In [25]:
from transformers import Trainer

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=validation_ds,
    eval_dataset=validation_ds,
    compute_metrics=compute_metrics,
)


[codecarbon ERROR @ 23:42:09] Error: Another instance of codecarbon is probably running as we find `/tmp/.codecarbon.lock`. Turn off the other instance to be able to run this one or use `allow_multiple_runs` or delete the file. Exiting.


In [26]:
trainer.train()



KeyError: "Invalid key: 0. Please first select a split. For example: `my_dataset_dictionary['train'][0]`. Available splits: ['train']"

# brouillion

In [None]:
from transformers import SegformerFeatureExtractor, SegformerForSemanticSegmentation
from PIL import Image
import requests

feature_extractor = SegformerFeatureExtractor.from_pretrained("nvidia/segformer-b5-finetuned-cityscapes-1024-1024")
model = SegformerForSemanticSegmentation.from_pretrained("nvidia/segformer-b5-finetuned-cityscapes-1024-1024")

url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = ds[0]["image"]

inputs = feature_extractor(images=image, return_tensors="pt")
outputs = model(**inputs)
logits = outputs.logits  # shape (batch_size, num_labels, height/4, width/4)


In [None]:
from transformers import AutoImageProcessor, SegformerForSemanticSegmentation
from PIL import Image
import requests

image_processor = AutoImageProcessor.from_pretrained("nvidia/segformer-b0-finetuned-ade-512-512")
model = SegformerForSemanticSegmentation.from_pretrained("nvidia/segformer-b0-finetuned-ade-512-512")

url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw)

inputs = image_processor(images=image, return_tensors="pt")
outputs = model(**inputs)
logits = outputs.logits  # shape (batch_size, num_labels, height/4, width/4)
list(logits.shape)

In [63]:
import torch 
argmax = logits.argmax(dim=1)

In [None]:
argmax.shape

In [None]:
import matplotlib.pyplot as plt
plt.subplot(1, 2, 1)
plt.imshow((argmax).permute(1, 2, 0))
plt.subplot(1, 2, 2)
plt.imshow(image)
plt.show()
