# **Segformer**

In [1]:
from datasets import load_dataset
from PIL import Image
from huggingface_hub import hf_hub_download
import json
import torchvision.transforms as t
from transformers import AutoImageProcessor
from torchvision.transforms import ColorJitter
from transformers import SegformerImageProcessor
from torchvision.transforms import v2
import torch
from torch import nn
import evaluate
from transformers import Trainer
from transformers import SegformerForSemanticSegmentation
from transformers import TrainingArguments






Access token to get access to the dataset. 

You just need a HuggingFace account to have access to it. 

If you don't have a HuggingFace account use the temporary token shared in the report.

In [2]:
access_token = "place_token_in_this string"

Load the dataset:

In [3]:
hf_dataset_identifier = "segments/sidewalk-semantic"
ds = load_dataset(hf_dataset_identifier,
                   token=access_token)

Using the latest cached version of the dataset since segments/sidewalk-semantic couldn't be found on the Hugging Face Hub
Found the latest cached dataset configuration 'default' at /root/.cache/huggingface/datasets/segments___sidewalk-semantic/default/0.0.0/9c33630210cfdc58ab3680f425d44b79c4d03c53 (last modified on Wed Feb 19 03:35:10 2025).


Prepare labels :

In [4]:
filename = "id2label.json"
id2label = json.load(
    open(hf_hub_download(hf_dataset_identifier, filename, repo_type="dataset",token=access_token), "r")
)
id2label = {int(k): v for k, v in id2label.items()}
label2id = {v: k for k, v in id2label.items()}

num_labels = len(id2label)

In [5]:
num_labels, list(label2id.keys())

(35,
 ['unlabeled',
  'flat-road',
  'flat-sidewalk',
  'flat-crosswalk',
  'flat-cyclinglane',
  'flat-parkingdriveway',
  'flat-railtrack',
  'flat-curb',
  'human-person',
  'human-rider',
  'vehicle-car',
  'vehicle-truck',
  'vehicle-bus',
  'vehicle-tramtrain',
  'vehicle-motorcycle',
  'vehicle-bicycle',
  'vehicle-caravan',
  'vehicle-cartrailer',
  'construction-building',
  'construction-door',
  'construction-wall',
  'construction-fenceguardrail',
  'construction-bridge',
  'construction-tunnel',
  'construction-stairs',
  'object-pole',
  'object-trafficsign',
  'object-trafficlight',
  'nature-vegetation',
  'nature-terrain',
  'sky',
  'void-ground',
  'void-dynamic',
  'void-static',
  'void-unclear'])

Prepare the preprocessing for the data

In [6]:
pretrained_model_name = "nvidia/mit-b2" 
image_processor = AutoImageProcessor.from_pretrained(pretrained_model_name)
image_processor

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.48, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


SegformerImageProcessor {
  "do_normalize": true,
  "do_reduce_labels": false,
  "do_rescale": true,
  "do_resize": true,
  "image_mean": [
    0.485,
    0.456,
    0.406
  ],
  "image_processor_type": "SegformerImageProcessor",
  "image_std": [
    0.229,
    0.224,
    0.225
  ],
  "resample": 2,
  "rescale_factor": 0.00392156862745098,
  "size": {
    "height": 512,
    "width": 512
  }
}

In [7]:

def transforms(image):
    image = t.ToTensor(image)
    image = image.permute(
        (2, 0, 1)
    )  # because model is channels-first
    return image


def preprocess(example_batch):
    images = [transforms(x.convert("RGB")) for x in example_batch["pixel_values"]]
    labels = [x for x in example_batch["label"]]
    inputs = image_processor(images, labels)
    return inputs

In [8]:

processor = SegformerImageProcessor()

image_only_transforms = v2.Compose([
     ColorJitter(brightness=0.25, contrast=0.25, saturation=0.25, hue=0.1),
])
label_image_transforms = v2.Compose([
    v2.RandomHorizontalFlip(p=0.5),
    v2.RandomVerticalFlip(p=0.5),
])

def train_transforms(example_batch):
    images = [image_only_transforms(x) for x in example_batch['pixel_values']]
    labels = [x for x in example_batch['label']]

    images, labels= label_image_transforms(images, labels)
    inputs = processor(images, labels)
    return inputs


def val_transforms(example_batch):
    images = [x for x in example_batch['pixel_values']]
    labels = [x for x in example_batch['label']]
    inputs = processor(images, labels)
    return inputs




Prepare dataset:

In [9]:
# split up training into training + validation
splits = ds["train"].train_test_split(test_size=0.2)
train_ds = splits["train"]
val_ds = splits["test"]

In [10]:
# Set transforms
train_ds.set_transform(train_transforms)
val_ds.set_transform(val_transforms)

Load our segformer model

In [11]:

#use the mit-b2 encoder for our experiments
pretrained_model_name = "nvidia/mit-b2" 
model = SegformerForSemanticSegmentation.from_pretrained(
    pretrained_model_name,
    id2label=id2label,
    label2id=label2id,
    long_decoder_depth=0,
    width_list=[1024, 1280, 1536, 1792]
    
)

Some weights of SegformerForSemanticSegmentation were not initialized from the model checkpoint at nvidia/mit-b2 and are newly initialized: ['decode_head.batch_norm.bias', 'decode_head.batch_norm.num_batches_tracked', 'decode_head.batch_norm.running_mean', 'decode_head.batch_norm.running_var', 'decode_head.batch_norm.weight', 'decode_head.classifier.bias', 'decode_head.classifier.weight', 'decode_head.linear_c.0.proj.bias', 'decode_head.linear_c.0.proj.weight', 'decode_head.linear_c.1.proj.bias', 'decode_head.linear_c.1.proj.weight', 'decode_head.linear_c.2.proj.bias', 'decode_head.linear_c.2.proj.weight', 'decode_head.linear_c.3.proj.bias', 'decode_head.linear_c.3.proj.weight', 'decode_head.linear_fuse.weight', 'decode_head.wide_segment.0.bias', 'decode_head.wide_segment.0.weight', 'decode_head.wide_segment.1.bias', 'decode_head.wide_segment.1.num_batches_tracked', 'decode_head.wide_segment.1.running_mean', 'decode_head.wide_segment.1.running_var', 'decode_head.wide_segment.1.weight',

In [12]:
print(model.decode_head)

SegformerDecodeHead(
  (linear_c): ModuleList(
    (0): SegformerMLP(
      (proj): Linear(in_features=64, out_features=768, bias=True)
    )
    (1): SegformerMLP(
      (proj): Linear(in_features=128, out_features=768, bias=True)
    )
    (2): SegformerMLP(
      (proj): Linear(in_features=320, out_features=768, bias=True)
    )
    (3): SegformerMLP(
      (proj): Linear(in_features=512, out_features=768, bias=True)
    )
  )
  (linear_fuse): Conv2d(3072, 768, kernel_size=(1, 1), stride=(1, 1), bias=False)
  (batch_norm): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (activation): ReLU()
  (wide_segment): Sequential(
    (0): Conv2d(768, 1024, kernel_size=(1, 1), stride=(1, 1))
    (1): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Dropout(p=0.1, inplace=False)
    (4): Conv2d(1024, 1280, kernel_size=(1, 1), stride=(1, 1))
    (5): BatchNorm2d(1280, eps=1e-05, momentum=0.1, affine=True

In [13]:
print(model.decode_head.num_parameters())

10045731


set the parameters for the trainning pipeline :

In [14]:

#set the parameters for the trainning 
epochs = 80
lr = 0.00006
batch_size = 4
checkpoints_path = "./training_logs"
training_args = TrainingArguments(
    checkpoints_path,
    learning_rate=lr,
    num_train_epochs=epochs,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    save_total_limit=3,
    eval_strategy="steps",
    save_strategy="steps",
    save_steps=20,
    eval_steps=20,
    logging_steps=1,
    eval_accumulation_steps=5,
    load_best_model_at_end=True,
    metric_for_best_model="eval_mean_iou",
    greater_is_better = True,
    lr_scheduler_type="polynomial"
)


The metric for the trainning and the evaluation (loss and mIOU):

In [15]:


metric = evaluate.load("mean_iou")

def compute_metrics(eval_pred):
  with torch.no_grad():
    logits, labels = eval_pred
    logits_tensor = torch.from_numpy(logits)
    # scale the logits to the size of the label
    logits_tensor = nn.functional.interpolate(
        logits_tensor,
        size=labels.shape[-2:],
        mode="bilinear",
        align_corners=False,
    ).argmax(dim=1)

    pred_labels = logits_tensor.detach().cpu().numpy()
    metrics = metric.compute(
        predictions=pred_labels,
        references=labels,
        num_labels=len(id2label),
        ignore_index=0,
        reduce_labels=processor.do_reduce_labels,
    )
    
    # remove not necessary metrics
    metrics.pop("per_category_accuracy")
    metrics.pop("mean_accuracy")
    metrics.pop("overall_accuracy")
    metrics.pop("per_category_iou")    
    return metrics


Build the trainer for trainning:

In [16]:

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_ds,
    eval_dataset=val_ds,
    compute_metrics=compute_metrics,
)


[codecarbon ERROR @ 03:43:03] Error: Another instance of codecarbon is probably running as we find `/tmp/.codecarbon.lock`. Turn off the other instance to be able to run this one or use `allow_multiple_runs` or delete the file. Exiting.


Launch the trainning : 

In [None]:
trainer.train()

Save the model:

In [None]:
trainer.save_model("./segformer_wide_")

Evaluate the model

In [None]:
trainer.evaluate()

# Evaluation of all models

Here we evaluate each of our model.

## The vanilla model (just the segformer)

In [17]:
checkpoints_path = "./training_logs"
epochs = 80
lr = 0.00006
batch_size = 4


In [18]:
pretrained_model_name = "./segformer_vanilla" 
model_eval = SegformerForSemanticSegmentation.from_pretrained(
    pretrained_model_name,
    id2label=id2label,
    label2id=label2id,
)

In [19]:
training_args_eval = TrainingArguments(
    checkpoints_path,
    learning_rate=lr,
    num_train_epochs=epochs,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    save_total_limit=3,
    eval_strategy="steps",
    save_strategy="steps",
    save_steps=20,
    eval_steps=20,
    logging_steps=1,
    eval_accumulation_steps=5,
    load_best_model_at_end=True,
    metric_for_best_model="eval_mean_iou",
    greater_is_better = True,
    lr_scheduler_type="polynomial"
)

In [20]:
trainer_eval = Trainer(
    model=model_eval,
    args=training_args_eval,
    train_dataset=train_ds,
    eval_dataset=val_ds,
    compute_metrics=compute_metrics,
)

[codecarbon ERROR @ 03:43:05] Error: Another instance of codecarbon is probably running as we find `/tmp/.codecarbon.lock`. Turn off the other instance to be able to run this one or use `allow_multiple_runs` or delete the file. Exiting.


In [21]:
print("Model " + pretrained_model_name[2:] + ":")
print(f"    total number of parameters :{model_eval.num_parameters()}")
print(f"    number of parameters on the decoder :{model_eval.decode_head.num_parameters()}")
print("\n Decoder architecture:")
print(model_eval.decode_head)
print("\n ### Evaluation of the model " + pretrained_model_name[2:] + ":" )
trainer_eval.evaluate()

Model segformer_vanilla:
    total number of parameters :27373539
    number of parameters on the decoder :3177251

 Decoder architecture:
SegformerDecodeHead(
  (linear_c): ModuleList(
    (0): SegformerMLP(
      (proj): Linear(in_features=64, out_features=768, bias=True)
    )
    (1): SegformerMLP(
      (proj): Linear(in_features=128, out_features=768, bias=True)
    )
    (2): SegformerMLP(
      (proj): Linear(in_features=320, out_features=768, bias=True)
    )
    (3): SegformerMLP(
      (proj): Linear(in_features=512, out_features=768, bias=True)
    )
  )
  (linear_fuse): Conv2d(3072, 768, kernel_size=(1, 1), stride=(1, 1), bias=False)
  (batch_norm): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (activation): ReLU()
  (dropout): Dropout(p=0.1, inplace=False)
  (classifier): Conv2d(768, 35, kernel_size=(1, 1), stride=(1, 1))
)

 ### Evaluation of the model segformer_vanilla:


  iou = total_area_intersect / total_area_union
  acc = total_area_intersect / total_area_label


{'eval_loss': 0.2091539353132248,
 'eval_model_preparation_time': 0.0047,
 'eval_mean_iou': 0.5765912893236104,
 'eval_runtime': 19.8525,
 'eval_samples_per_second': 10.074,
 'eval_steps_per_second': 2.519}

# Here we test all the long decoder

First the segformer_long_3

In [22]:
pretrained_model_name = "./segformer_long_3" 
model_eval = SegformerForSemanticSegmentation.from_pretrained(
    pretrained_model_name,
    id2label=id2label,
    label2id=label2id,
)
trainer_eval = Trainer(
    model=model_eval,
    args=training_args_eval,
    train_dataset=train_ds,
    eval_dataset=val_ds,
    compute_metrics=compute_metrics,
)
print("Model " + pretrained_model_name[2:] + ":")
print(f"    total number of parameters :{model_eval.num_parameters()}")
print(f"    number of parameters on the decoder :{model_eval.decode_head.num_parameters()}")
print("\n Decoder architecture:")
print(model_eval.decode_head)
print("\n ### Evaluation of the model " + pretrained_model_name[2:] + ":" )
trainer_eval.evaluate()

[codecarbon ERROR @ 03:43:25] Error: Another instance of codecarbon is probably running as we find `/tmp/.codecarbon.lock`. Turn off the other instance to be able to run this one or use `allow_multiple_runs` or delete the file. Exiting.


Model segformer_long_3:
    total number of parameters :29149923
    number of parameters on the decoder :4953635

 Decoder architecture:
SegformerDecodeHead(
  (linear_c): ModuleList(
    (0): SegformerMLP(
      (proj): Linear(in_features=64, out_features=768, bias=True)
    )
    (1): SegformerMLP(
      (proj): Linear(in_features=128, out_features=768, bias=True)
    )
    (2): SegformerMLP(
      (proj): Linear(in_features=320, out_features=768, bias=True)
    )
    (3): SegformerMLP(
      (proj): Linear(in_features=512, out_features=768, bias=True)
    )
  )
  (linear_fuse): Conv2d(3072, 768, kernel_size=(1, 1), stride=(1, 1), bias=False)
  (batch_norm): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (activation): ReLU()
  (long_segment): Sequential(
    (0): Conv2d(768, 768, kernel_size=(1, 1), stride=(1, 1))
    (1): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Dropout(p=0.1, inpla

{'eval_loss': 0.15932905673980713,
 'eval_model_preparation_time': 0.0042,
 'eval_mean_iou': 0.7001308792788314,
 'eval_runtime': 19.708,
 'eval_samples_per_second': 10.148,
 'eval_steps_per_second': 2.537}

Then the segformer_long_5

In [23]:
pretrained_model_name = "./segformer_long_5" 
model_eval = SegformerForSemanticSegmentation.from_pretrained(
    pretrained_model_name,
    id2label=id2label,
    label2id=label2id,
)
trainer_eval = Trainer(
    model=model_eval,
    args=training_args_eval,
    train_dataset=train_ds,
    eval_dataset=val_ds,
    compute_metrics=compute_metrics,
)
print("Model " + pretrained_model_name[2:] + ":")
print(f"    total number of parameters :{model_eval.num_parameters()}")
print(f"    number of parameters on the decoder :{model_eval.decode_head.num_parameters()}")
print("\n Decoder architecture:")
print(model_eval.decode_head)
print("\n ### Evaluation of the model " + pretrained_model_name[2:] + ":" )
trainer_eval.evaluate()

[codecarbon ERROR @ 03:43:45] Error: Another instance of codecarbon is probably running as we find `/tmp/.codecarbon.lock`. Turn off the other instance to be able to run this one or use `allow_multiple_runs` or delete the file. Exiting.


Model segformer_long_5:
    total number of parameters :30334179
    number of parameters on the decoder :6137891

 Decoder architecture:
SegformerDecodeHead(
  (linear_c): ModuleList(
    (0): SegformerMLP(
      (proj): Linear(in_features=64, out_features=768, bias=True)
    )
    (1): SegformerMLP(
      (proj): Linear(in_features=128, out_features=768, bias=True)
    )
    (2): SegformerMLP(
      (proj): Linear(in_features=320, out_features=768, bias=True)
    )
    (3): SegformerMLP(
      (proj): Linear(in_features=512, out_features=768, bias=True)
    )
  )
  (linear_fuse): Conv2d(3072, 768, kernel_size=(1, 1), stride=(1, 1), bias=False)
  (batch_norm): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (activation): ReLU()
  (long_segment): Sequential(
    (0): Conv2d(768, 768, kernel_size=(1, 1), stride=(1, 1))
    (1): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Dropout(p=0.1, inpla

{'eval_loss': 0.1676626354455948,
 'eval_model_preparation_time': 0.0043,
 'eval_mean_iou': 0.7037701836403736,
 'eval_runtime': 20.4524,
 'eval_samples_per_second': 9.779,
 'eval_steps_per_second': 2.445}

Finally the segformer_long_8

In [24]:
pretrained_model_name = "./segformer_long_8" 
model_eval = SegformerForSemanticSegmentation.from_pretrained(
    pretrained_model_name,
    id2label=id2label,
    label2id=label2id,
)
trainer_eval = Trainer(
    model=model_eval,
    args=training_args_eval,
    train_dataset=train_ds,
    eval_dataset=val_ds,
    compute_metrics=compute_metrics,
)
print("Model " + pretrained_model_name[2:] + ":")
print(f"    total number of parameters :{model_eval.num_parameters()}")
print(f"    number of parameters on the decoder :{model_eval.decode_head.num_parameters()}")
print("\n Decoder architecture:")
print(model_eval.decode_head)
print("\n ### Evaluation of the model " + pretrained_model_name[2:] + ":" )
trainer_eval.evaluate()

[codecarbon ERROR @ 03:44:06] Error: Another instance of codecarbon is probably running as we find `/tmp/.codecarbon.lock`. Turn off the other instance to be able to run this one or use `allow_multiple_runs` or delete the file. Exiting.


Model segformer_long_8:
    total number of parameters :32110563
    number of parameters on the decoder :7914275

 Decoder architecture:
SegformerDecodeHead(
  (linear_c): ModuleList(
    (0): SegformerMLP(
      (proj): Linear(in_features=64, out_features=768, bias=True)
    )
    (1): SegformerMLP(
      (proj): Linear(in_features=128, out_features=768, bias=True)
    )
    (2): SegformerMLP(
      (proj): Linear(in_features=320, out_features=768, bias=True)
    )
    (3): SegformerMLP(
      (proj): Linear(in_features=512, out_features=768, bias=True)
    )
  )
  (linear_fuse): Conv2d(3072, 768, kernel_size=(1, 1), stride=(1, 1), bias=False)
  (batch_norm): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (activation): ReLU()
  (long_segment): Sequential(
    (0): Conv2d(768, 768, kernel_size=(1, 1), stride=(1, 1))
    (1): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Dropout(p=0.1, inpla

{'eval_loss': 0.18361562490463257,
 'eval_model_preparation_time': 0.0046,
 'eval_mean_iou': 0.7046008187441395,
 'eval_runtime': 21.9321,
 'eval_samples_per_second': 9.119,
 'eval_steps_per_second': 2.28}

## Here we test all the wide decoder


Then segformer_wide_3

In [25]:
pretrained_model_name = "./segformer_wide_3" 
model_eval = SegformerForSemanticSegmentation.from_pretrained(
    pretrained_model_name,
    id2label=id2label,
    label2id=label2id,
)
trainer_eval = Trainer(
    model=model_eval,
    args=training_args_eval,
    train_dataset=train_ds,
    eval_dataset=val_ds,
    compute_metrics=compute_metrics,
)
print("Model " + pretrained_model_name[2:] + ":")
print(f"    total number of parameters :{model_eval.num_parameters()}")
print(f"    number of parameters on the decoder :{model_eval.decode_head.num_parameters()}")
print("\n Decoder architecture:")
print(model_eval.decode_head)
print("\n ### Evaluation of the model " + pretrained_model_name[2:] + ":" )
trainer_eval.evaluate()

[codecarbon ERROR @ 03:44:28] Error: Another instance of codecarbon is probably running as we find `/tmp/.codecarbon.lock`. Turn off the other instance to be able to run this one or use `allow_multiple_runs` or delete the file. Exiting.


Model segformer_wide_3:
    total number of parameters :31475171
    number of parameters on the decoder :7278883

 Decoder architecture:
SegformerDecodeHead(
  (linear_c): ModuleList(
    (0): SegformerMLP(
      (proj): Linear(in_features=64, out_features=768, bias=True)
    )
    (1): SegformerMLP(
      (proj): Linear(in_features=128, out_features=768, bias=True)
    )
    (2): SegformerMLP(
      (proj): Linear(in_features=320, out_features=768, bias=True)
    )
    (3): SegformerMLP(
      (proj): Linear(in_features=512, out_features=768, bias=True)
    )
  )
  (linear_fuse): Conv2d(3072, 768, kernel_size=(1, 1), stride=(1, 1), bias=False)
  (batch_norm): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (activation): ReLU()
  (wide_segment): Sequential(
    (0): Conv2d(768, 1024, kernel_size=(1, 1), stride=(1, 1))
    (1): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Dropout(p=0.1, inp

{'eval_loss': 0.12735484540462494,
 'eval_model_preparation_time': 0.0042,
 'eval_mean_iou': 0.766613073891128,
 'eval_runtime': 21.2249,
 'eval_samples_per_second': 9.423,
 'eval_steps_per_second': 2.356}

Finally segformer_wide_4

In [26]:
pretrained_model_name = "./segformer_wide_4" 
model_eval = SegformerForSemanticSegmentation.from_pretrained(
    pretrained_model_name,
    id2label=id2label,
    label2id=label2id,
)
trainer_eval = Trainer(
    model=model_eval,
    args=training_args_eval,
    train_dataset=train_ds,
    eval_dataset=val_ds,
    compute_metrics=compute_metrics,
)
print("Model " + pretrained_model_name[2:] + ":")
print(f"    total number of parameters :{model_eval.num_parameters()}")
print(f"    number of parameters on the decoder :{model_eval.decode_head.num_parameters()}")
print("\n Decoder architecture:")
print(model_eval.decode_head)
print("\n ### Evaluation of the model " + pretrained_model_name[2:] + ":" )
trainer_eval.evaluate()

[codecarbon ERROR @ 03:44:49] Error: Another instance of codecarbon is probably running as we find `/tmp/.codecarbon.lock`. Turn off the other instance to be able to run this one or use `allow_multiple_runs` or delete the file. Exiting.


Model segformer_wide_4:
    total number of parameters :34242019
    number of parameters on the decoder :10045731

 Decoder architecture:
SegformerDecodeHead(
  (linear_c): ModuleList(
    (0): SegformerMLP(
      (proj): Linear(in_features=64, out_features=768, bias=True)
    )
    (1): SegformerMLP(
      (proj): Linear(in_features=128, out_features=768, bias=True)
    )
    (2): SegformerMLP(
      (proj): Linear(in_features=320, out_features=768, bias=True)
    )
    (3): SegformerMLP(
      (proj): Linear(in_features=512, out_features=768, bias=True)
    )
  )
  (linear_fuse): Conv2d(3072, 768, kernel_size=(1, 1), stride=(1, 1), bias=False)
  (batch_norm): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (activation): ReLU()
  (wide_segment): Sequential(
    (0): Conv2d(768, 1024, kernel_size=(1, 1), stride=(1, 1))
    (1): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Dropout(p=0.1, in

{'eval_loss': 0.1819719523191452,
 'eval_model_preparation_time': 0.0043,
 'eval_mean_iou': 0.6951090629663104,
 'eval_runtime': 22.9251,
 'eval_samples_per_second': 8.724,
 'eval_steps_per_second': 2.181}