# 1. Libraries

In [None]:
!pip install -q datasets transformers evaluate &> /dev/null
!pip install accelerate -U &> /dev/null

In [None]:
from datasets import Dataset, DatasetDict, Image
from PIL import Image as PilImage
import os
import json
from transformers import AutoImageProcessor
from torchvision.transforms import ColorJitter
import evaluate
import numpy as np
import torch
from torch import nn
from tqdm import tqdm
from transformers import AutoModelForSemanticSegmentation, TrainingArguments, Trainer
from torch.utils.data import DataLoader


In [None]:
# Colab-only: mount Google Drive under /content/drive. The dataset archive is read
# from there and the zipped predictions are moved back there at the end.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## 1.1 Constants

In [None]:
# Hugging Face Hub checkpoint used to load both the image processor and the model.
CHECKPOINT = "nvidia/mit-b0"
# Number of samples whose logits are upsampled together in compute_metrics.
CHUNK_SIZE = 7

# 2. Download Data

In [None]:
!unzip  /content/drive/MyDrive/PROGETTO-PIRELLI/datasets/Pirelli_4cat.zip -d /content &> /dev/null

Choose between the 3-class and the 4-class supervised dataset, depending on which one was extracted to `/content`.

In [None]:
# The 3-class dataset takes precedence when its folder is present in /content;
# otherwise the 4-class dataset is assumed.
has_three_class_data = 'Pirelli_3cat' in os.listdir('/content')

NUM_CLASSES = 3 if has_three_class_data else 4
base_path = os.path.join(
    '/content',
    'Pirelli_3cat' if has_three_class_data else 'Pirelli_4cat',
)

## 2.1 Convert the labels to grayscale

In [None]:
def conversion(input_directory):
  '''
  Force every PNG label image in ``input_directory`` to grayscale, in place.

  Each file whose name ends with ``.png`` is opened, converted to PIL mode
  ``'L'`` and written back to the same path. Files with any other extension
  are left untouched.

  Args:
      input_directory: path of the folder that contains the label images.

  Returns:
      None. The images are overwritten on disk.
  '''
  for filename in os.listdir(input_directory):
      if not filename.endswith('.png'):
          continue

      input_path = os.path.join(input_directory, filename)

      # The context manager closes the source file handle once the pixels have
      # been converted, so the handle is not left open while the same path is
      # overwritten below (and handles do not pile up across a large folder).
      with PilImage.open(input_path) as original_image:
          grayscale_image = original_image.convert('L')

      # Save the grayscale image, overwriting the original
      grayscale_image.save(input_path)

In [None]:
# Normalise the label masks of both splits to grayscale before the datasets are built.
for split_name in ('train', 'val'):
    conversion(os.path.join(base_path, 'label', split_name))

# 3. Dataset creation

## 3.1 Create list of paths to retrieve the images

In [None]:
def create_paths(base_path, dataset_type):
    '''
    Build the image and mask path lists for one split.

    The PNG files found in ``<base_path>/img/<dataset_type>`` are listed in
    sorted order. For each of them the mask path is the file with the same
    name inside ``<base_path>/label/<dataset_type>``; the mask is not checked
    for existence.

    Args:
        base_path: root folder of the dataset.
        dataset_type: name of the split sub-folder (e.g. 'train' or 'val').

    Returns:
        A tuple ``(image_paths, label_paths)`` of two equally long lists.
    '''
    image_folder = os.path.join(base_path, 'img', dataset_type)
    label_folder = os.path.join(base_path, 'label', dataset_type)

    png_names = [name for name in sorted(os.listdir(image_folder)) if name.endswith('.png')]

    image_paths = [os.path.join(image_folder, name) for name in png_names]
    label_paths = [
        os.path.join(label_folder, os.path.splitext(name)[0] + '.png')
        for name in png_names
    ]

    return image_paths, label_paths

In [None]:
# Collect the image and mask path lists for the 'train' and 'val' splits.
image_paths_train, label_paths_train = create_paths(base_path, 'train')
image_paths_validation, label_paths_validation = create_paths(base_path, 'val')

## 3.2 Create Hugging Face datasets from the paths

In [None]:
def create_dataset(image_paths, label_paths):
    '''
    Build a dataset with an "image" and a "label" column from two path lists.

    Both lists are sorted independently and then paired row by row, so they
    are expected to sort into the same order. Each column is cast to the
    ``datasets`` ``Image`` feature.

    Args:
        image_paths: paths of the input images.
        label_paths: paths of the corresponding masks.

    Returns:
        The resulting ``Dataset``.
    '''
    columns = {
        "image": sorted(image_paths),
        "label": sorted(label_paths),
    }
    return (
        Dataset.from_dict(columns)
        .cast_column("image", Image())
        .cast_column("label", Image())
    )

In [None]:
# One dataset per split, each with an "image" and a "label" column.
train_dataset = create_dataset(image_paths_train, label_paths_train)
validation_dataset = create_dataset(image_paths_validation, label_paths_validation)

In [None]:
# Split the validation data in half. The fixed seed keeps the two halves identical
# across kernel restarts, so evaluation numbers from different runs are comparable.
ds = validation_dataset.train_test_split(test_size=0.5, seed=42)

In [None]:
# The 'train' half of the split is used as the evaluation set during training.
val = ds['train']
# NOTE(review): `test` is not referenced again in the visible part of the notebook.
test = ds['test']

# 4. Image processor

In [None]:
# Preprocessor loaded from the same checkpoint as the model; it is used by
# `transforms` during training and directly in the inference loop.
image_processor = AutoImageProcessor.from_pretrained(CHECKPOINT)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


preprocessor_config.json:   0%|          | 0.00/272 [00:00<?, ?B/s]

Could not find image processor class in the image processor config or the model config. Loading based on pattern matching with the model's feature extractor configuration.


In [None]:
def transforms(example_batch):
    '''
    Run the image processor on a batch of examples.

    Args:
        example_batch: mapping with an "image" and a "label" entry, each an
            iterable with one item per example.

    Returns:
        Whatever ``image_processor`` returns for the images and their masks.
    '''
    batch_images = list(example_batch["image"])
    batch_masks = list(example_batch["label"])
    return image_processor(batch_images, batch_masks)

In [None]:
# Register `transforms` as the formatting function of the training and evaluation sets.
# `validation_dataset` and `test` do not get the transform.
train_dataset.set_transform(transforms)
val.set_transform(transforms)

# 5. Metrics

## 5.1 IoU Loading

In [None]:
# Mean-IoU metric from the `evaluate` library; consumed by compute_metrics.
metric = evaluate.load("mean_iou")

Downloading builder script:   0%|          | 0.00/13.1k [00:00<?, ?B/s]

## 5.2 Compute IoU on predictions

In [None]:
def compute_metrics(eval_pred):
    '''
    Score the predictions of a full evaluation pass with the mean-IoU metric.

    ``eval_pred`` unpacks into ``(logits, labels)``. ``logits`` must be a 4-D
    NumPy array with the samples on the first axis and the classes on the
    second. The logits are upsampled with bilinear interpolation to the last
    two dimensions of ``labels`` and reduced to class ids with an argmax over
    dim 1. Because interpolating the whole evaluation set at once is
    expensive, this is done ``CHUNK_SIZE`` samples at a time and the per-chunk
    predictions are concatenated before scoring.

    Returns:
        The dict produced by ``metric.compute`` (``num_labels=NUM_CLASSES``,
        ``ignore_index=255``), with every NumPy array value rounded to two
        decimals and converted to a list.
    '''
    logits, labels = eval_pred
    num_samples = logits.shape[0]
    target_size = labels.shape[-2:]

    with torch.no_grad():
        chunk_predictions = []

        # One progress-bar step per chunk of at most CHUNK_SIZE samples.
        for start in tqdm(range(0, num_samples, CHUNK_SIZE)):
            chunk = torch.from_numpy(logits[start:start + CHUNK_SIZE, :, :, :])

            upsampled = nn.functional.interpolate(
                chunk,
                size=target_size,
                mode="bilinear",
                align_corners=False,
            )
            class_ids = upsampled.argmax(dim=1)

            chunk_predictions.append(class_ids.detach().cpu())

        pred_labels = torch.cat(chunk_predictions, dim=0).numpy()

        raw_metrics = metric.compute(
            predictions=pred_labels,
            references=labels,
            num_labels=NUM_CLASSES,
            ignore_index=255,
            reduce_labels=False,
        )

        # Per-category arrays become plain rounded lists; scalars pass through.
        metrics = {
            key: np.round(value, 2).tolist() if isinstance(value, np.ndarray) else value
            for key, value in raw_metrics.items()
        }

        torch.cuda.empty_cache()
        return metrics

# 6. Training

In [None]:
# Size the newly initialised decode head for this dataset. Without `num_labels` the
# classifier takes its label count from the checkpoint's config instead of NUM_CLASSES,
# which is the number of classes compute_metrics scores against.
model = AutoModelForSemanticSegmentation.from_pretrained(CHECKPOINT, num_labels=NUM_CLASSES)

config.json:   0%|          | 0.00/70.0k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/14.4M [00:00<?, ?B/s]

Some weights of SegformerForSemanticSegmentation were not initialized from the model checkpoint at nvidia/mit-b0 and are newly initialized: ['decode_head.linear_c.1.proj.weight', 'decode_head.batch_norm.bias', 'decode_head.batch_norm.weight', 'decode_head.linear_c.3.proj.bias', 'decode_head.batch_norm.num_batches_tracked', 'decode_head.linear_c.1.proj.bias', 'decode_head.classifier.bias', 'decode_head.linear_fuse.weight', 'decode_head.batch_norm.running_mean', 'decode_head.batch_norm.running_var', 'decode_head.linear_c.2.proj.bias', 'decode_head.classifier.weight', 'decode_head.linear_c.0.proj.weight', 'decode_head.linear_c.3.proj.weight', 'decode_head.linear_c.2.proj.weight', 'decode_head.linear_c.0.proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# Count only the parameters that will receive gradients.
total_params = 0
for param in model.parameters():
    if param.requires_grad:
        total_params += param.numel()

print(f"Total Trainable Parameters: {total_params}")

Total Trainable Parameters: 3971144


In [None]:
# Training configuration for the Hugging Face Trainer.
# NOTE(review): `evaluation_strategy` is the older spelling of this argument; newer
# transformers releases call it `eval_strategy`. The install cell is unpinned, so
# confirm which name the installed version accepts.
training_args = TrainingArguments(
    output_dir="pirelli",
    learning_rate=1e-3,
    num_train_epochs=5,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    # Evaluate and save a checkpoint once per epoch.
    evaluation_strategy="epoch",
    save_strategy="epoch",
    # Presumably required so the "image"/"label" columns survive until `transforms`
    # runs on them — TODO confirm against the Trainer documentation.
    remove_unused_columns=False,
    load_best_model_at_end = True,
    logging_steps=1
)

# `val` (half of the validation split) is evaluated with compute_metrics.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val,
    compute_metrics= compute_metrics
)

# Start fine-tuning; the returned TrainOutput is displayed as the cell result.
trainer.train()

Epoch,Training Loss,Validation Loss,Mean Iou,Mean Accuracy,Overall Accuracy,Per Category Iou,Per Category Accuracy
1,0.5259,0.556042,0.415551,0.611957,0.892815,"[0.88, 0.78, 0.0, 0.0]","[0.99, 0.85, 0.0, nan]"
2,0.1591,0.879597,0.39386,0.579719,0.86939,"[0.84, 0.73, 0.0, 0.0]","[1.0, 0.74, 0.0, nan]"
3,0.0673,0.434825,0.433499,0.624709,0.902568,"[0.88, 0.84, 0.01, 0.0]","[1.0, 0.86, 0.01, nan]"
4,0.1516,0.510174,0.426467,0.618525,0.896971,"[0.88, 0.81, 0.02, 0.0]","[1.0, 0.84, 0.02, nan]"
5,0.0433,0.336854,0.4533,0.649857,0.917293,"[0.9, 0.88, 0.04, 0.0]","[0.99, 0.92, 0.04, nan]"


100%|██████████| 4/4 [00:34<00:00,  8.56s/it]
  acc = total_area_intersect / total_area_label
Trainer is attempting to log a value of "[0.88, 0.78, 0.0, 0.0]" of type <class 'list'> for key "eval/per_category_iou" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[0.99, 0.85, 0.0, nan]" of type <class 'list'> for key "eval/per_category_accuracy" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
100%|██████████| 4/4 [00:33<00:00,  8.33s/it]
  acc = total_area_intersect / total_area_label
Trainer is attempting to log a value of "[0.84, 0.73, 0.0, 0.0]" of type <class 'list'> for key "eval/per_category_iou" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[1.0, 0.74, 0.0, nan]" of type <class 'list'> for key "eval/per_category_accura

TrainOutput(global_step=220, training_loss=0.6807598719204014, metrics={'train_runtime': 474.9091, 'train_samples_per_second': 0.916, 'train_steps_per_second': 0.463, 'total_flos': 8151120614522880.0, 'train_loss': 0.6807598719204014, 'epoch': 5.0})

In [None]:
# Ask PyTorch to release its cached GPU memory after training, before inference starts.
torch.cuda.empty_cache()

# Inference

In [None]:
# Device the inference inputs are moved to: the GPU when CUDA is available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
# Rebuild the full validation dataset (no transform attached) for the inference loop.
# NOTE(review): the transform was only set on `train_dataset` and `val`, so this
# rebuild is presumably equivalent to the existing `validation_dataset` — confirm.
validation_dataset = create_dataset(image_paths_validation, label_paths_validation)

In [None]:
# RGB colour used to paint each predicted class id in the saved masks.
# NUM_CLASSES can be 4, so class 3 needs its own entry; without it those pixels
# would stay black and be indistinguishable from class 0.
class_to_color = {
    0: (0, 0, 0),  # Color for class 0
    1: (255, 0, 0),  # Color for class 1
    2: (0, 255, 0),  # Color for class 2
    3: (0, 0, 255),  # Color for class 3 (only produced with the 4-class dataset)
}

In [None]:
# exist_ok keeps the cell re-runnable: the folder may already exist from a previous run.
os.makedirs('/content/results', exist_ok=True)

In [None]:
# Inference only: make sure the model sits on the target device and is in eval mode,
# so layers that behave differently during training are switched to inference behaviour.
model.to(device)
model.eval()

for i, el in enumerate(validation_dataset):
    image = el['image']
    encoding = image_processor(image, return_tensors="pt")
    pixel_values = encoding.pixel_values.to(device)

    # No gradients are needed here; skipping them avoids keeping the autograd
    # graph of every forward pass alive.
    with torch.no_grad():
        outputs = model(pixel_values=pixel_values)
        upsampled_logits = nn.functional.interpolate(
            outputs.logits.cpu(),
            size=image.size[::-1],  # reversed image size, used as the interpolation target size
            mode="bilinear",
            align_corners=False,
        )

    # Work on a NumPy array so the boolean masks below index `img` natively.
    pred_seg = upsampled_logits.argmax(dim=1)[0].numpy()

    # Paint every predicted class with its colour; unmapped classes stay black.
    img = np.zeros((pred_seg.shape[0], pred_seg.shape[1], 3), dtype=np.uint8)
    for class_value, color in class_to_color.items():
        img[pred_seg == class_value] = color

    segmented_image_np = PilImage.fromarray(img)
    segmented_image_np.save(os.path.join('/content/results', str(i) + '.png'))

torch.cuda.empty_cache()

In [None]:
# Bundle the predicted masks into one archive. The file name (including the
# "reseult" spelling) is kept as-is because the next cell moves it by that name.
!zip -r reseult_baseline_4Cat.zip /content/results

In [None]:
# Move the archive into the project's results folder on the mounted Google Drive.
!mv /content/reseult_baseline_4Cat.zip /content/drive/MyDrive/PROGETTO-PIRELLI/results