In [4]:
! pip --quiet install datasets evaluate

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/84.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.0/84.0 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[?25h

In [1]:
import torch
import numpy as np
from datasets import load_dataset
from transformers import SegformerForSemanticSegmentation, SegformerImageProcessor, TrainingArguments, Trainer
import evaluate

In [2]:
print("Is CUDA available?", torch.cuda.is_available())
print("GPU device:", torch.cuda.get_device_name(0) if torch.cuda.is_available() else "No GPU detected")

Is CUDA available? True
GPU device: Tesla T4


In [3]:
dataset = load_dataset("SodaXII/blb-ms-01")

num_classes = 5  # [0: Unlabeled, 1: Others, 2: Low-severity, 3: High-severity, 4: Healthy]

processor = SegformerImageProcessor.from_pretrained("nvidia/segformer-b0-finetuned-ade-512-512")

  return func(*args, **kwargs)


In [5]:
# Preprocessing function
def preprocess_data(example):
    image = np.array(example["image"], dtype=np.float32)  # Ensure float32 type

    # Normalize image to [0, 1] range
    image = image / image.max()

    # Convert to tensor
    image_tensor = torch.tensor(image, dtype=torch.float32)

    # Convert labels to tensor
    labels = torch.tensor(np.array(example["label"]), dtype=torch.long)

    return {
        "pixel_values": image_tensor,
        "labels": labels
    }

# Apply preprocessing
dataset = dataset.map(preprocess_data, remove_columns=["image"])

# Modify SegFormer to accept 6-channel input
model = SegformerForSemanticSegmentation.from_pretrained(
    "nvidia/segformer-b0-finetuned-ade-512-512",
    num_labels=num_classes,
    ignore_mismatched_sizes=True  # Avoids shape mismatch errors
)




ValueError: Column to remove ['image'] not in the dataset. Current columns in the dataset: ['label', 'pixel_values', 'labels']

In [6]:
# Adjust model's first convolution layer to accept 6
with torch.no_grad():
    model.segformer.encoder.patch_embeddings[0].proj = torch.nn.Conv2d(
        in_channels=6,  # Change from 3 to 6 bands
        out_channels=model.config.hidden_sizes[0],
        kernel_size=7,
        stride=4,
        padding=3,
        bias=False
    )

# Define metric
metric = evaluate.load("mean_iou")

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = torch.argmax(torch.tensor(logits), dim=1)

    return metric.compute(predictions=predictions, references=labels, num_labels=num_classes)

# Define training arguments
training_args = TrainingArguments(
    output_dir="./segformer-finetuned",
    evaluation_strategy="epoch",  # Evaluate at each epoch
    save_strategy="epoch",
    learning_rate=1e-4,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=10,
    weight_decay=0.01,
    logging_dir="./logs",
    logging_steps=50,
    push_to_hub=False,  # Set to True if pushing model to Hugging Face Hub
    save_total_limit=2,  # Keep only last 2 checkpoints
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    report_to="tensorboard"
)

# Define Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset["train"],
    eval_dataset=dataset["validation"],
    compute_metrics=compute_metrics
)

# Train the model
trainer.train()



Epoch,Training Loss,Validation Loss




TypeError: MeanIoU._compute() missing 1 required positional argument: 'ignore_index'