In [1]:
import os
import torch
import numpy as np
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from transformers import SegformerForSemanticSegmentation, SegformerImageProcessor
from roboflow import Roboflow

In [2]:
# Read the Roboflow API key from the environment instead of hardcoding it in the
# notebook, where it would be shared with every copy of the file.
# NOTE(review): the key that was previously embedded here should be revoked.
ROBOFLOW_API_KEY = os.environ.get("ROBOFLOW_API_KEY")
if not ROBOFLOW_API_KEY:
    raise RuntimeError(
        "Set the ROBOFLOW_API_KEY environment variable before running this cell."
    )

rf = Roboflow(api_key=ROBOFLOW_API_KEY)
# Download version 15 of the project in "png-mask-semantic" format into the
# local dataset folder used by the rest of the notebook.
dataset = (
    rf.workspace("dron-hkv6n")
    .project("clasificacion-de-suelo-2")
    .version(15)
    .download("png-mask-semantic", location="content/my-datasets")
)

loading Roboflow workspace...
loading Roboflow project...


In [3]:
# Root folder of the downloaded dataset and its three split sub-folders.
data_path = "content/my-datasets"
train_path, valid_path, test_path = (
    os.path.join(data_path, split_name)
    for split_name in ("train", "valid", "test")
)

In [4]:
# Class index -> class name, in index order (0 is the background class).
id2label = dict(enumerate([
    "background",
    "agua",
    "suelo expuesto",
    "vegetacion seca",
    "vegetacion verde",
]))
# Inverse lookup: class name -> class index.
label2id = {name: index for index, name in id2label.items()}
# Number of segmentation classes handed to the model.
NUM_CLASSES = 5

In [5]:
# Image processor loaded from the same pretrained checkpoint name that the model
# cell uses. The datasets call it on each PIL image to obtain "pixel_values".
feature_extractor = SegformerImageProcessor.from_pretrained("nvidia/segformer-b0-finetuned-ade-512-512")

The following named arguments are not valid for `SegformerImageProcessor.__init__` and were ignored: 'feature_extractor_type'


In [6]:
class SegmentationDataset(Dataset):
    """Image/mask pairs read from a single directory for semantic segmentation.

    Every ``.jpg``/``.png`` file in ``image_dir`` whose name does not contain
    ``_mask`` is treated as an image; its mask is expected in the same
    directory as ``<image name without extension>_mask.png``.

    Args:
        image_dir: Directory holding both the images and their masks.
        feature_extractor: Callable invoked as
            ``feature_extractor(images=image, return_tensors="pt")``; the
            ``"pixel_values"`` entry of its result becomes the image tensor.
        label_map: Mapping ``{value stored in the mask file: class id}``.
        ignore_index: Value written to every mask pixel whose stored value is
            not a key of ``label_map``. Defaults to 255, matching the
            ``ignore_index`` of the loss defined later in the notebook.
    """

    def __init__(self, image_dir, feature_extractor, label_map, ignore_index=255):
        self.image_dir = image_dir
        # Sorted so that sample order is deterministic across runs.
        self.image_files = sorted(
            f for f in os.listdir(image_dir)
            if f.endswith((".jpg", ".png")) and "_mask" not in f
        )
        self.mask_files = [img.rsplit(".", 1)[0] + "_mask.png" for img in self.image_files]
        self.feature_extractor = feature_extractor
        self.label_map = label_map
        self.ignore_index = ignore_index

    def __len__(self):
        """Return the number of images found in ``image_dir``."""
        return len(self.image_files)

    def _remap_mask(self, raw_mask):
        """Translate raw mask values to class ids; unmapped pixels get ``ignore_index``.

        The output is allocated as int64 rather than with the input's dtype, so
        the ignore value and any mapped id above 255 cannot overflow a uint8 mask.
        """
        remapped = np.full(raw_mask.shape, self.ignore_index, dtype=np.int64)
        for original, mapped in self.label_map.items():
            remapped[raw_mask == original] = mapped
        return remapped

    def __getitem__(self, idx):
        """Return ``(pixel_values, mask)`` for sample ``idx``.

        ``pixel_values`` is the processor output with its leading batch axis
        removed; ``mask`` is a ``torch.long`` tensor with the same shape as the
        array read from the mask file.
        """
        img_path = os.path.join(self.image_dir, self.image_files[idx])
        mask_path = os.path.join(self.image_dir, self.mask_files[idx])

        # Context managers release the file handles deterministically;
        # convert() and np.array() both produce data independent of the file.
        with Image.open(img_path) as img_file:
            image = img_file.convert("RGB")
        with Image.open(mask_path) as mask_file:
            raw_mask = np.array(mask_file)

        inputs = self.feature_extractor(images=image, return_tensors="pt")
        # squeeze(0) removes only the batch axis; an argument-less squeeze()
        # would also drop any other axis that happens to have size 1.
        pixel_values = inputs["pixel_values"].squeeze(0)

        mask = torch.from_numpy(self._remap_mask(raw_mask))

        return pixel_values, mask

# Identity remapping for the 5 classes: each stored mask value maps to itself.
label_map = {class_id: class_id for class_id in range(5)}

In [7]:
# Wrap the train/valid folders in datasets that share the same processor and
# label remapping.
train_dataset = SegmentationDataset(
    image_dir=train_path,
    feature_extractor=feature_extractor,
    label_map=label_map,
)
valid_dataset = SegmentationDataset(
    image_dir=valid_path,
    feature_extractor=feature_extractor,
    label_map=label_map,
)

# Batches of two samples; only the training loader shuffles.
train_loader = DataLoader(dataset=train_dataset, batch_size=2, shuffle=True)
valid_loader = DataLoader(dataset=valid_dataset, batch_size=2, shuffle=False)

In [8]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Start from the pretrained checkpoint. Its classifier head has a different
# number of outputs than NUM_CLASSES, so mismatched sizes must be tolerated.
model = SegformerForSemanticSegmentation.from_pretrained(
    "nvidia/segformer-b0-finetuned-ade-512-512",
    ignore_mismatched_sizes=True,
    num_labels=NUM_CLASSES,
    label2id=label2id,
    id2label=id2label,
)
model = model.to(device)

# Pixels labelled 255 do not contribute to the loss.
loss_fn = torch.nn.CrossEntropyLoss(ignore_index=255)
optimizer = torch.optim.Adam(model.parameters(), lr=5e-5)

Some weights of SegformerForSemanticSegmentation were not initialized from the model checkpoint at nvidia/segformer-b0-finetuned-ade-512-512 and are newly initialized because the shapes did not match:
- decode_head.classifier.bias: found shape torch.Size([150]) in the checkpoint and torch.Size([5]) in the model instantiated
- decode_head.classifier.weight: found shape torch.Size([150, 256, 1, 1]) in the checkpoint and torch.Size([5, 256, 1, 1]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [9]:
class EarlyStopping:
    """Signal when the validation loss has stopped improving.

    A call counts as an improvement only when the new loss is lower than the
    best recorded loss by more than ``delta``. After ``patience`` consecutive
    calls without improvement, ``early_stop`` is set to ``True``.

    Args:
        patience: Number of consecutive non-improving calls tolerated.
        delta: Minimum decrease of the loss that counts as an improvement.
    """

    def __init__(self, patience=5, delta=0.001):
        self.patience, self.delta = patience, delta
        self.early_stop = False
        self.counter = 0
        self.best_loss = float('inf')

    def __call__(self, val_loss):
        """Record ``val_loss`` and update ``best_loss``, ``counter`` and ``early_stop``."""
        threshold = self.best_loss - self.delta
        if val_loss < threshold:
            # Real improvement: remember it and restart the patience window.
            self.best_loss, self.counter = val_loss, 0
            return

        self.counter += 1
        if self.counter >= self.patience:
            self.early_stop = True


In [10]:
def train_one_epoch():
    """Run one optimisation pass over ``train_loader`` and return the mean batch loss.

    Relies on the module-level ``model``, ``train_loader``, ``device``,
    ``loss_fn`` and ``optimizer``.

    Returns:
        The sum of the per-batch losses divided by the number of batches.
    """
    model.train()
    running_loss = 0
    for batch_images, batch_masks in train_loader:
        batch_images = batch_images.to(device)
        batch_masks = batch_masks.to(device)

        logits = model(pixel_values=batch_images).logits

        # Resize the masks to the spatial size of the logits. Nearest-neighbour
        # keeps the values as valid class ids; interpolate needs a float input
        # with a channel axis, hence the unsqueeze/float and squeeze/long pair.
        target_size = logits.shape[2:]
        masks_as_float = batch_masks.unsqueeze(1).float()
        resized = torch.nn.functional.interpolate(
            masks_as_float, size=target_size, mode='nearest'
        )
        targets = resized.squeeze(1).long()

        loss = loss_fn(logits, targets)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    num_batches = len(train_loader)
    return running_loss / num_batches

def val_one_epoch():
    """Evaluate the model on ``valid_loader`` and return the mean batch loss.

    Relies on the module-level ``model``, ``valid_loader``, ``device`` and
    ``loss_fn``. The model is put in eval mode and no gradients are tracked.

    Returns:
        The sum of the per-batch losses divided by the number of batches.
    """
    model.eval()
    running_loss = 0
    with torch.no_grad():
        for batch_images, batch_masks in valid_loader:
            batch_images = batch_images.to(device)
            batch_masks = batch_masks.to(device)

            logits = model(pixel_values=batch_images).logits

            # Nearest-neighbour resize of the masks to the logits' spatial size.
            target_size = logits.shape[2:]
            masks_as_float = batch_masks.unsqueeze(1).float()
            resized = torch.nn.functional.interpolate(
                masks_as_float, size=target_size, mode='nearest'
            )
            targets = resized.squeeze(1).long()

            batch_loss = loss_fn(logits, targets)
            running_loss += batch_loss.item()

    num_batches = len(valid_loader)
    return running_loss / num_batches

In [11]:
# Training driver: train and validate once per epoch, checkpoint the model
# whenever the validation loss reaches a new minimum, and stop early when the
# validation loss stops improving.
MAX_EPOCHS = 60
CHECKPOINT_PATH = "../best_models/SEGFORMER.pth"

early_stopping = EarlyStopping(patience=5, delta=0.001)

# Lowest validation loss written to disk so far. It is tracked here rather than
# read from early_stopping.best_loss, because that attribute only moves when the
# improvement exceeds delta. Comparing against it would let a later, worse epoch
# overwrite a better checkpoint saved after a sub-delta improvement.
best_val_loss = float("inf")

# torch.save does not create missing directories; create it before training so
# the first checkpoint cannot fail after a full epoch of work.
os.makedirs(os.path.dirname(CHECKPOINT_PATH), exist_ok=True)

for epoch in range(MAX_EPOCHS):
    train_loss = train_one_epoch()
    val_loss = val_one_epoch()

    print(f"Epoch {epoch+1}: Train Loss = {train_loss:.4f}, Val Loss = {val_loss:.4f}")

    if val_loss < best_val_loss:
        best_val_loss = val_loss
        # NOTE(review): this saves the whole model object rather than a
        # state_dict; the format is kept in case other code loads this file.
        torch.save(model, CHECKPOINT_PATH)
        print(f"Mejor modelo guardado en la época {epoch+1} con val_loss = {val_loss:.4f}")

    early_stopping(val_loss)
    if early_stopping.early_stop:
        print("Early stopping activado. Fin del entrenamiento.")
        break

Epoch 1: Train Loss = 1.1138, Val Loss = 0.6569
Mejor modelo guardado en la época 1 con val_loss = 0.6569
Epoch 2: Train Loss = 0.7540, Val Loss = 0.5454
Mejor modelo guardado en la época 2 con val_loss = 0.5454
Epoch 3: Train Loss = 0.6618, Val Loss = 0.8179
Epoch 4: Train Loss = 0.5515, Val Loss = 0.5492
Epoch 5: Train Loss = 0.4922, Val Loss = 0.5535
Epoch 6: Train Loss = 0.4329, Val Loss = 0.5391
Mejor modelo guardado en la época 6 con val_loss = 0.5391
Epoch 7: Train Loss = 0.4124, Val Loss = 0.6428
Epoch 8: Train Loss = 0.4242, Val Loss = 0.4793
Mejor modelo guardado en la época 8 con val_loss = 0.4793
Epoch 9: Train Loss = 0.3456, Val Loss = 0.4482
Mejor modelo guardado en la época 9 con val_loss = 0.4482
Epoch 10: Train Loss = 0.3486, Val Loss = 0.5064
Epoch 11: Train Loss = 0.3463, Val Loss = 0.5091
Epoch 12: Train Loss = 0.3348, Val Loss = 0.3889
Mejor modelo guardado en la época 12 con val_loss = 0.3889
Epoch 13: Train Loss = 0.2861, Val Loss = 0.4625
Epoch 14: Train Loss = 

In [12]:
import os

import requests

# Telegram credentials come from the environment instead of being hardcoded in
# the notebook, where they would be shared with every copy of the file.
# NOTE(review): the bot token that was previously embedded here should be revoked.
TOKEN = os.environ.get("TELEGRAM_BOT_TOKEN")
CHAT_ID = os.environ.get("TELEGRAM_CHAT_ID")
MENSAJE = "TERMINO DE ENTRENAR"

# Message parameters
data = {
    "chat_id": CHAT_ID,
    "text": MENSAJE
}

respuesta = None
if TOKEN and CHAT_ID:
    # Telegram Bot API endpoint for sending a message
    URL = f"https://api.telegram.org/bot{TOKEN}/sendMessage"
    try:
        # Send the message; the timeout keeps the cell from hanging on a dead network.
        respuesta = requests.post(URL, json=data, timeout=10)
        respuesta.raise_for_status()
    except requests.RequestException as exc:
        # Print only the exception type: the exception text can contain the
        # request URL, which embeds the bot token.
        print(f"Telegram notification failed: {type(exc).__name__}")
else:
    print("Telegram notification skipped: set TELEGRAM_BOT_TOKEN and TELEGRAM_CHAT_ID.")

In [13]:
# Save the whole model (left disabled: the training loop already writes this same path)
# torch.save(model, "../best_models/SEGFORMER.pth")