In [4]:
!pip install segmentation-models-pytorch

from google.colab import drive
drive.mount('/content/drive')
import kagglehub
import os
from sklearn.model_selection import train_test_split
import random
import matplotlib.pyplot as plt
from PIL import Image, ImageOps
from torch.utils.data import Dataset, DataLoader
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import pandas as pd
from tqdm import tqdm
import torchvision.transforms as T
import torchvision.transforms.functional as TF
from torchvision.transforms.functional import InterpolationMode
import torch.backends.cudnn as cudnn
from torch.cuda.amp import autocast, GradScaler
from torchvision.transforms import InterpolationMode
import segmentation_models_pytorch as smp

# Fetch both Kaggle datasets through kagglehub and keep the value each call
# returns (printed below as the dataset location).
nikhilroxtomar_person_segmentation_path = kagglehub.dataset_download(
    'nikhilroxtomar/person-segmentation'
)
tapakah68_supervisely_filtered_segmentation_person_dataset_path = kagglehub.dataset_download(
    'tapakah68/supervisely-filtered-segmentation-person-dataset'
)

# Report completion, then one dataset location per line.
print('Data source import complete.')
print(
    nikhilroxtomar_person_segmentation_path,
    tapakah68_supervisely_filtered_segmentation_person_dataset_path,
    sep='\n',
)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Using Colab cache for faster access to the 'person-segmentation' dataset.
Using Colab cache for faster access to the 'supervisely-filtered-segmentation-person-dataset' dataset.
Data source import complete.
/kaggle/input/person-segmentation
/kaggle/input/supervisely-filtered-segmentation-person-dataset


In [5]:
# --- Dataset locations -------------------------------------------------------
# Build every path from the directories kagglehub returned in the download cell
# instead of hard-coding the "/kaggle/input/..." prefix, so the notebook keeps
# working when kagglehub stores the datasets somewhere else.
supervisely_root = tapakah68_supervisely_filtered_segmentation_person_dataset_path
person_seg_root = nikhilroxtomar_person_segmentation_path

# The trailing "" keeps a trailing separator, because file names are appended
# to these prefixes by plain string concatenation below.
images_path = os.path.join(
    supervisely_root,
    "supervisely_person_clean_2667_img",
    "supervisely_person_clean_2667_img",
    "",
)
# Images and masks of this dataset share one prefix; the entries of df.csv are
# appended to it as-is.
masks_path = images_path

images_path2 = os.path.join(person_seg_root, "people_segmentation", "images", "")
masks_path2 = os.path.join(person_seg_root, "people_segmentation", "masks", "")

# --- First dataset: image/mask pairs listed in df.csv ------------------------
df = pd.read_csv(os.path.join(supervisely_root, "df.csv"))

df2 = df[["images", "masks"]].copy()
df2["images"] = df2["images"].apply(lambda x: images_path + x)
df2["masks"] = df2["masks"].apply(lambda x: masks_path + x)
df2["coef"] = 1

# --- Second dataset: pair images and masks by file name ----------------------
images_path2_list = sorted(os.listdir(images_path2))
masks_path2_list = sorted(os.listdir(masks_path2))

# Pairing two independently sorted listings by position silently misaligns
# images and masks as soon as one file is missing or sorts differently, so
# match on the file name without extension whenever that is unambiguous.
image_stems = [os.path.splitext(name)[0] for name in images_path2_list]
mask_name_by_stem = {os.path.splitext(name)[0]: name for name in masks_path2_list}
stems_are_unique = (
    len(set(image_stems)) == len(image_stems)
    and len(mask_name_by_stem) == len(masks_path2_list)
)

if stems_are_unique and set(image_stems) == set(mask_name_by_stem):
    paired_mask_names = [mask_name_by_stem[stem] for stem in image_stems]
elif len(images_path2_list) == len(masks_path2_list):
    # Same number of files but the names do not line up: keep the original
    # positional pairing, but make the risk visible.
    print(
        "WARNING: image and mask file names in the person-segmentation dataset "
        "do not match one-to-one; falling back to positional pairing."
    )
    paired_mask_names = masks_path2_list
else:
    raise ValueError(
        "Cannot pair person-segmentation images with masks: "
        f"{len(images_path2_list)} images vs {len(masks_path2_list)} masks."
    )

df3 = pd.DataFrame({
    "images": [images_path2 + name for name in images_path2_list],
    "masks": [masks_path2 + name for name in paired_mask_names],
    "coef": 255,
})

# --- Combined table and hold-out split ---------------------------------------
final_df = pd.concat([df2, df3], ignore_index=True)

# 90/10 split with a fixed random_state so the split is repeatable.
X_train_raw, X_test_raw = train_test_split(final_df, test_size=0.1, random_state=42)

In [6]:
# Label every row with the dataset it came from: rows whose coef is 1 are
# tagged "supervisely", all other rows "person_seg". assign() returns a new
# frame, so the previous final_df object is left untouched.
final_df = final_df.assign(
    source=np.where(final_df["coef"] == 1, "supervisely", "person_seg")
)

# Total number of samples, followed by the per-source sample counts.
print("Tổng số mẫu:", len(final_df))
print("\nSố mẫu theo nguồn:")
print(final_df["source"].value_counts())

def dice_loss(logits, target, eps=1e-6):
    """Soft Dice loss computed per sample and averaged over the batch.

    Args:
        logits: raw (pre-sigmoid) model outputs; the first dimension is the
            batch dimension.
        target: tensor with the same number of elements per sample as
            ``logits``; it is cast to float.
        eps: smoothing term added to numerator and denominator so a sample
            with an empty prediction and empty target yields a Dice of 1
            instead of 0/0.

    Returns:
        Scalar tensor ``1 - mean(dice)``, differentiable w.r.t. ``logits``.
    """
    pred = torch.sigmoid(logits)
    target = target.float()

    # reshape (not view): view raises on non-contiguous tensors such as
    # permuted or channels_last inputs, while reshape copies only when needed
    # and returns the same values for contiguous input.
    pred = pred.reshape(pred.size(0), -1)
    target = target.reshape(target.size(0), -1)

    inter = (pred * target).sum(dim=1)
    denom = pred.sum(dim=1) + target.sum(dim=1)
    dice = (2 * inter + eps) / (denom + eps)
    return 1 - dice.mean()

def dice_score_from_logits(logits, target, thr=0.5, eps=1e-6):
    """Hard Dice score over all elements of the batch, as a Python float.

    The logits go through a sigmoid and are binarised with ``prob > thr``;
    the score is ``(2 * overlap + eps) / (pred_sum + target_sum + eps)``
    with every sum taken over the whole tensor.
    """
    hard_pred = torch.sigmoid(logits).gt(thr).float()
    overlap = (hard_pred * target).sum()
    total_mass = hard_pred.sum() + target.sum()
    score = (2 * overlap + eps) / (total_mass + eps)
    return score.item()

def logits_to_probs_preds(logits, thr=0.5):
    """Return ``(probs, preds)``: sigmoid probabilities and their
    ``prob > thr`` binarisation as a float tensor of 0.0/1.0."""
    probabilities = torch.sigmoid(logits)
    return probabilities, probabilities.gt(thr).float()
def update_global_stats_from_logits(logits, targets, thr=0.5):
    """Count confusion statistics for one batch of binary predictions.

    Args:
        logits: raw logits, documented by the original author as (B,1,H,W).
        targets: ground truth of the same shape with values in {0, 1}.
        thr: probability threshold; an element is predicted positive when
            ``sigmoid(logit) > thr``.

    Returns:
        ``(tp, fp, fn, correct, total)`` where the first four are Python
        floats (element counts) and ``total`` is the int number of elements
        in ``targets``.
    """
    preds = torch.sigmoid(logits).gt(thr).float()
    missed = 1 - preds        # 1 where the prediction is negative
    background = 1 - targets  # 1 where the ground truth is negative

    true_pos = (preds * targets).sum().item()
    false_pos = (preds * background).sum().item()
    false_neg = (missed * targets).sum().item()

    n_correct = preds.eq(targets).float().sum().item()
    n_elements = targets.numel()

    return true_pos, false_pos, false_neg, n_correct, n_elements

Tổng số mẫu: 8345

Số mẫu theo nguồn:
source
person_seg     5678
supervisely    2667
Name: count, dtype: int64


In [7]:
# Seed handed to set_seed() and to the torch.Generator used by the DataLoaders.
SEED = 42

# Per-channel (R, G, B) mean and standard deviation that PreprocessedSegDataset
# subtracts from / divides into images scaled to [0, 1].
# NOTE(review): by their names these are the ImageNet statistics, matching the
# "imagenet" encoder weights used for the model — confirm if the encoder changes.
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD  = [0.229, 0.224, 0.225]


def set_seed(seed: int = 42):
    """Seed every random number generator used by the notebook.

    Seeds Python's ``random``, NumPy's global RNG and PyTorch (the default
    generator and all CUDA devices), then switches cuDNN to deterministic
    mode with benchmarking turned off.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    cudnn_backend = torch.backends.cudnn
    cudnn_backend.benchmark = False
    cudnn_backend.deterministic = True

def seed_worker(worker_id):
    """Seed NumPy and ``random`` from the current torch seed.

    Passed to the DataLoaders as ``worker_init_fn``. The value of
    ``torch.initial_seed()`` is reduced modulo 2**32 and used to seed both
    libraries. ``worker_id`` is accepted but not used.
    """
    derived_seed = torch.initial_seed() % (1 << 32)
    for reseed in (np.random.seed, random.seed):
        reseed(derived_seed)
# Seed all RNGs before any model, dataset or loader is created.
set_seed(SEED)

# Same values as IMAGENET_MEAN / IMAGENET_STD, held in separate list objects.
RAW_MEAN = list(IMAGENET_MEAN)
RAW_STD  = list(IMAGENET_STD)

class PreprocessedSegDataset(Dataset):
    """Image/mask dataset with resizing, optional augmentation and normalisation.

    Every item is read from the file paths in the ``images`` and ``masks``
    columns of ``df``, resized to ``size`` x ``size``, optionally augmented,
    and returned as ``(image, mask)`` tensors:

    * image: float32, channel-first, scaled to [0, 1] and then normalised
      with ``IMAGENET_MEAN`` / ``IMAGENET_STD``;
    * mask: float32 of shape (1, size, size) holding only 0.0 and 1.0.
    """

    def __init__(self, df, size=256, augment=False):
        """Store the sample table and the preprocessing settings.

        Args:
            df: DataFrame with ``images`` and ``masks`` path columns.
            size: side length of the square output.
            augment: when True, ``_augment`` is applied to every sample.
        """
        self.size = size
        self.augment = augment
        # Fresh 0..n-1 index so __getitem__ can look rows up by position.
        self.df = df.reset_index(drop=True)

        # Shaped (3, 1, 1) so they broadcast over a (3, H, W) image tensor.
        self.mean = torch.tensor(IMAGENET_MEAN).view(3, 1, 1)
        self.std = torch.tensor(IMAGENET_STD).view(3, 1, 1)

        self.color_jitter = T.ColorJitter(
            brightness=0.15,
            contrast=0.15,
            saturation=0.1,
            hue=0.02,
        )

    def __len__(self):
        """Number of rows in the sample table."""
        return len(self.df)

    def _resize(self, img, mask):
        """Resize both PIL images to ``size`` x ``size``.

        The image is resampled bilinearly and the mask with nearest-neighbour.
        """
        target_size = (self.size, self.size)
        return (
            img.resize(target_size, Image.BILINEAR),
            mask.resize(target_size, Image.NEAREST),
        )

    def _augment(self, img, mask):
        """Apply random flip, rotation, crop-and-resize and colour jitter.

        Geometric transforms are applied identically to image and mask;
        colour jitter touches the image only. The random draws happen in a
        fixed order (flip, rotate, crop), so results are repeatable for a
        given RNG state.
        """
        # Horizontal flip, probability 0.5.
        if random.random() < 0.5:
            img, mask = ImageOps.mirror(img), ImageOps.mirror(mask)

        # Rotation by an angle in [-12, 12] degrees, probability 0.3.
        if random.random() < 0.3:
            degrees = random.uniform(-12, 12)
            img = TF.rotate(img, degrees, interpolation=InterpolationMode.BILINEAR)
            mask = TF.rotate(mask, degrees, interpolation=InterpolationMode.NEAREST)

        # Random crop keeping 88-100% of each side, resized back to
        # size x size, probability 0.25.
        if random.random() < 0.25:
            width, height = img.size
            keep = random.uniform(0.88, 1.0)
            crop_w = int(width * keep)
            crop_h = int(height * keep)
            left = random.randint(0, width - crop_w)
            top = random.randint(0, height - crop_h)
            box = (left, top, left + crop_w, top + crop_h)
            out_size = (self.size, self.size)
            img = img.crop(box).resize(out_size, Image.BILINEAR)
            mask = mask.crop(box).resize(out_size, Image.NEAREST)

        return self.color_jitter(img), mask

    def __getitem__(self, idx):
        """Load, preprocess and return the ``(image, mask)`` pair at ``idx``."""
        record = self.df.loc[idx]
        img = Image.open(record["images"]).convert("RGB")
        mask = Image.open(record["masks"]).convert("L")

        img, mask = self._resize(img, mask)
        if self.augment:
            img, mask = self._augment(img, mask)

        img_arr = np.array(img, dtype=np.float32) / 255.0
        mask_arr = np.array(mask, dtype=np.float32)

        # Masks whose maximum exceeds 1 are scaled by that maximum; every
        # mask is then binarised at 0.5.
        peak = mask_arr.max()
        if peak > 1:
            mask_arr = mask_arr / peak
        mask_arr = (mask_arr >= 0.5).astype(np.float32)

        # HWC -> CHW, then per-channel normalisation.
        img_t = torch.from_numpy(img_arr).permute(2, 0, 1)
        img_t = (img_t - self.mean) / self.std
        mask_t = torch.from_numpy(mask_arr).unsqueeze(0)
        return img_t, mask_t

# File name of the metrics CSV written after training.
history_name = "training_unet_history_preprocessed_metrics.csv"

# Use the GPU when one is available, otherwise the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# U-Net with a ResNet-18 encoder initialised from "imagenet" weights; one
# output channel and no final activation, so the model emits raw logits.
model = smp.Unet(
    encoder_name="resnet18",
    encoder_weights="imagenet",
    in_channels=3,
    classes=1,
    activation=None,
)
model = model.to(device)

# BCE on logits (the Dice term is added inside the training loop) and Adam.
criterion_bce = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

# Augmentation is enabled for the training split only.
train_ds = PreprocessedSegDataset(X_train_raw, augment=True, size=256)
val_ds = PreprocessedSegDataset(X_test_raw, augment=False, size=256)

# One seeded generator shared by both loaders.
g = torch.Generator()
g.manual_seed(SEED)

# Options common to the training and validation loaders.
loader_options = {
    "batch_size": 8,
    "num_workers": 2,
    "pin_memory": True,
    "worker_init_fn": seed_worker,
    "generator": g,
}
train_loader = DataLoader(train_ds, shuffle=True, **loader_options)
val_loader = DataLoader(val_ds, shuffle=False, **loader_options)


num_epochs = 20

# One list per tracked metric; each epoch appends one value to every list.
history = {
    metric: []
    for metric in (
        "train_loss",
        "val_loss",
        "val_iou",
        "val_dice",
        "val_acc",
        "val_prec",
        "val_recall",
        "val_f1",
    )
}

# Main loop: one training pass followed by one validation pass per epoch.
for epoch in range(num_epochs):
    # ---------------------------- training ----------------------------
    model.train()
    running_loss = 0.0
    # Pixel-level confusion counts accumulated over the whole epoch.
    tp = fp = fn = 0.0
    correct = 0.0
    total = 0

    for imgs, masks in tqdm(train_loader, desc=f"Train epoch {epoch+1}"):
        imgs = imgs.to(device).float()
        masks = masks.to(device).float()

        logits = model(imgs)

        # Combined objective: BCE-with-logits plus soft Dice loss.
        bce = criterion_bce(logits, masks)
        dsc = dice_loss(logits, masks)
        loss = bce + dsc

        optimizer.zero_grad(set_to_none=True)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        # Training metrics reuse the logits computed before this step's
        # weight update; no gradients are needed for the counting.
        with torch.no_grad():
            _tp, _fp, _fn, _correct, _total = update_global_stats_from_logits(logits, masks, thr=0.5)
            tp += _tp
            fp += _fp
            fn += _fn
            correct += _correct
            total += _total

    # Mean loss per batch; max(1, ...) guards against an empty loader.
    avg_loss = running_loss / max(1, len(train_loader))

    # Epoch-level metrics from the accumulated counts (not per-batch means).
    train_iou  = tp / (tp + fp + fn + 1e-6)
    train_dice = (2*tp) / (2*tp + fp + fn + 1e-6)
    train_acc  = correct / max(1, total)

    print(f"Epoch {epoch+1} - train avg loss: {avg_loss:.4f} | IoU: {train_iou:.4f} | Dice: {train_dice:.4f} | Acc: {train_acc:.4f}")
    # Only the training loss is stored; train IoU/Dice/Acc are printed only.
    history["train_loss"].append(avg_loss)

    # --------------------------- validation ---------------------------
    model.eval()
    val_loss = 0.0
    val_steps = 0

    v_tp = v_fp = v_fn = 0.0
    v_correct = 0.0
    v_total = 0

    with torch.no_grad():
        for imgs, masks in val_loader:
            imgs = imgs.to(device).float()
            masks = masks.to(device).float()

            logits = model(imgs)

            # Same BCE + Dice objective as in training.
            bce = criterion_bce(logits, masks)
            dsc = dice_loss(logits, masks)
            batch_loss = (bce + dsc).item()
            val_loss += batch_loss
            val_steps += 1

            _tp, _fp, _fn, _correct, _total = update_global_stats_from_logits(logits, masks, thr=0.5)
            v_tp += _tp
            v_fp += _fp
            v_fn += _fn
            v_correct += _correct
            v_total += _total

    avg_val_loss = val_loss / max(1, val_steps)

    # Validation metrics from counts accumulated over the whole split.
    val_iou  = v_tp / (v_tp + v_fp + v_fn + 1e-6)
    val_dice = (2*v_tp) / (2*v_tp + v_fp + v_fn + 1e-6)
    val_acc  = v_correct / max(1, v_total)

    # Here the 1e-6 appears in numerator and denominator, so precision and
    # recall evaluate to 1.0 (not 0.0) when their denominators are zero.
    val_prec   = (v_tp + 1e-6) / (v_tp + v_fp + 1e-6)
    val_recall = (v_tp + 1e-6) / (v_tp + v_fn + 1e-6)
    # Harmonic mean of precision and recall; apart from the epsilon terms
    # this is algebraically the same quantity as val_dice.
    val_f1     = (2*val_prec*val_recall + 1e-6) / (val_prec + val_recall + 1e-6)

    history["val_loss"].append(avg_val_loss)
    history["val_iou"].append(val_iou)
    history["val_dice"].append(val_dice)
    history["val_acc"].append(val_acc)
    history["val_prec"].append(val_prec)
    history["val_recall"].append(val_recall)
    history["val_f1"].append(val_f1)

    print(f"Val loss: {avg_val_loss:.4f} | IoU: {val_iou:.4f} | Dice: {val_dice:.4f} | Acc: {val_acc:.4f}")
    print(f"Precision: {val_prec:.4f} | Recall: {val_recall:.4f} | F1: {val_f1:.4f}")

# Destinations under the mounted Google Drive folder.
out_path = f"/content/drive/MyDrive/Data Mining/Project/Model/{history_name}"
save_path = "/content/drive/MyDrive/Data Mining/Project/Model/unet_preprocessing.pth"

# Per-epoch metrics: one row per epoch, one column per history key.
hist_df = pd.DataFrame(history)
os.makedirs(os.path.dirname(out_path), exist_ok=True)
hist_df.to_csv(out_path, index=False)
print(f"Saved training history to Drive ({history_name})")

# Weights and optimizer state as they are after the last epoch.
checkpoint = {
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
}
torch.save(checkpoint, save_path)

print(f"Model saved to {save_path}")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/156 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/46.8M [00:00<?, ?B/s]

Train epoch 1: 100%|██████████| 939/939 [04:35<00:00,  3.41it/s]

Epoch 1 - train avg loss: 0.4641 | IoU: 0.7799 | Dice: 0.8763 | Acc: 0.9275





Val loss: 0.2532 | IoU: 0.8710 | Dice: 0.9310 | Acc: 0.9620
Precision: 0.9187 | Recall: 0.9437 | F1: 0.9310


Train epoch 2: 100%|██████████| 939/939 [04:43<00:00,  3.31it/s]

Epoch 2 - train avg loss: 0.2422 | IoU: 0.8718 | Dice: 0.9315 | Acc: 0.9613





Val loss: 0.1894 | IoU: 0.8968 | Dice: 0.9456 | Acc: 0.9702
Precision: 0.9387 | Recall: 0.9526 | F1: 0.9456


Train epoch 3: 100%|██████████| 939/939 [04:18<00:00,  3.63it/s]

Epoch 3 - train avg loss: 0.2027 | IoU: 0.8886 | Dice: 0.9410 | Acc: 0.9667





Val loss: 0.1902 | IoU: 0.8958 | Dice: 0.9451 | Acc: 0.9703
Precision: 0.9521 | Recall: 0.9382 | F1: 0.9451


Train epoch 4: 100%|██████████| 939/939 [04:18<00:00,  3.63it/s]

Epoch 4 - train avg loss: 0.1756 | IoU: 0.9021 | Dice: 0.9485 | Acc: 0.9710





Val loss: 0.1744 | IoU: 0.9006 | Dice: 0.9477 | Acc: 0.9711
Precision: 0.9339 | Recall: 0.9619 | F1: 0.9477


Train epoch 5: 100%|██████████| 939/939 [04:19<00:00,  3.62it/s]

Epoch 5 - train avg loss: 0.1655 | IoU: 0.9071 | Dice: 0.9513 | Acc: 0.9726





Val loss: 0.1642 | IoU: 0.9090 | Dice: 0.9523 | Acc: 0.9738
Precision: 0.9434 | Recall: 0.9613 | F1: 0.9523


Train epoch 6: 100%|██████████| 939/939 [04:17<00:00,  3.64it/s]

Epoch 6 - train avg loss: 0.1465 | IoU: 0.9181 | Dice: 0.9573 | Acc: 0.9760





Val loss: 0.1565 | IoU: 0.9112 | Dice: 0.9535 | Acc: 0.9745
Precision: 0.9454 | Recall: 0.9618 | F1: 0.9535


Train epoch 7: 100%|██████████| 939/939 [04:16<00:00,  3.66it/s]

Epoch 7 - train avg loss: 0.1405 | IoU: 0.9217 | Dice: 0.9593 | Acc: 0.9771





Val loss: 0.1578 | IoU: 0.9112 | Dice: 0.9535 | Acc: 0.9744
Precision: 0.9406 | Recall: 0.9668 | F1: 0.9535


Train epoch 8: 100%|██████████| 939/939 [04:14<00:00,  3.69it/s]

Epoch 8 - train avg loss: 0.1388 | IoU: 0.9226 | Dice: 0.9597 | Acc: 0.9774





Val loss: 0.1648 | IoU: 0.9100 | Dice: 0.9529 | Acc: 0.9743
Precision: 0.9507 | Recall: 0.9551 | F1: 0.9529


Train epoch 9: 100%|██████████| 939/939 [04:14<00:00,  3.69it/s]

Epoch 9 - train avg loss: 0.1258 | IoU: 0.9294 | Dice: 0.9634 | Acc: 0.9794





Val loss: 0.1449 | IoU: 0.9190 | Dice: 0.9578 | Acc: 0.9768
Precision: 0.9489 | Recall: 0.9669 | F1: 0.9578


Train epoch 10: 100%|██████████| 939/939 [04:15<00:00,  3.68it/s]

Epoch 10 - train avg loss: 0.1227 | IoU: 0.9309 | Dice: 0.9642 | Acc: 0.9799





Val loss: 0.1514 | IoU: 0.9176 | Dice: 0.9570 | Acc: 0.9764
Precision: 0.9466 | Recall: 0.9677 | F1: 0.9570


Train epoch 11: 100%|██████████| 939/939 [04:16<00:00,  3.66it/s]

Epoch 11 - train avg loss: 0.1190 | IoU: 0.9324 | Dice: 0.9650 | Acc: 0.9803





Val loss: 0.1484 | IoU: 0.9177 | Dice: 0.9571 | Acc: 0.9766
Precision: 0.9554 | Recall: 0.9588 | F1: 0.9571


Train epoch 12: 100%|██████████| 939/939 [04:14<00:00,  3.69it/s]

Epoch 12 - train avg loss: 0.1140 | IoU: 0.9360 | Dice: 0.9669 | Acc: 0.9814





Val loss: 0.1510 | IoU: 0.9165 | Dice: 0.9564 | Acc: 0.9759
Precision: 0.9395 | Recall: 0.9740 | F1: 0.9564


Train epoch 13: 100%|██████████| 939/939 [04:12<00:00,  3.72it/s]

Epoch 13 - train avg loss: 0.1121 | IoU: 0.9371 | Dice: 0.9675 | Acc: 0.9818





Val loss: 0.1452 | IoU: 0.9190 | Dice: 0.9578 | Acc: 0.9768
Precision: 0.9488 | Recall: 0.9669 | F1: 0.9578


Train epoch 14: 100%|██████████| 939/939 [04:12<00:00,  3.72it/s]

Epoch 14 - train avg loss: 0.1097 | IoU: 0.9385 | Dice: 0.9683 | Acc: 0.9822





Val loss: 0.1421 | IoU: 0.9235 | Dice: 0.9602 | Acc: 0.9782
Precision: 0.9524 | Recall: 0.9682 | F1: 0.9602


Train epoch 15: 100%|██████████| 939/939 [04:17<00:00,  3.65it/s]

Epoch 15 - train avg loss: 0.1046 | IoU: 0.9410 | Dice: 0.9696 | Acc: 0.9829





Val loss: 0.1388 | IoU: 0.9245 | Dice: 0.9608 | Acc: 0.9785
Precision: 0.9555 | Recall: 0.9661 | F1: 0.9608


Train epoch 16: 100%|██████████| 939/939 [04:19<00:00,  3.62it/s]

Epoch 16 - train avg loss: 0.1013 | IoU: 0.9428 | Dice: 0.9706 | Acc: 0.9835





Val loss: 0.1450 | IoU: 0.9229 | Dice: 0.9599 | Acc: 0.9781
Precision: 0.9558 | Recall: 0.9640 | F1: 0.9599


Train epoch 17: 100%|██████████| 939/939 [04:25<00:00,  3.54it/s]

Epoch 17 - train avg loss: 0.1022 | IoU: 0.9425 | Dice: 0.9704 | Acc: 0.9834





Val loss: 0.1498 | IoU: 0.9175 | Dice: 0.9570 | Acc: 0.9765
Precision: 0.9522 | Recall: 0.9617 | F1: 0.9570


Train epoch 18: 100%|██████████| 939/939 [04:24<00:00,  3.55it/s]

Epoch 18 - train avg loss: 0.1013 | IoU: 0.9423 | Dice: 0.9703 | Acc: 0.9833





Val loss: 0.1479 | IoU: 0.9226 | Dice: 0.9597 | Acc: 0.9779
Precision: 0.9502 | Recall: 0.9695 | F1: 0.9597


Train epoch 19: 100%|██████████| 939/939 [04:22<00:00,  3.58it/s]

Epoch 19 - train avg loss: 0.1005 | IoU: 0.9426 | Dice: 0.9704 | Acc: 0.9834





Val loss: 0.1451 | IoU: 0.9225 | Dice: 0.9597 | Acc: 0.9781
Precision: 0.9588 | Recall: 0.9606 | F1: 0.9597


Train epoch 20: 100%|██████████| 939/939 [04:26<00:00,  3.53it/s]

Epoch 20 - train avg loss: 0.0925 | IoU: 0.9479 | Dice: 0.9732 | Acc: 0.9850





Val loss: 0.1430 | IoU: 0.9222 | Dice: 0.9595 | Acc: 0.9778
Precision: 0.9537 | Recall: 0.9654 | F1: 0.9595
Saved training history to Drive (training_unet_history_preprocessed_metrics.csv)
Model saved to /content/drive/MyDrive/Data Mining/Project/Model/unet_preprocessing.pth
