In [1]:
# Fetch credentials stored as Kaggle notebook secrets instead of hard-coding them.
from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()
# Secret labelled "github": only referenced by the commented-out repo-clone cell.
secret_value_0 = user_secrets.get_secret("github")
# Secret labelled "wandb": handed to wandb.login() in the W&B setup cell.
secret_value_1 = user_secrets.get_secret("wandb")


In [2]:
!pip install -U ultralytics wandb

Collecting ultralytics
  Downloading ultralytics-8.3.183-py3-none-any.whl.metadata (37 kB)
Collecting wandb
  Downloading wandb-0.21.1-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (10 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.16-py3-none-any.whl.metadata (14 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x

In [3]:
# !pip uninstall torch torchvision
# !pip install torch==2.5.1 torchvision==0.20.1 torchaudio==2.5.1 --index-url https://download.pytorch.org/whl/cu124

In [4]:
# import git
# !rm -rf "/kaggle/working"
# git.Repo.clone_from(f'https://{secret_value_0}@github.com/Beeditor04/hutech_mushroom.git', '/kaggle/working/github')

In [5]:
import yaml

def load_config(path):
    """Read a YAML file and return its parsed contents.

    Args:
        path: Filesystem path of the YAML document to read.

    Returns:
        The object produced by ``yaml.load`` for the document.

    Raises:
        OSError: If the file cannot be opened.
    """
    # Decode explicitly as UTF-8 rather than relying on the platform's locale
    # default, so the same file parses identically on every machine.
    with open(path, 'r', encoding='utf-8') as f:
        # SECURITY NOTE: FullLoader can construct more than plain data types.
        # If configs may come from an untrusted source, use yaml.safe_load.
        return yaml.load(f, Loader=yaml.FullLoader)

# Train code

In [6]:
import sys
import os
import yaml
import torch

from ultralytics import YOLO

import wandb
# from wandb.integration.ultralytics import add_wandb_callback


# NOTE: this redefines the load_config already declared in the previous cell;
# the later definition is the one that stays bound after both cells run.
def load_config(path):
    """Parse the YAML document stored at ``path``.

    Args:
        path: Location of the YAML file on disk.

    Returns:
        The object produced by ``yaml.load`` for the document.

    Raises:
        OSError: If the file cannot be opened.
    """
    # Explicit UTF-8 decoding keeps parsing independent of the host locale.
    with open(path, 'r', encoding='utf-8') as f:
        # SECURITY NOTE: FullLoader can construct more than plain data types.
        # If configs may come from an untrusted source, use yaml.safe_load.
        return yaml.load(f, Loader=yaml.FullLoader)

# Train on the GPU when CUDA is usable, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

def trainer(config=None):
    """Run one YOLO training job inside a Weights & Biases run.

    Intended as the ``function`` given to ``wandb.agent`` (which calls it with
    no arguments), but it can also be called directly with a dict of settings.

    Args:
        config: Optional dict of run settings. It is forwarded to
            ``wandb.init(config=...)`` so that a direct call such as
            ``trainer({"epochs": 5})`` is honoured instead of being ignored.
            It is ``None`` when launched by a sweep agent.

    Config keys read from the run (fallback when absent):
        model ("yolo11n.pt"), dataset ("../data/data-ppe_v4.yaml"),
        epochs (100), batch_size (16), imgsz (640), lr0 (0.001),
        optimizer ("AdamW"), momentum (0.9), cos_lr (False), hsv_v (0.4),
        translate (0.1), scale (0.5), fliplr (0.5), mosaic (1.0),
        mixup (0.0), cutmix (0.0), workers (8).

    Returns:
        None. Training artefacts are written by Ultralytics under
        ``ppe-yolo/training`` (project/name passed to ``model.train``).
    """
    PROJECT = "ppe-yolo"
    NAME = "training"

    # model.train() keyword -> (run-config key, fallback when the key is absent).
    train_arg_specs = {
        "data": ("dataset", "../data/data-ppe_v4.yaml"),
        "epochs": ("epochs", 100),
        "imgsz": ("imgsz", 640),
        "batch": ("batch_size", 16),
        # hyperparameters
        "lr0": ("lr0", 0.001),
        "optimizer": ("optimizer", "AdamW"),
        "momentum": ("momentum", 0.9),
        "cos_lr": ("cos_lr", False),
        # augmentation
        "hsv_v": ("hsv_v", 0.4),
        "translate": ("translate", 0.1),
        "scale": ("scale", 0.5),
        "fliplr": ("fliplr", 0.5),
        "mosaic": ("mosaic", 1.0),
        "mixup": ("mixup", 0.0),
        "cutmix": ("cutmix", 0.0),
        # 8 mirrors Ultralytics' documented default, so runs that do not set it
        # behave as before; a sweep can lower it when DataLoader workers die
        # mid-epoch (as in the traceback recorded under the agent cell).
        "workers": ("workers", 8),
    }

    settings = wandb.Settings(disable_code=True, disable_git=True, _disable_stats=True)

    print("Initial config from sweep:", config)

    # Passing config=None is equivalent to omitting it, so sweep-agent launches
    # are unaffected; direct calls now actually seed the run config.
    with wandb.init(project=PROJECT, job_type="train", config=config, settings=settings) as run:
        final_config = run.config
        print("HERE!!!", dict(final_config))

        model_name = final_config.get("model", "yolo11n.pt")
        train_args = {
            keyword: final_config.get(config_key, fallback)
            for keyword, (config_key, fallback) in train_arg_specs.items()
        }

        print("Training with:")
        print(f"  Model: {model_name}")
        print(f"  Data: {train_args['data']}")
        print(f"  Epochs: {train_args['epochs']}")
        print(f"  Batch size: {train_args['batch']}")
        print(f"  Image size: {train_args['imgsz']}")

        model = YOLO(model_name)
        # add_wandb_callback(model, enable_model_checkpointing=True)
        # NOTE(review): the sweep optimises "metrics/mAP50(B)"; with the callback
        # above disabled, confirm that metric is still logged to the W&B run.

        model.train(
            device=device,
            project=PROJECT,
            name=NAME,
            **train_args,
        )


Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.


# Wandb setup

In [7]:
# Authenticate with Weights & Biases using the key fetched from Kaggle secrets.
if secret_value_1:
    wandb.login(key=secret_value_1)
else:
    # The key is read from the Kaggle secret labelled "wandb", not from an
    # environment variable, so the hint must point at the secret.
    print('WandB API key not found. Please add a Kaggle secret labelled "wandb" containing your WandB API key.')
# NOTE(review): requests wandb's "core" feature; this may be a no-op on recent
# wandb releases — confirm before removing.
wandb.require("core")

[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mnambeo2904[0m ([33mnambeo2904-ppe[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [8]:
# ## HYPERPARAMETER
# sweep_configuration = {
#     'method': 'grid',
#     'name': 'model_sweep',
#     'metric': {'goal': 'maximize', 'name': 'metrics/mAP50(B)'},
#     'parameters': 
#     {
#         # fixed value
#         'dataset': {'value': "/kaggle/input/ppe-detection-cs406/data-ppe_v4-kaggle.yaml"},
#         'model': {'value': "/kaggle/input/ppe-detection-cs406/yolo10n_sh17.pt"},
#         'batch_size': {'value': 16},
#         'epochs': {'value': 90},
#         'imgsz': {'value': 640}, 

#         # hyperparameter
#         'lr0': {'values': [0.01, 0.001, 0.0001]},
#         'optimizer': {'values': ["AdamW", "Adam", "SGD"]},
#         'momentum': {'value': 0.9},
#         'cos_lr': {'values': [False, True]},
        
#         # augmentation
#         'hsv_v': {'value': 0.4},
#         'translate': {'value': 0.1},
#         'scale': {'value': 0.5},
#         'fliplr': {'value': 0.5},
#         'mosaic': {'value': 1.0},
#         'mixup': {'value': 0.0},
#         'cutmix': {'value': 0.0}
#     }
# }

In [9]:
## HYPERPARAMETER
# Bayesian sweep definition: fixed entries use "value", searched entries give a
# continuous "min"/"max" range. The target is the validation mAP50 metric.
sweep_configuration = {
    "method": "bayes",
    "name": "model_sweep",
    "metric": {"goal": "maximize", "name": "metrics/mAP50(B)"},
    "parameters": {
        # --- fixed run settings ---
        "dataset": {"value": "/kaggle/input/ppe-detection-cs406/data-ppe_v4-kaggle.yaml"},
        "model": {"value": "/kaggle/input/ppe-detection-cs406/yolo10n_sh17.pt"},
        "batch_size": {"value": 16},
        "epochs": {"value": 70},
        "imgsz": {"value": 640},

        # --- optimisation (held constant in this sweep) ---
        # With optimizer "auto" the recorded training log reports that
        # Ultralytics ignores lr0 and momentum and chooses them itself.
        "lr0": {"value": 0.01},
        "optimizer": {"value": "auto"},
        "momentum": {"value": 0.9},
        "cos_lr": {"value": False},

        # --- augmentation (ranges are what the sweep explores) ---
        "hsv_v": {"min": 0.4, "max": 0.8},
        "translate": {"min": 0.1, "max": 0.5},
        "scale": {"min": 0.5, "max": 1.0},
        "fliplr": {"value": 0.5},
        "mosaic": {"value": 1.0},
        "mixup": {"min": 0.0, "max": 0.5},
        "cutmix": {"min": 0.0, "max": 0.5},
    },
}

In [10]:
# import torch
# from torch.serialization import safe_globals
# from models.yolo import DetectionModel

# with safe_globals([DetectionModel]):
#     model = torch.hub.load("ultralytics/yolov5", "yolov5n")

In [11]:

import torch
import ultralytics.nn.tasks as tasks

# Allowlist DetectionModel for safe loading: registers the class with
# torch.serialization so that checkpoints which pickle it can be deserialized.
# NOTE(review): presumably required because the installed torch restricts
# unpickling to allowlisted classes by default — confirm against torch version.
torch.serialization.add_safe_globals([tasks.DetectionModel])

In [12]:
from ultralytics import YOLO
# Load the pretrained checkpoint once up front. The sweep does not use this
# `model` object (trainer() builds its own YOLO from the run's "model" value),
# so this presumably acts as a check that the file loads — TODO confirm.
model = YOLO('/kaggle/input/ppe-detection-cs406/yolo10n_sh17.pt')

In [13]:
import wandb

PROJECT = "ppe-yolo"

# Register the sweep definition with W&B under PROJECT; the returned id is
# what the agent cell consumes.
sweep_id = wandb.sweep(project=PROJECT, sweep=sweep_configuration)
print("Sweep ID:", sweep_id)

Create sweep with ID: ottxc5lj
Sweep URL: https://wandb.ai/nambeo2904-ppe/ppe-yolo/sweeps/ottxc5lj
Sweep ID: ottxc5lj


In [14]:
# Echo the sweep id so it is visible directly above the long agent log.
print("Sweep ID:", sweep_id)
# Launch the sweep agent in this kernel: it calls trainer() once per trial,
# stopping after 32 trials.
wandb.agent(sweep_id=sweep_id, function=trainer, count=32)

Sweep ID: ottxc5lj


[34m[1mwandb[0m: Agent Starting Run: i6u4w0il with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	cos_lr: False
[34m[1mwandb[0m: 	cutmix: 0.2227052968699279
[34m[1mwandb[0m: 	dataset: /kaggle/input/ppe-detection-cs406/data-ppe_v4-kaggle.yaml
[34m[1mwandb[0m: 	epochs: 70
[34m[1mwandb[0m: 	fliplr: 0.5
[34m[1mwandb[0m: 	hsv_v: 0.689432524507108
[34m[1mwandb[0m: 	imgsz: 640
[34m[1mwandb[0m: 	lr0: 0.01
[34m[1mwandb[0m: 	mixup: 0.19272913503691924
[34m[1mwandb[0m: 	model: /kaggle/input/ppe-detection-cs406/yolo10n_sh17.pt
[34m[1mwandb[0m: 	momentum: 0.9
[34m[1mwandb[0m: 	mosaic: 1
[34m[1mwandb[0m: 	optimizer: auto
[34m[1mwandb[0m: 	scale: 0.9411971087566704
[34m[1mwandb[0m: 	translate: 0.297317099370487


Initial config from sweep: None


HERE!!! {'batch_size': 16, 'cos_lr': False, 'cutmix': 0.2227052968699279, 'dataset': '/kaggle/input/ppe-detection-cs406/data-ppe_v4-kaggle.yaml', 'epochs': 70, 'fliplr': 0.5, 'hsv_v': 0.689432524507108, 'imgsz': 640, 'lr0': 0.01, 'mixup': 0.19272913503691924, 'model': '/kaggle/input/ppe-detection-cs406/yolo10n_sh17.pt', 'momentum': 0.9, 'mosaic': 1, 'optimizer': 'auto', 'scale': 0.9411971087566704, 'translate': 0.297317099370487}
Training with:
  Model: /kaggle/input/ppe-detection-cs406/yolo10n_sh17.pt
  Data: /kaggle/input/ppe-detection-cs406/data-ppe_v4-kaggle.yaml
  Epochs: 70
  Batch size: 16
  Image size: 640
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.2227052968699279, data=/kaggle/input/ppe-detection-cs406/data-ppe_v4-kaggle.yaml, degrees=0.0, deterministic=True, 

Downloading https://ultralytics.com/assets/Arial.ttf to '/root/.config/Ultralytics/Arial.ttf': 100%|██████████| 755k/755k [00:00<00:00, 55.6MB/s]


Overriding model.yaml nc=17 with nc=4

                   from  n    params  module                                       arguments                     
  0                  -1  1       464  ultralytics.nn.modules.conv.Conv             [3, 16, 3, 2]                 
  1                  -1  1      4672  ultralytics.nn.modules.conv.Conv             [16, 32, 3, 2]                
  2                  -1  1      7360  ultralytics.nn.modules.block.C2f             [32, 32, 1, True]             
  3                  -1  1     18560  ultralytics.nn.modules.conv.Conv             [32, 64, 3, 2]                
  4                  -1  2     49664  ultralytics.nn.modules.block.C2f             [64, 64, 2, True]             
  5                  -1  1      9856  ultralytics.nn.modules.block.SCDown          [64, 128, 3, 2]               
  6                  -1  2    197632  ultralytics.nn.modules.block.C2f             [128, 128, 2, True]           
  7                  -1  1     36096  ultralytics

Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n.pt to 'yolo11n.pt': 100%|██████████| 5.35M/5.35M [00:00<00:00, 148MB/s]


[34m[1mAMP: [0mchecks passed ✅
[34m[1mtrain: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 8.7±4.4 MB/s, size: 51.7 KB)


[34m[1mtrain: [0mScanning /kaggle/input/ppe-detection-cs406/split_v4/train/labels... 1878 images, 0 backgrounds, 0 corrupt: 100%|██████████| 1878/1878 [00:08<00:00, 211.00it/s]


[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, method='weighted_average', num_output_channels=3), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))
[34m[1mval: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 7.8±3.1 MB/s, size: 45.3 KB)


[34m[1mval: [0mScanning /kaggle/input/ppe-detection-cs406/split_v4/val/labels... 402 images, 0 backgrounds, 0 corrupt: 100%|██████████| 402/402 [00:02<00:00, 187.40it/s]


Plotting labels to ppe-yolo/training/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.9' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.00125, momentum=0.9) with parameter groups 95 weight(decay=0.0), 108 weight(decay=0.0005), 107 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 4 dataloader workers
Logging results to [1mppe-yolo/training[0m
Starting training for 70 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/70      3.37G      3.485      6.244       3.02         43        640: 100%|██████████| 118/118 [00:33<00:00,  3.49it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 13/13 [00:03<00:00,  3.76it/s]


                   all        402       2327      0.456      0.427      0.397      0.209

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       2/70      3.62G      3.469      4.148      2.933         57        640: 100%|██████████| 118/118 [00:31<00:00,  3.78it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 13/13 [00:02<00:00,  5.13it/s]

                   all        402       2327      0.628      0.511      0.545      0.273






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       3/70      3.64G      3.505      3.718      2.937        105        640: 100%|██████████| 118/118 [00:30<00:00,  3.86it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 13/13 [00:02<00:00,  5.15it/s]


                   all        402       2327      0.604      0.523      0.557      0.283

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       4/70      3.66G      3.527      3.588      2.927         42        640: 100%|██████████| 118/118 [00:30<00:00,  3.86it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 13/13 [00:02<00:00,  5.02it/s]


                   all        402       2327      0.592      0.517      0.528      0.273

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       5/70      3.67G      3.509      3.499      2.946         38        640: 100%|██████████| 118/118 [00:30<00:00,  3.85it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 13/13 [00:02<00:00,  5.09it/s]

                   all        402       2327      0.622      0.572      0.586      0.303






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       6/70      3.69G      3.467      3.302      2.891         55        640: 100%|██████████| 118/118 [00:30<00:00,  3.86it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 13/13 [00:02<00:00,  4.71it/s]


                   all        402       2327      0.609      0.573      0.585      0.304

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       7/70       3.7G      3.455      3.214       2.87        153        640:  69%|██████▉   | 82/118 [00:26<00:11,  3.11it/s]
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1251, in _try_get_data
    data = self._data_queue.get(timeout=timeout)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/lib/python3.11/queue.py", line 179, in get
    raise Empty
_queue.Empty

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/tmp/ipykernel_36/657564274.py", line 64, in trainer
    model.train(
  File "/usr/local/lib/python3.11/dist-packages/ultralytics/engine/model.py", line 799, in train
    self.trainer.train()
  File "/usr/local/lib/python3.11/dist-packages/ultralytics/engine/trainer.py", line 227, in train
    self._do_train(world_size)
  File "/usr/local/lib/python3.11/dist-packages/ultralytics/engine/trainer.py", line 388, in _do_train
    fo

Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1251, in _try_get_data
    data = self._data_queue.get(timeout=timeout)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/lib/python3.11/queue.py", line 179, in get
    raise Empty
_queue.Empty

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/wandb/agents/pyagent.py", line 297, in _run_job
    self._function()
  File "/tmp/ipykernel_36/657564274.py", line 64, in trainer
    model.train(
  File "/usr/local/lib/python3.11/dist-packages/ultralytics/engine/model.py", line 799, in train
    self.trainer.train()
  File "/usr/local/lib/python3.11/dist-packages/ultralytics/engine/trainer.py", line 227, in train
    self._do_train(world_size)
  File "/usr/local/lib/python3.11/dist-packages/ultralytics/engine/trainer.py", line 388, in _do_train
    for i, batch 