In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [1]:
!pip install ultralytics numpy pyyaml

Collecting ultralytics
  Downloading ultralytics-8.3.58-py3-none-any.whl.metadata (35 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.13-py3-none-any.whl.metadata (9.4 kB)
Downloading ultralytics-8.3.58-py3-none-any.whl (905 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m905.3/905.3 kB[0m [31m17.0 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hDownloading ultralytics_thop-2.0.13-py3-none-any.whl (26 kB)
Installing collected packages: ultralytics-thop, ultralytics
Successfully installed ultralytics-8.3.58 ultralytics-thop-2.0.13


In [2]:
import tarfile
import os

# Define the input file path and extraction directory
input_file = '/kaggle/input/ds2-dense/ds2_dense'  # Replace with the actual path
output_dir = '.'  # Replace with your desired output directory ('.' for current directory)

# Extract the .tar.gz file
with tarfile.open(input_file, 'r:gz') as tar:
    tar.extractall(path=output_dir)

print(f"Extracted {input_file} to {output_dir}")


IsADirectoryError: [Errno 21] Is a directory: '/kaggle/input/ds2-dense/ds2_dense'

In [14]:
import json
import os
from pathlib import Path
import numpy as np
import yaml
import shutil
from sklearn.model_selection import train_test_split

class DeepScoresToYOLO:
    def __init__(self, json_path, images_dir, output_dir, train_split=0.8, val_split=0.1):
        """
        Initialize the converter
        
        Args:
            json_path: Path to the DeepScores JSON file
            images_dir: Directory containing the source images
            output_dir: Directory where YOLO format dataset will be saved
            train_split: Proportion of data for training (default: 0.8)
            val_split: Proportion of data for validation (default: 0.1)
                      Note: test_split will be (1 - train_split - val_split)
        """
        self.json_path = json_path
        self.images_dir = Path(images_dir)
        self.output_dir = Path(output_dir)
        self.train_split = train_split
        self.val_split = val_split
        
        # Create output directories
        self.labels_dir = self.output_dir / 'labels'
        self.images_dir_out = self.output_dir / 'images'
        
        # Create split directories
        for split in ['train', 'val', 'test']:
            (self.labels_dir / split).mkdir(parents=True, exist_ok=True)
            (self.images_dir_out / split).mkdir(parents=True, exist_ok=True)
        
        # Load and parse JSON data
        try:
            with open(json_path, 'r') as f:
                self.data = json.load(f)
        except FileNotFoundError:
            raise FileNotFoundError(f"JSON file not found at {json_path}")
        except json.JSONDecodeError:
            raise ValueError(f"Invalid JSON file at {json_path}")
            
        # Create category mapping (DeepScores ID -> YOLO class index)
        self.category_mapping = self._create_category_mapping()
        
    def _create_category_mapping(self):
        """Create a mapping from DeepScores category IDs to YOLO class indices (0-based)"""
        deepscores_categories = {
            k: v for k, v in self.data['categories'].items() 
            if v['annotation_set'] == 'deepscores'
        }
        return {int(k): idx for idx, k in enumerate(sorted(deepscores_categories.keys()))}
    
    def _convert_bbox_to_yolo(self, bbox, img_width, img_height):
        """Convert DeepScores oriented bbox to YOLO format"""
        x1, y1, x2, y2, x3, y3, x4, y4 = bbox
        
        x_min = min(x1, x2, x3, x4)
        y_min = min(y1, y2, y3, y4)
        x_max = max(x1, x2, x3, x4)
        y_max = max(y1, y2, y3, y4)
        
        width = x_max - x_min
        height = y_max - y_min
        x_center = x_min + width / 2
        y_center = y_min + height / 2
        
        # Normalize
        x_center /= img_width
        y_center /= img_height
        width /= img_width
        height /= img_height
        
        return [x_center, y_center, width, height]
    
    def _split_dataset(self, image_list):
        """Split the dataset into train, validation, and test sets"""
        test_split = 1 - self.train_split - self.val_split
        
        # First split into train and temp (val + test)
        train_imgs, temp_imgs = train_test_split(
            image_list, 
            train_size=self.train_split,
            random_state=42
        )
        
        # Split temp into val and test
        val_size = self.val_split / (self.val_split + test_split)
        val_imgs, test_imgs = train_test_split(
            temp_imgs,
            train_size=val_size,
            random_state=42
        )
        
        return {
            'train': train_imgs,
            'val': val_imgs,
            'test': test_imgs
        }
    
    def convert(self):
        """Convert the entire dataset to YOLO format"""
        print("Starting conversion...")
        
        # Split dataset
        image_list = list(self.data['images'])
        splits = self._split_dataset(image_list)
        
        # Process each split
        total_images = len(self.data['images'])
        processed = 0
        
        for split_name, split_images in splits.items():
            print(f"\nProcessing {split_name} split...")
            
            for img_data in split_images:
                processed += 1
                if processed % 100 == 0:
                    print(f"Processing image {processed}/{total_images}")
                
                img_id = str(img_data['id'])
                img_width = img_data['width']
                img_height = img_data['height']
                
                # Copy image to split directory
                src_img_path = self.images_dir / img_data['filename']
                dst_img_path = self.images_dir_out / split_name / img_data['filename']
                if src_img_path.exists():
                    shutil.copy2(src_img_path, dst_img_path)
                else:
                    print(f"Warning: Source image not found: {src_img_path}")
                    continue
                
                # Get annotations
                img_annotations = [
                    self.data['annotations'][ann_id] 
                    for ann_id in img_data['ann_ids']
                    if ann_id in self.data['annotations']
                ]
                
                # Create label file
                label_path = self.labels_dir / split_name / f"{Path(img_data['filename']).stem}.txt"
                
                with open(label_path, 'w') as f:
                    for ann in img_annotations:
                        cat_id = int(ann['cat_id'][0])
                        
                        if cat_id not in self.category_mapping:
                            continue
                        
                        bbox = ann['o_bbox']
                        yolo_bbox = self._convert_bbox_to_yolo(bbox, img_width, img_height)
                        
                        yolo_class = self.category_mapping[cat_id]
                        bbox_str = ' '.join([f"{x:.6f}" for x in yolo_bbox])
                        f.write(f"{yolo_class} {bbox_str}\n")
        
        # Create dataset.yaml
        self._create_yaml()
        
        print("\nConversion completed successfully!")
        print(f"Train images: {len(splits['train'])}")
        print(f"Validation images: {len(splits['val'])}")
        print(f"Test images: {len(splits['test'])}")
    
    def _create_yaml(self):
        """Create YOLO dataset.yaml file"""
        categories = {v: k for k, v in self.category_mapping.items()}
        names = []
        for idx in range(len(categories)):
            cat_id = categories[idx]
            names.append(self.data['categories'][str(cat_id)]['name'])
        
        yaml_content = {
            'path': str(self.output_dir.absolute()),
            'train': 'images/train',
            'val': 'images/val',
            'test': 'images/test',
            'nc': len(names),
            'names': names
        }
        
        with open(self.output_dir / 'dataset.yaml', 'w') as f:
            yaml.dump(yaml_content, f, sort_keys=False)
        
        print(f"Created dataset.yaml with {len(names)} classes")

In [15]:
converter = DeepScoresToYOLO(
    json_path='/kaggle/input/ds2-dense/ds2_dense/deepscores_train.json',
    images_dir='/kaggle/input/ds2-dense/ds2_dense/images',
    output_dir='/kaggle/working/yamloutput'
)
converter.convert()

Starting conversion...

Processing train split...
Processing image 100/1362
Processing image 200/1362
Processing image 300/1362
Processing image 400/1362
Processing image 500/1362
Processing image 600/1362
Processing image 700/1362
Processing image 800/1362
Processing image 900/1362
Processing image 1000/1362

Processing val split...
Processing image 1100/1362
Processing image 1200/1362

Processing test split...
Processing image 1300/1362
Created dataset.yaml with 136 classes

Conversion completed successfully!
Train images: 1089
Validation images: 136
Test images: 137


First Test Over All Classes

In [2]:
from ultralytics import YOLO
import torch
from pathlib import Path

def train_yolo_deepscores(
    yaml_path,
    model_size='m',  # n, s, m, l, or x
    epochs=100,
    batch_size=8,
    imgsz=640,
    workers=8
):
    """
    Train YOLOv8 on DeepScores dataset
    
    Args:
        yaml_path: Path to dataset.yaml
        model_size: YOLO model size (n=nano, s=small, m=medium, l=large, x=xlarge)
        epochs: Number of training epochs
        batch_size: Batch size
        imgsz: Input image size
        workers: Number of worker threads for data loading
    """
    print(f"Training YOLOv8{model_size} on DeepScores dataset...")
    
    # Initialize model
    model = YOLO(f'yolov8{model_size}.pt')
    
    # Training arguments
    args = {
        'data': yaml_path,  # Path to data config file
        'epochs': epochs,  # Number of epochs
        'batch': batch_size,  # Batch size
        'imgsz': imgsz,  # Image size
        'workers': workers,  # Number of worker threads
        'patience': 50,  # Early stopping patience
        'device': 0 if torch.cuda.is_available() else 'cpu',  # Device to use
        'project': 'deepscores_detection',  # Project name
        'name': f'yolov8{model_size}_run1',  # Run name
        
        # Augmentation settings
        'degrees': 0.0,  # No rotation augmentation
        'scale': 0.5,  # Scale augmentation
        'shear': 0.0,   # No shear augmentation
        'flipud': 0.0,  # No vertical flipping
        'fliplr': 0.0,  # No horizontal flipping
        'mosaic': 0.5,  # Mosaic augmentation with reduced probability
        'mixup': 0.0,   # No mixup augmentation
        
        # Optimization settings
        'optimizer': 'AdamW',  # Use AdamW optimizer
        'lr0': 0.001,  # Initial learning rate
        'lrf': 0.01,   # Final learning rate ratio
        'momentum': 0.937,
        'weight_decay': 0.0005,
        'warmup_epochs': 3.0,
        'warmup_momentum': 0.8,
        'warmup_bias_lr': 0.1,
        'box': 7.5,    # Box loss gain
        'cls': 0.5,    # Class loss gain
        'dfl': 1.5,    # DFL loss gain
        
        # Save and validation settings
        'save': True,  # Save training results
        'save_period': -1,  # Save every x epochs (-1 for last epoch only)
        'plots': True, # Generate plots
        'rect': False, # Rectangular training
        'val': True,   # Validate during training
    }
    
    # Start training
    try:
        results = model.train(**args)
        print("Training completed successfully!")
        return results
    except Exception as e:
        print(f"Error during training: {str(e)}")
        raise

if __name__ == "__main__":

    yaml_path = "/kaggle/working/yamloutput/dataset.yaml"
    
    # Train model
    results = train_yolo_deepscores(
        yaml_path=yaml_path,
        model_size='m',  # medium size model
        epochs=100,
        batch_size=8,  
        imgsz=640,
        workers=8
    )

Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.
Training YOLOv8m on DeepScores dataset...
Ultralytics 8.3.39 🚀 Python-3.10.14 torch-2.4.0 CUDA:0 (Tesla T4, 15095MiB)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolov8m.pt, data=/kaggle/working/yamloutput/dataset.yaml, epochs=100, time=None, patience=50, batch=8, imgsz=640, save=True, save_period=-1, cache=False, device=0, workers=8, project=deepscores_detection, name=yolov8m_run16, exist_ok=False, pretrained=True, optimizer=AdamW, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, va

100%|██████████| 755k/755k [00:00<00:00, 16.7MB/s]
2024-12-01 19:14:43,423	INFO util.py:124 -- Outdated packages:
  ipywidgets==7.7.1 found, needs ipywidgets>=8
Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.
2024-12-01 19:14:44,231	INFO util.py:124 -- Outdated packages:
  ipywidgets==7.7.1 found, needs ipywidgets>=8
Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.


Overriding model.yaml nc=80 with nc=136

                   from  n    params  module                                       arguments                     
  0                  -1  1      1392  ultralytics.nn.modules.conv.Conv             [3, 48, 3, 2]                 
  1                  -1  1     41664  ultralytics.nn.modules.conv.Conv             [48, 96, 3, 2]                
  2                  -1  2    111360  ultralytics.nn.modules.block.C2f             [96, 96, 2, True]             
  3                  -1  1    166272  ultralytics.nn.modules.conv.Conv             [96, 192, 3, 2]               
  4                  -1  4    813312  ultralytics.nn.modules.block.C2f             [192, 192, 4, True]           
  5                  -1  1    664320  ultralytics.nn.modules.conv.Conv             [192, 384, 3, 2]              
  6                  -1  4   3248640  ultralytics.nn.modules.block.C2f             [384, 384, 4, True]           
  7                  -1  1   1991808  ultralyti

[34m[1mtrain: [0mScanning /kaggle/working/yamloutput/labels/train.cache... 1089 images, 0 backgrounds, 0 corrupt: 100%|██████████| 1089/1089 [00:00<?, ?it/s]


[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1, 4.0), tile_grid_size=(8, 8))


  check_for_updates()
  self.pid = os.fork()
[34m[1mval: [0mScanning /kaggle/working/yamloutput/labels/val.cache... 136 images, 0 backgrounds, 0 corrupt: 100%|██████████| 136/136 [00:00<?, ?it/s]


Plotting labels to deepscores_detection/yolov8m_run16/labels.jpg... 
[34m[1moptimizer:[0m AdamW(lr=0.001, momentum=0.937) with parameter groups 77 weight(decay=0.0), 84 weight(decay=0.0005), 83 bias(decay=0.0)
[34m[1mTensorBoard: [0mmodel graph visualization added ✅
Image sizes 640 train, 640 val
Using 4 dataloader workers
Logging results to [1mdeepscores_detection/yolov8m_run16[0m
Starting training for 100 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      1/100      8.56G      2.241      2.147      1.026        203        640: 100%|██████████| 137/137 [00:54<00:00,  2.51it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:06<00:00,  1.47it/s]

                   all        136      88538      0.194     0.0808     0.0914     0.0479






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      2/100      14.7G      1.766      1.269     0.8932        205        640: 100%|██████████| 137/137 [00:54<00:00,  2.52it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:05<00:00,  1.66it/s]

                   all        136      88538      0.285      0.122      0.136     0.0742






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      3/100       7.2G      1.641      1.122     0.8661        433        640: 100%|██████████| 137/137 [00:55<00:00,  2.49it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:05<00:00,  1.74it/s]

                   all        136      88538      0.389       0.16      0.166     0.0922






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      4/100      15.5G       1.53      1.007      0.851        631        640: 100%|██████████| 137/137 [00:54<00:00,  2.49it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:05<00:00,  1.71it/s]

                   all        136      88538      0.415      0.177        0.2      0.114






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      5/100      12.1G      1.446     0.9313     0.8403        260        640: 100%|██████████| 137/137 [00:54<00:00,  2.50it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:05<00:00,  1.75it/s]

                   all        136      88538       0.41      0.207      0.244      0.136






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      6/100      11.6G      1.408     0.8881     0.8306        379        640: 100%|██████████| 137/137 [00:55<00:00,  2.48it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:05<00:00,  1.72it/s]

                   all        136      88538      0.427      0.232       0.26      0.143






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      7/100      12.8G      1.357     0.8383     0.8214        253        640: 100%|██████████| 137/137 [00:55<00:00,  2.48it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:05<00:00,  1.73it/s]

                   all        136      88538       0.57      0.234      0.269      0.152






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      8/100      11.3G      1.362     0.8491      0.817         81        640: 100%|██████████| 137/137 [00:55<00:00,  2.49it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:05<00:00,  1.79it/s]

                   all        136      88538      0.561      0.233      0.279       0.16






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      9/100        10G      1.287     0.7933     0.8114        221        640: 100%|██████████| 137/137 [00:54<00:00,  2.49it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:05<00:00,  1.70it/s]

                   all        136      88538      0.532      0.255      0.303      0.172






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     10/100      11.9G      1.258     0.7706     0.8107        482        640: 100%|██████████| 137/137 [00:54<00:00,  2.52it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:05<00:00,  1.74it/s]

                   all        136      88538      0.557      0.259      0.309      0.177






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     11/100      11.3G      1.281     0.7696     0.8095        341        640: 100%|██████████| 137/137 [00:55<00:00,  2.48it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:05<00:00,  1.79it/s]

                   all        136      88538      0.612      0.261      0.311      0.177






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     12/100        15G      1.231     0.7384     0.8087        257        640: 100%|██████████| 137/137 [00:54<00:00,  2.52it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:05<00:00,  1.75it/s]

                   all        136      88538      0.557      0.262      0.307      0.177






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     13/100      12.7G      1.228     0.7325     0.8043        428        640: 100%|██████████| 137/137 [00:54<00:00,  2.49it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:05<00:00,  1.76it/s]

                   all        136      88538      0.576       0.27      0.321      0.188






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     14/100      11.2G       1.19     0.7124     0.7993        357        640: 100%|██████████| 137/137 [00:55<00:00,  2.48it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:05<00:00,  1.77it/s]

                   all        136      88538      0.589      0.286       0.34      0.195






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     15/100      6.96G      1.167     0.6905     0.7954        175        640: 100%|██████████| 137/137 [00:55<00:00,  2.47it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:04<00:00,  1.81it/s]

                   all        136      88538      0.569      0.288      0.337      0.199






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     16/100      9.63G      1.142      0.674     0.7951        579        640: 100%|██████████| 137/137 [00:54<00:00,  2.51it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:05<00:00,  1.77it/s]

                   all        136      88538      0.567      0.298      0.354      0.209






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     17/100      7.64G      1.138     0.6748     0.7923        229        640: 100%|██████████| 137/137 [00:55<00:00,  2.49it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:04<00:00,  1.86it/s]

                   all        136      88538       0.61      0.311       0.36      0.214






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     18/100      10.6G      1.145     0.6754     0.7949        409        640: 100%|██████████| 137/137 [00:55<00:00,  2.48it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:05<00:00,  1.79it/s]

                   all        136      88538      0.624      0.305      0.365       0.22






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     19/100        13G      1.124     0.6578     0.7932        150        640: 100%|██████████| 137/137 [00:55<00:00,  2.47it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:05<00:00,  1.79it/s]

                   all        136      88538      0.636      0.325      0.393      0.235






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     20/100      10.9G      1.133     0.6561     0.7917        281        640: 100%|██████████| 137/137 [00:54<00:00,  2.50it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:05<00:00,  1.77it/s]

                   all        136      88538      0.589      0.313      0.375      0.225






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     21/100       7.5G      1.088     0.6301     0.7896        151        640: 100%|██████████| 137/137 [00:55<00:00,  2.47it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:05<00:00,  1.75it/s]

                   all        136      88538      0.595      0.326      0.386      0.234






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     22/100      14.4G      1.097       0.64     0.7903        241        640: 100%|██████████| 137/137 [00:55<00:00,  2.49it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:05<00:00,  1.70it/s]

                   all        136      88538      0.643      0.316      0.381      0.224






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     23/100      8.15G      1.077     0.6274     0.7893        314        640: 100%|██████████| 137/137 [00:54<00:00,  2.49it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:04<00:00,  1.85it/s]

                   all        136      88538      0.682      0.325       0.39      0.232






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     24/100      14.4G      1.054     0.6165     0.7876        192        640: 100%|██████████| 137/137 [00:54<00:00,  2.51it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:05<00:00,  1.78it/s]

                   all        136      88538      0.596      0.326      0.391      0.232






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     25/100      8.13G      1.074     0.6238     0.7886        328        640: 100%|██████████| 137/137 [00:55<00:00,  2.47it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:04<00:00,  1.83it/s]

                   all        136      88538      0.664      0.342      0.407      0.244






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     26/100      11.9G      1.049     0.6105     0.7869        260        640: 100%|██████████| 137/137 [00:54<00:00,  2.49it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:05<00:00,  1.77it/s]

                   all        136      88538      0.713       0.33      0.395      0.241






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     27/100      8.09G      1.041     0.6084      0.784        557        640: 100%|██████████| 137/137 [00:55<00:00,  2.46it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:04<00:00,  1.84it/s]

                   all        136      88538      0.649      0.342      0.403       0.24






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     28/100      12.5G      1.052     0.6098     0.7853        759        640: 100%|██████████| 137/137 [00:55<00:00,  2.49it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:05<00:00,  1.77it/s]

                   all        136      88538      0.645      0.337      0.403      0.239






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     29/100      9.43G       1.05     0.6084     0.7858        840        640: 100%|██████████| 137/137 [00:55<00:00,  2.49it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:05<00:00,  1.77it/s]

                   all        136      88538      0.646      0.337      0.404      0.242






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     30/100      11.5G      1.022      0.594     0.7837        272        640: 100%|██████████| 137/137 [00:55<00:00,  2.47it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:05<00:00,  1.79it/s]

                   all        136      88538      0.658      0.346      0.413      0.248






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     31/100      13.6G      1.025     0.5874     0.7843        155        640: 100%|██████████| 137/137 [00:54<00:00,  2.49it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:05<00:00,  1.78it/s]

                   all        136      88538      0.636      0.346      0.413      0.252






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     32/100      13.3G      1.024     0.5907     0.7834        427        640: 100%|██████████| 137/137 [00:55<00:00,  2.48it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:05<00:00,  1.73it/s]

                   all        136      88538       0.64      0.348      0.413      0.249






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     33/100      8.77G      1.008     0.5885     0.7828        354        640: 100%|██████████| 137/137 [00:55<00:00,  2.46it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:05<00:00,  1.78it/s]

                   all        136      88538      0.624      0.359      0.422      0.255






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     34/100      6.83G      1.033     0.5891     0.7842        482        640: 100%|██████████| 137/137 [00:54<00:00,  2.49it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:04<00:00,  1.89it/s]

                   all        136      88538      0.616      0.351      0.413      0.253






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     35/100       8.1G      1.016     0.5821     0.7823        514        640: 100%|██████████| 137/137 [00:55<00:00,  2.45it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:04<00:00,  1.88it/s]

                   all        136      88538       0.68      0.353      0.422       0.26






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     36/100      14.3G     0.9944     0.5738     0.7834        335        640: 100%|██████████| 137/137 [00:55<00:00,  2.49it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:05<00:00,  1.77it/s]

                   all        136      88538      0.684       0.36      0.424      0.258






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     37/100      14.2G     0.9884     0.5666     0.7813        574        640: 100%|██████████| 137/137 [00:54<00:00,  2.49it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:05<00:00,  1.79it/s]

                   all        136      88538      0.644       0.37      0.438      0.266






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     38/100      12.7G     0.9923     0.5653      0.781        244        640: 100%|██████████| 137/137 [00:55<00:00,  2.48it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:05<00:00,  1.77it/s]

                   all        136      88538      0.661      0.357      0.423      0.259






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     39/100      15.5G      0.991     0.5674     0.7799        256        640: 100%|██████████| 137/137 [00:55<00:00,  2.46it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:05<00:00,  1.79it/s]

                   all        136      88538      0.635      0.349      0.416      0.259






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     40/100      14.9G     0.9679      0.552     0.7813         68        640: 100%|██████████| 137/137 [00:54<00:00,  2.51it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:05<00:00,  1.74it/s]

                   all        136      88538      0.686      0.346      0.424      0.264






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     41/100      9.18G     0.9611     0.5473     0.7794        895        640: 100%|██████████| 137/137 [00:55<00:00,  2.48it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:04<00:00,  1.86it/s]

                   all        136      88538      0.649      0.369      0.437      0.269






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     42/100      14.3G     0.9761     0.5573     0.7795        497        640: 100%|██████████| 137/137 [00:55<00:00,  2.47it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:05<00:00,  1.78it/s]

                   all        136      88538      0.677      0.357       0.43      0.266






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     43/100      7.31G     0.9531     0.5447     0.7808        295        640: 100%|██████████| 137/137 [00:54<00:00,  2.51it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:04<00:00,  1.83it/s]

                   all        136      88538       0.65      0.354      0.429      0.266






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     44/100      11.3G     0.9547     0.5455     0.7784        349        640: 100%|██████████| 137/137 [00:54<00:00,  2.49it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:05<00:00,  1.68it/s]

                   all        136      88538       0.64      0.362      0.429      0.264






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     45/100      14.6G     0.9571     0.5472     0.7771        504        640: 100%|██████████| 137/137 [00:54<00:00,  2.50it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:05<00:00,  1.72it/s]

                   all        136      88538      0.674       0.37      0.438      0.268






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     46/100      13.2G     0.9545     0.5519     0.7788        562        640: 100%|██████████| 137/137 [00:55<00:00,  2.47it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:05<00:00,  1.79it/s]

                   all        136      88538      0.668      0.369      0.439      0.268






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     47/100      10.3G     0.9401     0.5371      0.778        257        640: 100%|██████████| 137/137 [00:55<00:00,  2.47it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:04<00:00,  1.83it/s]

                   all        136      88538      0.646      0.354      0.424      0.266






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     48/100      11.9G     0.9527      0.545     0.7762        339        640: 100%|██████████| 137/137 [00:54<00:00,  2.49it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:05<00:00,  1.77it/s]

                   all        136      88538      0.691      0.374      0.441      0.272






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     49/100        11G     0.9319     0.5286     0.7784        715        640: 100%|██████████| 137/137 [00:54<00:00,  2.49it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:05<00:00,  1.78it/s]

                   all        136      88538      0.679      0.371      0.436      0.275






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     50/100        12G     0.9373      0.534     0.7786        180        640: 100%|██████████| 137/137 [00:55<00:00,  2.48it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:05<00:00,  1.78it/s]

                   all        136      88538      0.623      0.364      0.437      0.273






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     51/100      12.4G     0.9394      0.536     0.7783        327        640: 100%|██████████| 137/137 [00:55<00:00,  2.47it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:05<00:00,  1.77it/s]

                   all        136      88538      0.634      0.369      0.439      0.272






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     52/100      12.6G     0.9257     0.5268     0.7764        398        640: 100%|██████████| 137/137 [00:55<00:00,  2.47it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:04<00:00,  1.80it/s]

                   all        136      88538      0.649      0.356       0.43      0.272






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     53/100      8.63G     0.9285     0.5266     0.7769        458        640: 100%|██████████| 137/137 [00:55<00:00,  2.47it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:04<00:00,  1.84it/s]

                   all        136      88538      0.656      0.367       0.44       0.28






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     54/100      9.98G     0.9092     0.5199     0.7739       1190        640: 100%|██████████| 137/137 [00:54<00:00,  2.50it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:05<00:00,  1.79it/s]

                   all        136      88538      0.631      0.365      0.433      0.281






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     55/100      11.9G     0.9163     0.5221     0.7739        448        640: 100%|██████████| 137/137 [00:55<00:00,  2.49it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:04<00:00,  1.85it/s]

                   all        136      88538      0.692      0.367      0.439      0.272






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     56/100      10.9G     0.9116     0.5173      0.776        429        640: 100%|██████████| 137/137 [00:54<00:00,  2.50it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:05<00:00,  1.66it/s]

                   all        136      88538      0.635      0.364      0.439      0.282






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     57/100      10.2G     0.9273     0.5254     0.7754        674        640: 100%|██████████| 137/137 [00:55<00:00,  2.48it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:04<00:00,  1.82it/s]

                   all        136      88538       0.64      0.369       0.44      0.276






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     58/100      10.1G     0.9092     0.5171     0.7754        395        640: 100%|██████████| 137/137 [00:55<00:00,  2.46it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:04<00:00,  1.84it/s]

                   all        136      88538      0.685      0.377      0.448      0.284






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     59/100      13.4G     0.9019      0.511     0.7731        747        640: 100%|██████████| 137/137 [00:55<00:00,  2.48it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:05<00:00,  1.78it/s]

                   all        136      88538      0.669      0.372      0.443      0.288






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     60/100      15.2G     0.9071     0.5165     0.7744       1009        640: 100%|██████████| 137/137 [00:55<00:00,  2.48it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:05<00:00,  1.80it/s]

                   all        136      88538      0.676      0.371      0.447      0.287






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     61/100      9.27G     0.8988     0.5102     0.7736        220        640: 100%|██████████| 137/137 [00:55<00:00,  2.48it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:05<00:00,  1.79it/s]

                   all        136      88538      0.648      0.368      0.442      0.285






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     62/100      13.5G     0.8907     0.5018     0.7747        732        640: 100%|██████████| 137/137 [00:54<00:00,  2.50it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:05<00:00,  1.77it/s]

                   all        136      88538       0.68      0.362      0.438      0.286






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     63/100      12.7G     0.8913     0.5053     0.7741        514        640: 100%|██████████| 137/137 [00:54<00:00,  2.51it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:04<00:00,  1.80it/s]

                   all        136      88538      0.699       0.38      0.453      0.291






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     64/100        12G      0.897     0.5094     0.7737        242        640: 100%|██████████| 137/137 [00:55<00:00,  2.46it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:05<00:00,  1.76it/s]

                   all        136      88538      0.631      0.365      0.436      0.285






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     65/100      11.2G     0.9123     0.5137     0.7727        222        640: 100%|██████████| 137/137 [00:55<00:00,  2.48it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:05<00:00,  1.78it/s]

                   all        136      88538      0.656      0.376       0.45      0.289






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     66/100      14.6G     0.8827     0.4997     0.7717        298        640: 100%|██████████| 137/137 [00:55<00:00,  2.47it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:05<00:00,  1.79it/s]

                   all        136      88538      0.679      0.388      0.465      0.297






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     67/100        10G     0.8839     0.4996     0.7732        901        640: 100%|██████████| 137/137 [00:54<00:00,  2.49it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:04<00:00,  1.81it/s]

                   all        136      88538      0.696       0.39      0.467        0.3






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     68/100      12.9G     0.8696     0.4905     0.7734        464        640: 100%|██████████| 137/137 [00:54<00:00,  2.49it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:05<00:00,  1.70it/s]

                   all        136      88538      0.686      0.384      0.461        0.3






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     69/100      10.3G     0.8817     0.5001     0.7715        406        640: 100%|██████████| 137/137 [00:55<00:00,  2.48it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:04<00:00,  1.85it/s]

                   all        136      88538      0.701      0.395       0.47      0.305






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     70/100       9.3G     0.8714     0.4956     0.7715        772        640: 100%|██████████| 137/137 [00:55<00:00,  2.48it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:04<00:00,  1.88it/s]

                   all        136      88538      0.671      0.358      0.438      0.295






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     71/100        14G     0.8666     0.4869     0.7719        781        640: 100%|██████████| 137/137 [00:55<00:00,  2.48it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:05<00:00,  1.78it/s]

                   all        136      88538      0.708      0.377      0.456      0.301






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     72/100      9.54G     0.8711     0.4914     0.7712        276        640: 100%|██████████| 137/137 [00:55<00:00,  2.49it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:04<00:00,  1.88it/s]

                   all        136      88538      0.736      0.374      0.453      0.296






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     73/100      10.1G     0.8532     0.4769     0.7711        405        640: 100%|██████████| 137/137 [00:54<00:00,  2.50it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:04<00:00,  1.86it/s]

                   all        136      88538      0.705      0.397      0.474      0.306






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     74/100      11.8G     0.8606     0.4838     0.7704        412        640: 100%|██████████| 137/137 [00:55<00:00,  2.47it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:04<00:00,  1.88it/s]

                   all        136      88538      0.684      0.383      0.456      0.303






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     75/100      7.86G     0.8567     0.4832     0.7687        132        640: 100%|██████████| 137/137 [00:55<00:00,  2.45it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:04<00:00,  1.91it/s]

                   all        136      88538      0.682      0.387      0.462      0.301






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     76/100      11.6G     0.8461      0.476     0.7711        306        640: 100%|██████████| 137/137 [00:54<00:00,  2.50it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:04<00:00,  1.85it/s]

                   all        136      88538      0.688      0.387      0.459      0.302






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     77/100      11.2G     0.8496     0.4797       0.77        182        640: 100%|██████████| 137/137 [00:55<00:00,  2.47it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:05<00:00,  1.78it/s]

                   all        136      88538      0.711      0.384       0.46      0.305






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     78/100      13.9G     0.8461     0.4744     0.7713        229        640: 100%|██████████| 137/137 [00:55<00:00,  2.49it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:04<00:00,  1.81it/s]

                   all        136      88538       0.66      0.378       0.45      0.299






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     79/100      12.8G     0.8413     0.4737     0.7685        413        640: 100%|██████████| 137/137 [00:55<00:00,  2.46it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:05<00:00,  1.76it/s]

                   all        136      88538      0.684       0.38      0.451      0.299






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     80/100      11.2G     0.8379      0.473     0.7666        256        640: 100%|██████████| 137/137 [00:54<00:00,  2.50it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:05<00:00,  1.72it/s]

                   all        136      88538      0.675      0.377      0.454      0.299






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     81/100      14.1G      0.839     0.4732     0.7671        204        640: 100%|██████████| 137/137 [00:55<00:00,  2.47it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:04<00:00,  1.82it/s]

                   all        136      88538      0.711      0.379      0.457      0.302






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     82/100      12.7G     0.8276     0.4648      0.767        214        640: 100%|██████████| 137/137 [00:55<00:00,  2.49it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:05<00:00,  1.79it/s]

                   all        136      88538      0.675      0.383      0.453      0.302






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     83/100        12G     0.8277     0.4633     0.7678        970        640: 100%|██████████| 137/137 [00:55<00:00,  2.48it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:05<00:00,  1.79it/s]

                   all        136      88538      0.696      0.388      0.459      0.305






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     84/100      7.28G      0.836     0.4666     0.7671        483        640: 100%|██████████| 137/137 [00:55<00:00,  2.47it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:04<00:00,  1.84it/s]

                   all        136      88538      0.677       0.39      0.465       0.31






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     85/100      8.77G     0.8105     0.4523     0.7674        278        640: 100%|██████████| 137/137 [00:54<00:00,  2.50it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:04<00:00,  1.91it/s]

                   all        136      88538      0.685      0.389      0.464      0.311






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     86/100      11.6G     0.8332     0.4692     0.7702        241        640: 100%|██████████| 137/137 [00:55<00:00,  2.49it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:05<00:00,  1.79it/s]

                   all        136      88538      0.712      0.382      0.455      0.306






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     87/100      14.2G     0.8117     0.4578     0.7689        157        640: 100%|██████████| 137/137 [00:54<00:00,  2.49it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:05<00:00,  1.77it/s]

                   all        136      88538      0.713      0.386      0.463       0.31






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     88/100      12.6G     0.8172      0.458     0.7682        259        640: 100%|██████████| 137/137 [00:55<00:00,  2.48it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:05<00:00,  1.79it/s]

                   all        136      88538      0.697      0.387      0.461      0.308






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     89/100      12.1G     0.8092     0.4528     0.7689        409        640: 100%|██████████| 137/137 [00:54<00:00,  2.51it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:05<00:00,  1.79it/s]

                   all        136      88538      0.687      0.384      0.462      0.313






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     90/100      12.9G     0.8092     0.4519     0.7678        476        640: 100%|██████████| 137/137 [00:54<00:00,  2.50it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:05<00:00,  1.74it/s]

                   all        136      88538      0.684      0.394      0.469      0.311





Closing dataloader mosaic
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1, 4.0), tile_grid_size=(8, 8))


  self.pid = os.fork()
  self.pid = os.fork()



      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     91/100       8.7G     0.8118     0.4394     0.7577        318        640: 100%|██████████| 137/137 [00:54<00:00,  2.53it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:04<00:00,  1.82it/s]

                   all        136      88538      0.692      0.384      0.459      0.309






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     92/100      8.83G     0.8111     0.4448     0.7573        281        640: 100%|██████████| 137/137 [00:51<00:00,  2.66it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:04<00:00,  1.84it/s]

                   all        136      88538      0.692      0.388      0.466      0.311






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     93/100      9.36G     0.8034     0.4395      0.756        240        640: 100%|██████████| 137/137 [00:51<00:00,  2.66it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:05<00:00,  1.77it/s]

                   all        136      88538      0.716      0.381      0.461      0.311






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     94/100      8.51G      0.799     0.4385     0.7583        498        640: 100%|██████████| 137/137 [00:51<00:00,  2.66it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:04<00:00,  1.87it/s]

                   all        136      88538      0.691      0.395      0.472      0.313






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     95/100      11.2G     0.7927     0.4358     0.7558        115        640: 100%|██████████| 137/137 [00:51<00:00,  2.67it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:04<00:00,  1.80it/s]

                   all        136      88538      0.699      0.383      0.463      0.313






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     96/100      8.54G     0.7875     0.4299     0.7556        537        640: 100%|██████████| 137/137 [00:51<00:00,  2.67it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:04<00:00,  1.82it/s]

                   all        136      88538       0.71      0.382      0.463      0.313






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     97/100      10.5G     0.7899     0.4323     0.7576        377        640: 100%|██████████| 137/137 [00:51<00:00,  2.67it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:05<00:00,  1.77it/s]

                   all        136      88538      0.689      0.386      0.464      0.314






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     98/100      9.85G     0.7861     0.4313     0.7568        128        640: 100%|██████████| 137/137 [00:51<00:00,  2.67it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:04<00:00,  1.80it/s]

                   all        136      88538      0.698      0.385      0.465      0.316






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     99/100      9.03G     0.7764     0.4238     0.7577        444        640: 100%|██████████| 137/137 [00:51<00:00,  2.67it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:04<00:00,  1.85it/s]

                   all        136      88538      0.722      0.387      0.468      0.317






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


    100/100      9.33G     0.7809     0.4244     0.7567        241        640: 100%|██████████| 137/137 [00:51<00:00,  2.67it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:05<00:00,  1.78it/s]

                   all        136      88538      0.719      0.384      0.467      0.317






100 epochs completed in 1.702 hours.
Optimizer stripped from deepscores_detection/yolov8m_run16/weights/last.pt, 52.2MB
Optimizer stripped from deepscores_detection/yolov8m_run16/weights/best.pt, 52.2MB

Validating deepscores_detection/yolov8m_run16/weights/best.pt...
Ultralytics 8.3.39 🚀 Python-3.10.14 torch-2.4.0 CUDA:0 (Tesla T4, 15095MiB)
Model summary (fused): 218 layers, 25,918,504 parameters, 0 gradients, 79.1 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:41<00:00,  4.61s/it]


                   all        136      88538      0.719      0.385      0.467      0.318
                 brace         73        275      0.891     0.0592      0.401      0.345
         ornamentTrill         11         27      0.885      0.741      0.776      0.514
          ornamentTurn          2          7      0.763      0.571      0.635      0.424
  ornamentTurnInverted          8          8      0.188      0.125     0.0854     0.0209
        stringsDownBow          4         42      0.636      0.119      0.241     0.0916
          stringsUpBow          7         65      0.899      0.551      0.716      0.331
                 clef8         40        113      0.831     0.0874      0.238     0.0707
            arpeggiato         10         50      0.541      0.189      0.322      0.118
      keyboardPedalPed          2          3      0.557      0.667      0.678      0.577
       keyboardPedalUp          3         32      0.955      0.938      0.948      0.827
               tuplet

selected_classes=['restDoubleWhole', 'restWhole', 'restHalf', 'restQuarter', 
                      'rest8th', 'rest16th', 'rest32nd', 'rest64th', 'rest128th']

selected_classes=['brace', 'staff']

TRAIN MODEL OVER SELECTED CLASSES (FINAL USED CODE)

In [3]:
from ultralytics import YOLO
import torch
from pathlib import Path
import yaml

def train_yolo_deepscores(
    yaml_path,
    model_size='m',  # n, s, m, l, or x
    epochs=100,
    batch_size=2,
    imgsz=1600,
    workers=8,
    selected_classes = ['noteheadBlackOnLine','noteheadBlackOnLineSmall','noteheadBlackInSpace','noteheadBlackInSpaceSmall','noteheadHalfOnLine','noteheadHalfOnLineSmall','noteheadHalfInSpace','noteheadHalfInSpaceSmall','noteheadWholeOnLine','noteheadWholeOnLineSmall','noteheadWholeInSpace',
    'noteheadWholeInSpaceSmall',
    'noteheadDoubleWholeOnLine',
    'noteheadDoubleWholeOnLineSmall',
    'noteheadDoubleWholeInSpace',
    'noteheadDoubleWholeInSpaceSmall'
]
):
    """
    Train YOLOv8 on DeepScores dataset with selected classes
    
    Args:
        yaml_path: Path to dataset.yaml
        model_size: YOLO model size (n=nano, s=small, m=medium, l=large, x=xlarge)
        epochs: Number of training epochs
        batch_size: Batch size
        imgsz: Input image size
        workers: Number of worker threads for data loading
        selected_classes: List of class names to train on
    """
    print(f"Training YOLOv8{model_size} on DeepScores dataset...")
    
    # Load dataset configuration
    with open(yaml_path, 'r') as f:
        dataset_config = yaml.safe_load(f)
    
    # Get indices of selected classes
    try:
        selected_indices = [dataset_config['names'].index(cls) for cls in selected_classes]
    except ValueError as e:
        print(f"Error: Class not found in dataset. {e}")
        print("Available classes:", dataset_config['names'])
        raise
    
    # Initialize model
    model = YOLO(f'yolov8{model_size}.pt')
    
    # Training arguments
    args = {
        'data': yaml_path,  # Path to data config file
        'epochs': epochs,  # Number of epochs
        'batch': batch_size,  # Batch size
        'imgsz': imgsz,  # Image size
        'workers': workers,  # Number of worker threads
        'patience': 10,  # Early stopping patience
        'device': 0 if torch.cuda.is_available() else 'cpu',  # Device to use
        'project': 'deepscores_detection_noteheads',  # Project name
        'name': f'yolov8{model_size}1600_rests_classes',  # Run name
        'classes': selected_indices,  # Filter to selected classes
        
        # Augmentation settings
        'degrees': 10.0,  # No rotation augmentation
        'scale': 0.5,  # Scale augmentation
        'shear': 10.0,   # No shear augmentation
        'flipud': 0.0,  # No vertical flipping
        'fliplr': 0.0,  # No horizontal flipping
        'mosaic': 0.1,  # Mosaic augmentation with reduced probability
        'mixup': 0.1,   # No mixup augmentation
        
        
        # Optimization settings
        'optimizer': 'AdamW',  # Use AdamW optimizer
        'lr0': 0.001,  # Initial learning rate
        'lrf': 0.01,   # Final learning rate ratio
        'momentum': 0.937,
        'weight_decay': 0.0005,
        'warmup_epochs': 3.0,
        'warmup_momentum': 0.8,
        'warmup_bias_lr': 0.1,
        'box': 7.5,    # Box loss gain
        'cls': 0.5,    # Class loss gain
        'dfl': 1.5,    # DFL loss gain
        'amp': True,  # Automatic Mixed Precision
        
        # Save and validation settings
        'save': True,  # Save training results
        'save_period': -1,  # Save every x epochs (-1 for last epoch only)
        'plots': False, # Generate plots
        'rect': False, # Rectangular training
        'val': True,   # Validate during training
    }
    
    # Start training
    try:
        results = model.train(**args)
        print("Training completed successfully!")
        return results
    except Exception as e:
        print(f"Error during training: {str(e)}")
        raise

if __name__ == "__main__":
    yaml_path = "/kaggle/working/yamloutput/dataset.yaml"
    
    # Train model
    results = train_yolo_deepscores(
        yaml_path=yaml_path,
        model_size='m',  # medium size model
        epochs=50,
        batch_size=2,
        imgsz=1600,
        workers=8,
        selected_classes = ['noteheadBlackOnLine','noteheadBlackOnLineSmall','noteheadBlackInSpace','noteheadBlackInSpaceSmall','noteheadHalfOnLine','noteheadHalfOnLineSmall','noteheadHalfInSpace','noteheadHalfInSpaceSmall','noteheadWholeOnLine','noteheadWholeOnLineSmall','noteheadWholeInSpace',
    'noteheadWholeInSpaceSmall',
    'noteheadDoubleWholeOnLine',
    'noteheadDoubleWholeOnLineSmall',
    'noteheadDoubleWholeInSpace',
    'noteheadDoubleWholeInSpaceSmall'
]

    )

Training YOLOv8m on DeepScores dataset...
Ultralytics 8.3.58 🚀 Python-3.10.14 torch-2.4.0 CUDA:0 (Tesla T4, 15095MiB)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolov8m.pt, data=/kaggle/working/yamloutput/dataset.yaml, epochs=50, time=None, patience=10, batch=2, imgsz=1600, save=True, save_period=-1, cache=False, device=0, workers=8, project=deepscores_detection_noteheads, name=yolov8m1600_rests_classes2, exist_ok=False, pretrained=True, optimizer=AdamW, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=False, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=[54, 55, 56, 57, 58, 60, 61, 62, 63, 64, 65, 66, 67, 68,

[34m[1mtrain: [0mScanning /kaggle/working/yamloutput/labels/train.cache... 1089 images, 0 backgrounds, 0 corrupt: 100%|██████████| 1089/1089 [00:00<?, ?it/s]

[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1, 4.0), tile_grid_size=(8, 8))



  self.pid = os.fork()
[34m[1mval: [0mScanning /kaggle/working/yamloutput/labels/val.cache... 136 images, 0 backgrounds, 0 corrupt: 100%|██████████| 136/136 [00:00<?, ?it/s]


[34m[1moptimizer:[0m AdamW(lr=0.001, momentum=0.937) with parameter groups 77 weight(decay=0.0), 84 weight(decay=0.0005), 83 bias(decay=0.0)
[34m[1mTensorBoard: [0mmodel graph visualization added ✅
Image sizes 1600 train, 1600 val
Using 4 dataloader workers
Logging results to [1mdeepscores_detection_noteheads/yolov8m1600_rests_classes2[0m
Starting training for 50 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/50      15.4G      2.136      6.508      1.043        358       1600:   1%|          | 4/545 [00:02<04:36,  1.96it/s]



       1/50      15.4G      2.367      6.528      1.076        190       1600:   1%|▏         | 7/545 [00:10<13:48,  1.54s/it]



       1/50      15.5G      2.412      6.331      1.088        195       1600:   2%|▏         | 9/545 [00:20<27:04,  3.03s/it]



       1/50      15.5G       2.34      5.719      1.056        545       1600:   2%|▏         | 12/545 [00:28<20:46,  2.34s/it]



       1/50      15.5G      2.271      5.767       1.02         53       1600:   3%|▎         | 16/545 [00:37<14:21,  1.63s/it]



       1/50      15.5G      2.266      5.271      1.014        141       1600:   3%|▎         | 19/545 [00:50<23:21,  2.66s/it]



       1/50      15.5G      2.213      4.402      1.008        424       1600:   5%|▍         | 25/545 [00:59<08:55,  1.03s/it]



       1/50      15.5G      2.152      4.024     0.9969        310       1600:   5%|▌         | 29/545 [01:06<10:11,  1.18s/it]



       1/50      15.5G      2.144      3.937     0.9933        608       1600:   6%|▌         | 30/545 [01:12<22:37,  2.64s/it]



       1/50      15.5G      2.133      3.868       0.99        584       1600:   6%|▌         | 31/545 [01:19<35:12,  4.11s/it]



       1/50      15.5G      2.126      3.796     0.9889        617       1600:   6%|▌         | 32/545 [01:27<42:47,  5.00s/it]



       1/50      15.5G      2.138      3.738     0.9877        646       1600:   6%|▌         | 33/545 [01:33<45:04,  5.28s/it]



       1/50      15.5G      2.161      3.687     0.9855        210       1600:   6%|▋         | 35/545 [01:43<40:16,  4.74s/it]



  self.pid = os.fork()



KeyboardInterrupt: 

TEST MODEL

In [5]:
from ultralytics import YOLO
import cv2

# Load the trained model
model = YOLO('/kaggle/working/deepscores_detection_noteheads/yolov8m1600_rests_classes3/weights/best.pt') 

results = model('/kaggle/input/jinglebells/Jingle-Bells-Free-Sheet-Music-1200x1699.png')

# For visualization and saving results
for result in results:
    # Get the plot
    im_array = result.plot()  # plot a BGR numpy array of predictions
    
    # Save the image with predictions
    cv2.imwrite('detected_symbols.jpg', im_array)
    
    boxes = result.boxes  # get boxes on the image
    for box in boxes:
        # Get coordinates
        x1, y1, x2, y2 = box.xyxy[0]  # get box coordinates in (top, left, bottom, right) format
        confidence = box.conf[0]  # confidence score
        class_id = box.cls[0]  # class id of the detected object
        class_name = model.names[int(class_id)]  # class name
        
        print(f"Detected {class_name} with confidence {confidence:.2f} at position {x1:.1f}, {y1:.1f}, {x2:.1f}, {y2:.1f}")


image 1/1 /kaggle/input/jinglebells/Jingle-Bells-Free-Sheet-Music-1200x1699.png: 1600x1152 69 noteheadBlackOnLines, 76 noteheadBlackInSpaces, 3 noteheadHalfOnLines, 2 noteheadHalfInSpaces, 1 noteheadWholeOnLine, 1 noteheadWholeInSpace, 102.1ms
Speed: 12.3ms preprocess, 102.1ms inference, 1.9ms postprocess per image at shape (1, 3, 1600, 1152)
Detected noteheadWholeOnLine with confidence 0.91 at position 694.6, 1336.4, 711.1, 1347.3
Detected noteheadWholeInSpace with confidence 0.90 at position 694.4, 1253.0, 711.8, 1264.2
Detected noteheadBlackOnLine with confidence 0.88 at position 770.3, 652.3, 788.4, 668.3
Detected noteheadBlackOnLine with confidence 0.87 at position 941.7, 652.4, 959.9, 668.1
Detected noteheadBlackOnLine with confidence 0.87 at position 675.3, 1121.0, 693.6, 1137.1
Detected noteheadBlackOnLine with confidence 0.87 at position 941.8, 542.7, 959.9, 558.8
Detected noteheadBlackOnLine with confidence 0.86 at position 905.9, 297.6, 924.0, 313.8
Detected noteheadBlackIn

TEST 2

In [None]:
from ultralytics import YOLO
import cv2
import numpy as np

def draw_detections(image, results, conf_threshold=0.3):

    img_with_boxes = image.copy()
    
    colors = np.random.randint(0, 255, size=(len(results[0].names), 3)).tolist()
    
    for result in results:
        boxes = result.boxes
        for box in boxes:
            # Only show predictions above confidence threshold
            if box.conf[0] > conf_threshold:
                # Get box coordinates
                x1, y1, x2, y2 = box.xyxy[0].cpu().numpy()
                x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
                
                # Get class details
                class_id = int(box.cls[0])
                class_name = result.names[class_id]
                confidence = float(box.conf[0])
                
                # Get color for this class
                color = colors[class_id]
                
                cv2.rectangle(img_with_boxes, (x1, y1), (x2, y2), color, 1)
                
                # Prepare text with class name and confidence
                text = f'{class_name}: {confidence:.2f}'
                
                # Get text size
                (text_width, text_height), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
                
                # Draw semi-transparent background for text
                cv2.rectangle(img_with_boxes, 
                            (x1, y1 - text_height - 4), 
                            (x1 + text_width, y1), 
                            color, 
                            -1)  # filled rectangle
                
                # Draw text in white
                cv2.putText(img_with_boxes, 
                          text, 
                          (x1, y1 - 4), 
                          cv2.FONT_HERSHEY_SIMPLEX, 
                          0.5,       # smaller font size
                          (255, 255, 255),  # white color
                          1)        # thinner line
    
    return img_with_boxes

model = YOLO('/kaggle/working/deepscores_detection_noteheads/yolov8m1600_rests_classes3/weights/best.pt')

image_path = '/kaggle/input/reststry/Screenshot 2024-12-01 195522.png'
image = cv2.imread(image_path)

# Run
results = model(image)

output_image = draw_detections(image, results, conf_threshold=0.3)  # Adjust confidence threshold as needed

cv2.imwrite('detected_symbols_clean2.jpg', output_image)


REST: EXPERIMENTS (NOT USED)

In [3]:
from ultralytics import YOLO
import torch
from pathlib import Path
import yaml
import cv2
import numpy as np
from typing import List, Tuple, Dict
import albumentations as A

class WindowedYOLOTrainer:
    def __init__(
        self,
        window_size: Tuple[int, int] = (1600, 1600),
        overlap: float = 0.2,
        min_box_area: float = 0.01
    ):
        self.window_size = window_size
        self.overlap = overlap
        self.min_box_area = min_box_area
    
    def extract_windows_and_boxes(
        self,
        image_path: str,
        labels_path: str
    ) -> List[Tuple[np.ndarray, np.ndarray]]:
        """
        Extract windows from image and corresponding bounding boxes
        Returns list of (window, adjusted_boxes) tuples
        """
        # Read image and labels
        image = cv2.imread(image_path)
        height, width = image.shape[:2]
        
        # Read YOLO format labels (class_id, x_center, y_center, w, h)
        boxes = []
        classes = []
        if Path(labels_path).exists():
            with open(labels_path, 'r') as f:
                for line in f:
                    class_id, x_center, y_center, w, h = map(float, line.strip().split())
                    # Convert normalized coordinates to absolute
                    x_center *= width
                    y_center *= height
                    w *= width
                    h *= height
                    boxes.append([x_center, y_center, w, h])
                    classes.append(class_id)
        
        boxes = np.array(boxes)
        classes = np.array(classes)
        
        # Calculate window parameters
        stride = int((1 - self.overlap) * self.window_size[0])
        windows_with_boxes = []
        
        # Slide window over image
        for y in range(0, height - self.window_size[1] + stride, stride):
            for x in range(0, width - self.window_size[0] + stride, stride):
                # Adjust final windows to not exceed image boundaries
                window_x = min(x, width - self.window_size[0])
                window_y = min(y, height - self.window_size[1])
                
                # Extract window
                window = image[window_y:window_y + self.window_size[1],
                             window_x:window_x + self.window_size[0]]
                
                # Find boxes that overlap with window
                window_boxes = []
                window_classes = []
                
                if len(boxes) > 0:
                    for box, class_id in zip(boxes, classes):
                        x_center, y_center, w, h = box
                        
                        # Check if box overlaps with window
                        box_x1 = x_center - w/2
                        box_y1 = y_center - h/2
                        box_x2 = x_center + w/2
                        box_y2 = y_center + h/2
                        
                        window_x2 = window_x + self.window_size[0]
                        window_y2 = window_y + self.window_size[1]
                        
                        # Calculate overlap
                        if (box_x1 < window_x2 and box_x2 > window_x and
                            box_y1 < window_y2 and box_y2 > window_y):
                            
                            # Clip box to window boundaries
                            new_x1 = max(box_x1, window_x) - window_x
                            new_y1 = max(box_y1, window_y) - window_y
                            new_x2 = min(box_x2, window_x2) - window_x
                            new_y2 = min(box_y2, window_y2) - window_y
                            
                            # Calculate new center and dimensions
                            new_w = new_x2 - new_x1
                            new_h = new_y2 - new_y1
                            new_x_center = new_x1 + new_w/2
                            new_y_center = new_y1 + new_h/2
                            
                            # Convert to normalized coordinates
                            new_x_center /= self.window_size[0]
                            new_y_center /= self.window_size[1]
                            new_w /= self.window_size[0]
                            new_h /= self.window_size[1]
                            
                            # Only keep boxes with sufficient area
                            if new_w * new_h > self.min_box_area:
                                window_boxes.append([new_x_center, new_y_center, new_w, new_h])
                                window_classes.append(class_id)
                
                if len(window_boxes) > 0:
                    # Combine classes and boxes into YOLO format
                    window_labels = np.column_stack([window_classes, window_boxes])
                    windows_with_boxes.append((window, window_labels))
        
        return windows_with_boxes
    
    def prepare_windowed_dataset(
        self,
        original_yaml_path: str,
        output_dir: Path
    ) -> str:
        """
        Prepare windowed dataset from original dataset
        Returns path to new dataset.yaml
        """
        # Load original dataset config
        with open(original_yaml_path, 'r') as f:
            dataset_config = yaml.safe_load(f)
        
        # Create output directories
        output_dir = Path(output_dir)
        for split in ['train', 'val', 'test']:
            (output_dir / split / 'images').mkdir(parents=True, exist_ok=True)
            (output_dir / split / 'labels').mkdir(parents=True, exist_ok=True)
        
        # Process each split
        for split in ['train', 'val', 'test']:
            split_dir = Path(dataset_config['path']) / split
            image_files = list((split_dir / 'images').glob('*.jpg')) + \
                         list((split_dir / 'images').glob('*.png'))
            
            for idx, image_file in enumerate(image_files):
                # Get corresponding label file
                label_file = (split_dir / 'labels' / image_file.stem).with_suffix('.txt')
                
                # Extract windows and boxes
                windows_with_boxes = self.extract_windows_and_boxes(
                    str(image_file),
                    str(label_file)
                )
                
                # Save windows and corresponding labels
                for window_idx, (window, labels) in enumerate(windows_with_boxes):
                    window_name = f"{image_file.stem}_window_{window_idx}"
                    
                    # Save window image
                    cv2.imwrite(
                        str(output_dir / split / 'images' / f"{window_name}.jpg"),
                        window
                    )
                    
                    # Save window labels
                    np.savetxt(
                        str(output_dir / split / 'labels' / f"{window_name}.txt"),
                        labels,
                        fmt='%g',
                        delimiter=' '
                    )
        
        # Create new dataset.yaml
        new_yaml = {
            'path': str(output_dir),
            'train': str(output_dir / 'train'),
            'val': str(output_dir / 'val'),
            'test': str(output_dir / 'test'),
            'names': dataset_config['names']
        }
        
        new_yaml_path = output_dir / 'dataset.yaml'
        with open(new_yaml_path, 'w') as f:
            yaml.dump(new_yaml, f)
        
        return str(new_yaml_path)

def train_yolo_deepscores_windowed(
    yaml_path: str,
    model_size: str = 'm',
    epochs: int = 100,
    batch_size: int = 4,
    window_size: Tuple[int, int] = (1600, 1600),
    overlap: float = 0.2,
    workers: int = 8,
    selected_classes: List[str] = None
):
    """
    Train YOLOv8 on DeepScores dataset using sliding windows
    """
    print(f"Preparing windowed dataset for YOLOv8{model_size} training...")
    
    # Initialize windowed trainer
    trainer = WindowedYOLOTrainer(
        window_size=window_size,
        overlap=overlap
    )
    
    # Prepare windowed dataset
    output_dir = Path('deepscores_windowed')
    new_yaml_path = trainer.prepare_windowed_dataset(yaml_path, output_dir)
    
    # Load dataset configuration
    with open(new_yaml_path, 'r') as f:
        dataset_config = yaml.safe_load(f)
    
    # Get indices of selected classes if specified
    selected_indices = None
    if selected_classes:
        try:
            selected_indices = [dataset_config['names'].index(cls) for cls in selected_classes]
        except ValueError as e:
            print(f"Error: Class not found in dataset. {e}")
            print("Available classes:", dataset_config['names'])
            raise
    
    # Initialize model
    model = YOLO(f'yolov8{model_size}.pt')
    
    # Training arguments
    args = {
        'data': new_yaml_path,
        'epochs': epochs,
        'batch': batch_size,
        'imgsz': window_size[0],  # Using window size as image size
        'workers': workers,
        'patience': 10,
        'device': 0 if torch.cuda.is_available() else 'cpu',
        'project': 'deepscores_detection_windowed',
        'name': f'yolov8{model_size}_windowed',
        'classes': selected_indices,
        
        # Augmentation settings
        'degrees': 10.0,
        'scale': 0.5,
        'shear': 10.0,
        'flipud': 0.0,
        'fliplr': 0.0,
        'mosaic': 0.1,
        'mixup': 0.1,
        
        # Optimization settings
        'optimizer': 'AdamW',
        'lr0': 0.001,
        'lrf': 0.01,
        'momentum': 0.937,
        'weight_decay': 0.0005,
        'warmup_epochs': 3.0,
        'warmup_momentum': 0.8,
        'warmup_bias_lr': 0.1,
        'box': 7.5,
        'cls': 0.5,
        'dfl': 1.5,
        'amp': True,
    }
    
    # Start training
    try:
        results = model.train(**args)
        print("Training completed successfully!")
        return results
    except Exception as e:
        print(f"Error during training: {str(e)}")
        raise

# Example usage
if __name__ == "__main__":
    yaml_path = "/kaggle/working/yamloutput/dataset.yaml"
    selected_classes = [
        'noteheadBlackOnLine', 'noteheadBlackOnLineSmall',
        'noteheadBlackInSpace', 'noteheadBlackInSpaceSmall',
        'noteheadHalfOnLine', 'noteheadHalfOnLineSmall',
        'noteheadHalfInSpace', 'noteheadHalfInSpaceSmall',
        'noteheadWholeOnLine', 'noteheadWholeOnLineSmall',
        'noteheadWholeInSpace', 'noteheadWholeInSpaceSmall',
        'noteheadDoubleWholeOnLine', 'noteheadDoubleWholeOnLineSmall',
        'noteheadDoubleWholeInSpace', 'noteheadDoubleWholeInSpaceSmall'
    ]
    
    results = train_yolo_deepscores_windowed(
        yaml_path=yaml_path,
        model_size='m',
        epochs=50,
        batch_size=4,
        window_size=(420, 420),
        overlap=0.2,
        workers=8,
        selected_classes=selected_classes
    )

Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.


  check_for_updates()


Preparing windowed dataset for YOLOv8m training...
Ultralytics 8.3.58 🚀 Python-3.10.14 torch-2.4.0 CUDA:0 (Tesla T4, 15095MiB)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolov8m.pt, data=deepscores_windowed/dataset.yaml, epochs=50, time=None, patience=10, batch=4, imgsz=420, save=True, save_period=-1, cache=False, device=0, workers=8, project=deepscores_detection_windowed, name=yolov8m_windowed, exist_ok=False, pretrained=True, optimizer=AdamW, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=[54, 55, 56, 57, 58, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 71], r

RuntimeError: Dataset 'deepscores_windowed/dataset.yaml' error ❌ 
Dataset 'deepscores_windowed/dataset.yaml' images not found ⚠️, missing path '/kaggle/working/datasets/deepscores_windowed/deepscores_windowed/val'
Note dataset download directory is '/kaggle/working/datasets'. You can update this in '/root/.config/Ultralytics/settings.json'

In [2]:
!pip install ultralytics

Collecting ultralytics
  Downloading ultralytics-8.3.70-py3-none-any.whl.metadata (35 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.14-py3-none-any.whl.metadata (9.4 kB)
Downloading ultralytics-8.3.70-py3-none-any.whl (914 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m914.9/914.9 kB[0m [31m14.8 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hDownloading ultralytics_thop-2.0.14-py3-none-any.whl (26 kB)
Installing collected packages: ultralytics-thop, ultralytics
Successfully installed ultralytics-8.3.70 ultralytics-thop-2.0.14


In [3]:
import json
import cv2
import yaml
from pathlib import Path
from sklearn.model_selection import train_test_split
from ultralytics import YOLO

def create_sliding_window_dataset(
    json_path,
    images_dir,
    output_dir,
    window_size=(512, 512),
    overlap=0.2
):
    """
    Preprocess dataset by creating sliding window crops of large images
    and adjusting their annotations accordingly.
    """
    # Create output directories
    output_dir = Path(output_dir)
    (output_dir / 'images' / 'train').mkdir(parents=True, exist_ok=True)
    (output_dir / 'images' / 'val').mkdir(parents=True, exist_ok=True)
    (output_dir / 'images' / 'test').mkdir(parents=True, exist_ok=True)
    (output_dir / 'labels' / 'train').mkdir(parents=True, exist_ok=True)
    (output_dir / 'labels' / 'val').mkdir(parents=True, exist_ok=True)
    (output_dir / 'labels' / 'test').mkdir(parents=True, exist_ok=True)
    
    # Load original annotations
    with open(json_path, 'r') as f:
        data = json.load(f)
    
    # Calculate stride
    stride_x = int(window_size[0] * (1 - overlap))
    stride_y = int(window_size[1] * (1 - overlap))
    
    new_annotations = {
        'train': [],
        'val': [],
        'test': []
    }
    
    # Split dataset (you can reuse your existing split logic)
    image_list = list(data['images'])
    splits = train_test_split(image_list, test_size=0.2, random_state=42)
    train_imgs, temp = splits
    val_imgs, test_imgs = train_test_split(temp, test_size=0.5, random_state=42)
    
    split_data = {
        'train': train_imgs,
        'val': val_imgs,
        'test': test_imgs
    }
    
    # Process each split
    for split_name, split_images in split_data.items():
        print(f"Processing {split_name} split...")
        
        for img_idx, img_data in enumerate(split_images):
            if img_idx % 100 == 0:
                print(f"Processing image {img_idx}/{len(split_images)}")
                
            # Load image
            img_path = Path(images_dir) / img_data['filename']
            img = cv2.imread(str(img_path))
            if img is None:
                print(f"Warning: Could not load image {img_path}")
                continue
                
            # Save original image without resizing
            orig_img_path = output_dir / 'images' / split_name / f"{Path(img_data['filename']).stem}_original.jpg"
            cv2.imwrite(str(orig_img_path), img)
            
            height, width = img.shape[:2]
            
            # Get annotations for this image
            img_annotations = [
                data['annotations'][ann_id] 
                for ann_id in img_data['ann_ids']
                if ann_id in data['annotations']
            ]
            
            # Save original annotations in YOLO format
            orig_annotations = []
            for ann in img_annotations:
                orig_bbox = ann['o_bbox']
                yolo_bbox = convert_bbox_to_yolo(orig_bbox, width, height)
                orig_annotations.append({
                    'bbox': yolo_bbox,
                    'category_id': ann['cat_id'][0]
                })
                
            # Save original annotations
            with open(output_dir / 'labels' / split_name / f"{Path(img_data['filename']).stem}_original.txt", 'w') as f:
                for ann in orig_annotations:
                    bbox_str = ' '.join(map(str, ann['bbox']))
                    f.write(f"{ann['category_id']} {bbox_str}\n")
            
            window_idx = 0
            # Slide window over image
            for y in range(0, height - window_size[1] + stride_y, stride_y):
                for x in range(0, width - window_size[0] + stride_x, stride_x):
                    # Adjust final window position
                    end_x = min(x + window_size[0], width)
                    end_y = min(y + window_size[1], height)
                    x = max(0, end_x - window_size[0])
                    y = max(0, end_y - window_size[1])
                    
                    # Extract window
                    window = img[y:end_y, x:end_x]
                    
                    # Create window annotations
                    window_annotations = []
                    for ann in img_annotations:
                        # Convert original bbox to absolute coordinates
                        orig_bbox = ann['o_bbox']  # [x1,y1,x2,y2,x3,y3,x4,y4]
                        
                        # Check if bbox intersects with window
                        bbox_x = min(orig_bbox[0::2])
                        bbox_y = min(orig_bbox[1::2])
                        bbox_width = max(orig_bbox[0::2]) - bbox_x
                        bbox_height = max(orig_bbox[1::2]) - bbox_y
                        
                        # Check intersection
                        if (bbox_x + bbox_width >= x and 
                            bbox_x <= end_x and 
                            bbox_y + bbox_height >= y and 
                            bbox_y <= end_y):
                            
                            # Adjust coordinates relative to window
                            new_bbox = []
                            for i in range(0, len(orig_bbox), 2):
                                new_bbox.extend([
                                    max(0, min(window_size[0], orig_bbox[i] - x)),
                                    max(0, min(window_size[1], orig_bbox[i+1] - y))
                                ])
                            
                            # Convert to YOLO format
                            yolo_bbox = convert_bbox_to_yolo(
                                new_bbox, 
                                window_size[0], 
                                window_size[1]
                            )
                            
                            window_annotations.append({
                                'bbox': yolo_bbox,
                                'category_id': ann['cat_id'][0]
                            })
                    
                    # Save window and annotations if it contains any objects
                    if window_annotations:
                        window_name = f"{Path(img_data['filename']).stem}_win{window_idx}"
                        
                        # Resize window to 512x512
                        window = cv2.resize(window, window_size)
                        
                        # Save image
                        cv2.imwrite(
                            str(output_dir / 'images' / split_name / f"{window_name}.jpg"),
                            window
                        )
                        
                        # Save annotations
                        with open(output_dir / 'labels' / split_name / f"{window_name}.txt", 'w') as f:
                            for ann in window_annotations:
                                bbox_str = ' '.join(map(str, ann['bbox']))
                                f.write(f"{ann['category_id']} {bbox_str}\n")
                        
                        window_idx += 1
    
    # Create new dataset.yaml
    yaml_content = {
        'path': str(output_dir.absolute()),
        'train': 'images/train',
        'val': 'images/val',
        'test': 'images/test',
        'nc': len(data['categories']),
        'names': [data['categories'][str(i)]['name'] 
                 for i in range(len(data['categories']))]
    }
    
    with open(output_dir / 'dataset.yaml', 'w') as f:
        yaml.dump(yaml_content, f, sort_keys=False)
    
    print("Dataset preprocessing completed!")
    return str(output_dir / 'dataset.yaml')

def convert_bbox_to_yolo(bbox, img_width, img_height):
    """Convert bbox to YOLO format"""
    x_coords = bbox[0::2]
    y_coords = bbox[1::2]
    
    x_min = min(x_coords)
    y_min = min(y_coords)
    width = max(x_coords) - x_min
    height = max(y_coords) - y_min
    
    # Convert to YOLO format (normalized)
    x_center = (x_min + width/2) / img_width
    y_center = (y_min + height/2) / img_height
    width = width / img_width
    height = height / img_height
    
    return [x_center, y_center, width, height]

# Usage example:
yaml_path = create_sliding_window_dataset(
    json_path='/kaggle/input/ds2-dense/ds2_dense/deepscores_train.json',
    images_dir='/kaggle/input/ds2-dense/ds2_dense/images',
    output_dir='deepscores_windowed/ayolo_dataset3',
    window_size=(512, 512),
    overlap=0.2
)

# Initialize YOLOv8 model
model = YOLO('yolov8m.pt')  # Load pretrained YOLOv8 medium model

# Train the model using the prepared dataset
model.train(
    data=yaml_path,  # Path to data YAML file
    epochs=100,      # Number of epochs
    imgsz=1600,     # Image size
    batch=4,        # Batch size
    workers=8,      # Number of workers
    device=0        # GPU device (0 for first GPU)
)

Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.
Processing train split...
Processing image 0/1089
Processing image 100/1089
Processing image 200/1089
Processing image 300/1089
Processing image 400/1089
Processing image 500/1089
Processing image 600/1089
Processing image 700/1089
Processing image 800/1089
Processing image 900/1089
Processing image 1000/1089
Processing val split...
Processing image 0/136
Processing image 100/136
Processing test split...
Processing image 0/137
Processing image 100/137


KeyError: '0'

In [2]:
from ultralytics import YOLO
import torch
from pathlib import Path
import yaml
import gc

def train_yolo_deepscores(
    yaml_path,
    model_size='m',
    epochs=100,
    batch_size=2,
    imgsz=1900,
    workers=8,
    selected_classes = [
        'augmentationDot',
    ]



):
    """
    Memory-optimized YOLOv8 training on DeepScores dataset
    """
    # Clear GPU cache before starting
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        gc.collect()
    
    print(f"Training YOLOv8{model_size} on DeepScores dataset...")
    
    # Load dataset configuration
    with open(yaml_path, 'r') as f:
        dataset_config = yaml.safe_load(f)
    
    # Get indices of selected classes
    try:
        selected_indices = [dataset_config['names'].index(cls) for cls in selected_classes]
    except ValueError as e:
        print(f"Error: Class not found in dataset. {e}")
        print("Available classes:", dataset_config['names'])
        raise
    
    # Initialize model with deterministic settings for better memory management
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    model = YOLO(f'yolov8{model_size}.pt')
    
    # Calculate optimal batch size based on available GPU memory
    if torch.cuda.is_available():
        gpu_memory = torch.cuda.get_device_properties(0).total_memory
        # Rough estimation - adjust these values based on your specific case
        memory_per_image = (imgsz * imgsz * 3 * 4) / (1024 * 1024 * 1024)  # in GB
        optimal_batch = min(batch_size, int(gpu_memory / (memory_per_image * 1024 * 1024 * 1024)))
        batch_size = max(1, optimal_batch)
        print(f"Adjusted batch size to {batch_size} based on available GPU memory")
    
    # Training arguments with memory optimization
    args = {
        'data': yaml_path,
        'epochs': epochs,
        'batch': batch_size,
        'imgsz': imgsz,
        'workers': min(workers, 8),  # Reduce worker threads to prevent memory overload
        'patience': 10,
        'device': 0 if torch.cuda.is_available() else 'cpu',
        'project': 'deepscores_detection_dots_1',
        'name': f'yolov8{model_size}1900_dots_classes',
        'classes': selected_indices,
        
        # Reduced augmentation to save memory
        'degrees': 10.0,
        'scale': 0.7,
        'shear': 10.0,
        'flipud': 0.0,
        'fliplr': 0.0,
        'mosaic': 0.2,  # Disabled mosaic to reduce memory usage
        'mixup': 0.0,   # Disabled mixup to reduce memory usage
        
        # Memory-efficient optimization settings
        'optimizer': 'AdamW',
        'lr0': 0.002, #0.001 default
        'lrf': 0.01,
        'momentum': 0.937,
        'weight_decay': 0.0001, #0.0005 default
        'warmup_epochs': 3.0,
        'warmup_momentum': 0.8,
        'warmup_bias_lr': 0.1,
        'box': 7.5,
        'cls': 0.5,
        'dfl': 2.0, #1.5 default
        'amp': True,    # Enable Automatic Mixed Precision for memory efficiency
        
        # Reduced saving frequency to save disk I/O
        'save': True,
        'save_period': 10,  # Save every 10 epochs
        'plots': False,     # Disable plotting to save memory
        'rect': False,       # Enable rectangular training for better memory usage
        'val': True,
        'cache': 'ram',     # Cache images in RAM instead of disk
        
        # Additional memory optimization parameters
        'overlap_mask': False,  # Disable mask overlap to save memory
        'multi_scale': False,   # Disable multi-scale training
        'fraction': 1.0,        # Use full dataset
    }
    
    # Start training with memory monitoring
    try:
        # Enable gradient checkpointing if available
        if hasattr(model.model, 'gradient_checkpointing_enable'):
            model.model.gradient_checkpointing_enable()
        
        results = model.train(**args)
        print("Training completed successfully!")
        
        # Clear memory after training
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
            gc.collect()
            
        return results
    except Exception as e:
        print(f"Error during training: {str(e)}")
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
            gc.collect()
        raise

if __name__ == "__main__":
    yaml_path = "/kaggle/working/yamloutput/dataset.yaml"
    
    results = train_yolo_deepscores(
        yaml_path=yaml_path,
        model_size='m',
        epochs=50,
        batch_size=2,
        imgsz=1900,
        workers=8,  # Reduced worker count
        selected_classes = [
            'augmentationDot'
        ]


    )

Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.
Training YOLOv8m on DeepScores dataset...
Adjusted batch size to 2 based on available GPU memory
Ultralytics 8.3.70 🚀 Python-3.10.14 torch-2.4.0 CUDA:0 (Tesla T4, 15095MiB)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolov8m.pt, data=/kaggle/working/yamloutput/dataset.yaml, epochs=50, time=None, patience=10, batch=2, imgsz=1900, save=True, save_period=10, cache=ram, device=0, workers=8, project=deepscores_detection_dots_1, name=yolov8m1900_dots_classes, exist_ok=False, pretrained=True, optimizer=AdamW, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=No

100%|██████████| 755k/755k [00:00<00:00, 17.5MB/s]
2025-02-03 11:48:07,454	INFO util.py:124 -- Outdated packages:
  ipywidgets==7.7.1 found, needs ipywidgets>=8
Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.
2025-02-03 11:48:08,323	INFO util.py:124 -- Outdated packages:
  ipywidgets==7.7.1 found, needs ipywidgets>=8
Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.


Overriding model.yaml nc=80 with nc=136

                   from  n    params  module                                       arguments                     
  0                  -1  1      1392  ultralytics.nn.modules.conv.Conv             [3, 48, 3, 2]                 
  1                  -1  1     41664  ultralytics.nn.modules.conv.Conv             [48, 96, 3, 2]                
  2                  -1  2    111360  ultralytics.nn.modules.block.C2f             [96, 96, 2, True]             
  3                  -1  1    166272  ultralytics.nn.modules.conv.Conv             [96, 192, 3, 2]               
  4                  -1  4    813312  ultralytics.nn.modules.block.C2f             [192, 192, 4, True]           
  5                  -1  1    664320  ultralytics.nn.modules.conv.Conv             [192, 384, 3, 2]              
  6                  -1  4   3248640  ultralytics.nn.modules.block.C2f             [384, 384, 4, True]           
  7                  -1  1   1991808  ultralyti

[34m[1mtrain: [0mScanning /kaggle/working/yamloutput/labels/train.cache... 1089 images, 0 backgrounds, 0 corrupt: 100%|██████████| 1089/1089 [00:00<?, ?it/s]




[34m[1mtrain: [0mCaching images (7.9GB RAM): 100%|██████████| 1089/1089 [00:39<00:00, 27.85it/s]


[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1, 4.0), tile_grid_size=(8, 8))


  check_for_updates()
  self.pid = os.fork()
[34m[1mval: [0mScanning /kaggle/working/yamloutput/labels/val.cache... 136 images, 0 backgrounds, 0 corrupt: 100%|██████████| 136/136 [00:00<?, ?it/s]




[34m[1mval: [0mCaching images (1.0GB RAM): 100%|██████████| 136/136 [00:05<00:00, 24.52it/s]
  self.pid = os.fork()


[34m[1moptimizer:[0m AdamW(lr=0.002, momentum=0.937) with parameter groups 77 weight(decay=0.0), 84 weight(decay=0.0001), 83 bias(decay=0.0)
[34m[1mTensorBoard: [0mmodel graph visualization added ✅
Image sizes 1920 train, 1920 val
Using 2 dataloader workers
Logging results to [1mdeepscores_detection_dots_1/yolov8m1900_dots_classes[0m
Starting training for 50 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/50      9.53G      2.333      7.541     0.9411          1       1920: 100%|██████████| 545/545 [05:55<00:00,  1.53it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 34/34 [00:12<00:00,  2.69it/s]

                   all        136       1885      0.654      0.496      0.516      0.147






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       2/50       9.1G      2.282      1.297     0.9371          6       1920: 100%|██████████| 545/545 [05:55<00:00,  1.53it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 34/34 [00:11<00:00,  2.94it/s]

                   all        136       1885      0.678      0.438      0.534      0.161






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       3/50      8.51G      2.175      1.125     0.9259         16       1920: 100%|██████████| 545/545 [05:53<00:00,  1.54it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 34/34 [00:11<00:00,  2.93it/s]

                   all        136       1885      0.636      0.511      0.458      0.124






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       4/50      9.24G      2.174     0.9712      0.919         10       1920: 100%|██████████| 545/545 [05:53<00:00,  1.54it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 34/34 [00:11<00:00,  2.94it/s]

                   all        136       1885      0.731      0.533      0.604      0.178






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       5/50      9.19G      2.161     0.9469     0.9034          4       1920: 100%|██████████| 545/545 [05:52<00:00,  1.55it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 34/34 [00:11<00:00,  2.94it/s]

                   all        136       1885      0.488       0.63      0.557      0.183






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       6/50      9.31G      2.101     0.9198     0.9096          0       1920: 100%|██████████| 545/545 [05:52<00:00,  1.54it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 34/34 [00:11<00:00,  2.93it/s]

                   all        136       1885      0.691       0.52      0.568      0.172






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       7/50      9.16G      2.069     0.9101      0.914          0       1920: 100%|██████████| 545/545 [05:52<00:00,  1.54it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 34/34 [00:11<00:00,  2.92it/s]

                   all        136       1885      0.732      0.599      0.624      0.214






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       8/50       9.3G      2.083     0.9487     0.9315          4       1920: 100%|██████████| 545/545 [05:52<00:00,  1.55it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 34/34 [00:11<00:00,  2.94it/s]

                   all        136       1885      0.747       0.55       0.62      0.198






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       9/50      9.36G      1.989     0.8486     0.9145          7       1920: 100%|██████████| 545/545 [05:51<00:00,  1.55it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 34/34 [00:11<00:00,  2.93it/s]

                   all        136       1885      0.719      0.509      0.565      0.169






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      10/50      10.1G      1.936     0.8357     0.8951         10       1920: 100%|██████████| 545/545 [05:51<00:00,  1.55it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 34/34 [00:11<00:00,  2.93it/s]

                   all        136       1885       0.72      0.568      0.646      0.198






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      11/50      8.71G      1.914      0.833     0.8979          2       1920: 100%|██████████| 545/545 [05:51<00:00,  1.55it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 34/34 [00:11<00:00,  2.93it/s]

                   all        136       1885      0.678      0.482      0.564       0.17






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      12/50      9.26G      1.978     0.8497     0.9191         19       1920: 100%|██████████| 545/545 [05:52<00:00,  1.55it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 34/34 [00:11<00:00,  2.94it/s]

                   all        136       1885      0.757      0.542      0.639      0.201






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      13/50      9.12G      1.872     0.7929     0.8954         20       1920: 100%|██████████| 545/545 [05:51<00:00,  1.55it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 34/34 [00:11<00:00,  2.93it/s]

                   all        136       1885      0.716      0.532      0.609      0.191






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      14/50      8.46G      1.973     0.8564     0.9027          2       1920: 100%|██████████| 545/545 [05:52<00:00,  1.55it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 34/34 [00:11<00:00,  2.93it/s]

                   all        136       1885      0.767      0.559      0.632       0.21






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      15/50      8.35G      1.934     0.8122     0.8852          0       1920: 100%|██████████| 545/545 [05:51<00:00,  1.55it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 34/34 [00:11<00:00,  2.93it/s]

                   all        136       1885      0.743       0.52      0.613      0.198






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      16/50      9.01G      1.838     0.7953     0.8902         18       1920: 100%|██████████| 545/545 [05:51<00:00,  1.55it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 34/34 [00:11<00:00,  2.93it/s]

                   all        136       1885      0.751      0.569      0.622      0.191






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      17/50      8.93G      1.857     0.7989     0.9002         11       1920: 100%|██████████| 545/545 [05:51<00:00,  1.55it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 34/34 [00:11<00:00,  2.94it/s]

                   all        136       1885       0.76      0.561      0.639      0.213
[34m[1mEarlyStopping: [0mTraining stopped early as no improvement observed in last 10 epochs. Best results observed at epoch 7, best model saved as best.pt.
To update EarlyStopping(patience=10) pass a new patience value, i.e. `patience=300` or use `patience=0` to disable EarlyStopping.






17 epochs completed in 1.728 hours.
Optimizer stripped from deepscores_detection_dots_1/yolov8m1900_dots_classes/weights/last.pt, 52.5MB
Optimizer stripped from deepscores_detection_dots_1/yolov8m1900_dots_classes/weights/best.pt, 52.5MB

Validating deepscores_detection_dots_1/yolov8m1900_dots_classes/weights/best.pt...
Ultralytics 8.3.70 🚀 Python-3.10.14 torch-2.4.0 CUDA:0 (Tesla T4, 15095MiB)
Model summary (fused): 218 layers, 25,918,504 parameters, 0 gradients, 79.1 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 34/34 [00:09<00:00,  3.42it/s]

                   all        136       1885      0.732      0.598      0.624      0.214
       augmentationDot        102       1885      0.732      0.598      0.624      0.214
Speed: 1.2ms preprocess, 69.4ms inference, 0.0ms loss, 0.8ms postprocess per image





Training completed successfully!


In [1]:
!pip install ultralytics

Collecting ultralytics
  Downloading ultralytics-8.3.74-py3-none-any.whl.metadata (35 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.14-py3-none-any.whl.metadata (9.4 kB)
Downloading ultralytics-8.3.74-py3-none-any.whl (914 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m914.7/914.7 kB[0m [31m18.0 MB/s[0m eta [36m0:00:00[0m00:01[0m
[?25hDownloading ultralytics_thop-2.0.14-py3-none-any.whl (26 kB)
Installing collected packages: ultralytics-thop, ultralytics
Successfully installed ultralytics-8.3.74 ultralytics-thop-2.0.14


In [2]:
from ultralytics import YOLO
import torch
from pathlib import Path
import yaml
import gc

def train_yolo_deepscores(
    yaml_path,
    model_size='m',
    epochs=50,
    batch_size=2,
    imgsz=1900,
    workers=8,
    selected_classes = [
        'stem',
    ]



):
    """
    Memory-optimized YOLOv8 training on DeepScores dataset
    """
    # Clear GPU cache before starting
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        gc.collect()
    
    print(f"Training YOLOv8{model_size} on DeepScores dataset...")
    
    # Load dataset configuration
    with open(yaml_path, 'r') as f:
        dataset_config = yaml.safe_load(f)
    
    # Get indices of selected classes
    try:
        selected_indices = [dataset_config['names'].index(cls) for cls in selected_classes]
    except ValueError as e:
        print(f"Error: Class not found in dataset. {e}")
        print("Available classes:", dataset_config['names'])
        raise
    
    # Initialize model with deterministic settings for better memory management
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    model = YOLO(f'yolo11{model_size}.pt')
    
    # Calculate optimal batch size based on available GPU memory
    if torch.cuda.is_available():
        gpu_memory = torch.cuda.get_device_properties(0).total_memory
        # Rough estimation - adjust these values based on your specific case
        memory_per_image = (imgsz * imgsz * 3 * 4) / (1024 * 1024 * 1024)  # in GB
        optimal_batch = min(batch_size, int(gpu_memory / (memory_per_image * 1024 * 1024 * 1024)))
        batch_size = max(1, optimal_batch)
        print(f"Adjusted batch size to {batch_size} based on available GPU memory")

    # Determine device setting dynamically
    if torch.cuda.device_count() > 1:
        device = ','.join(str(i) for i in range(torch.cuda.device_count()))
    else:
        device = '0'
        
    # Training arguments with memory optimization
    args = {
        'data': yaml_path,
        'epochs': epochs,
        'batch': batch_size,
        'imgsz': imgsz,
        'workers': min(workers, 8),  # Reduce worker threads to prevent memory overload
        'patience': 6,
        'device': device,
        'project': 'deepscores_detection_stems_1_11x',
        'name': f'yolo11{model_size}1900_stem_classes',
        'classes': selected_indices,
        'single_cls': True,
        
        # Reduced augmentation to save memory
        'degrees': 10.0,
        'scale': 0.8,
        'shear': 10.0,
        'flipud': 0.0,
        'fliplr': 0.0,
        'mosaic': 0.2,  # Disabled mosaic to reduce memory usage
        'mixup': 0.0,   # Disabled mixup to reduce memory usage
        
        # Memory-efficient optimization settings
        'optimizer': 'AdamW',
        'lr0': 0.002, #0.001 default
        'lrf': 0.01,
        'momentum': 0.937,
        'weight_decay': 0.0001, #0.0005 default
        'warmup_epochs': 3.0,
        'warmup_momentum': 0.8,
        'warmup_bias_lr': 0.1,
        'box': 7.5,
        'cls': 0.5,
        'dfl': 2.0, #1.5 default
        'amp': True,    # Enable Automatic Mixed Precision for memory efficiency
        
        # Reduced saving frequency to save disk I/O
        'save': True,
        'save_period': 10,  # Save every 10 epochs
        'plots': False,     # Disable plotting to save memory
        'rect': False,       # Enable rectangular training for better memory usage
        'val': True,
        'cache': 'ram',     # Cache images in RAM instead of disk
        
        # Additional memory optimization parameters
        'overlap_mask': False,  # Disable mask overlap to save memory
        'multi_scale': True,   # Disable multi-scale training
        'fraction': 1.0,        # Use full dataset
    }
    
    # Start training with memory monitoring
    try:
        # Enable gradient checkpointing if available
        if hasattr(model.model, 'gradient_checkpointing_enable'):
            model.model.gradient_checkpointing_enable()
        
        results = model.train(**args)
        print("Training completed successfully!")
        
        # Clear memory after training
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
            gc.collect()
            
        return results
    except Exception as e:
        print(f"Error during training: {str(e)}")
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
            gc.collect()
        raise

if __name__ == "__main__":
    yaml_path = "/kaggle/working/yamloutput/dataset.yaml"
    
    results = train_yolo_deepscores(
        yaml_path=yaml_path,
        model_size='m',
        epochs=50,
        batch_size=2,
        imgsz=1900,
        workers=8,  # Reduced worker count
        selected_classes = [
            'stem'
        ]


    )

Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.
Training YOLOv8m on DeepScores dataset...
Adjusted batch size to 2 based on available GPU memory
Ultralytics 8.3.74 🚀 Python-3.10.14 torch-2.4.0 CUDA:0 (Tesla T4, 15095MiB)
                                                 CUDA:1 (Tesla T4, 15095MiB)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolo11m.pt, data=/kaggle/working/yamloutput/dataset.yaml, epochs=50, time=None, patience=6, batch=2, imgsz=1900, save=True, save_period=10, cache=ram, device=0,1, workers=8, project=deepscores_detection_stems_1_11x, name=yolo11m1900_stem_classes, exist_ok=False, pretrained=True, optimizer=AdamW, verbose=True, seed=0, deterministic=True, single_cls=True, rect=False, cos_lr=Fals

100%|██████████| 755k/755k [00:00<00:00, 17.3MB/s]
2025-02-12 11:13:41,410	INFO util.py:124 -- Outdated packages:
  ipywidgets==7.7.1 found, needs ipywidgets>=8
Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.
2025-02-12 11:13:42,189	INFO util.py:124 -- Outdated packages:
  ipywidgets==7.7.1 found, needs ipywidgets>=8
Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.


Overriding model.yaml nc=80 with nc=136

                   from  n    params  module                                       arguments                     
  0                  -1  1      1856  ultralytics.nn.modules.conv.Conv             [3, 64, 3, 2]                 
  1                  -1  1     73984  ultralytics.nn.modules.conv.Conv             [64, 128, 3, 2]               
  2                  -1  1    111872  ultralytics.nn.modules.block.C3k2            [128, 256, 1, True, 0.25]     
  3                  -1  1    590336  ultralytics.nn.modules.conv.Conv             [256, 256, 3, 2]              
  4                  -1  1    444928  ultralytics.nn.modules.block.C3k2            [256, 512, 1, True, 0.25]     
  5                  -1  1   2360320  ultralytics.nn.modules.conv.Conv             [512, 512, 3, 2]              
  6                  -1  1   1380352  ultralytics.nn.modules.block.C3k2            [512, 512, 1, True]           
  7                  -1  1   2360320  ultralyti

[34m[1mtrain: [0mScanning /kaggle/working/yamloutput/labels/train.cache... 1089 images, 0 backgrounds, 0 corrupt: 100%|██████████| 1089/1089 [00:00<?, ?it/s]




[34m[1mtrain: [0mCaching images (7.9GB RAM): 100%|██████████| 1089/1089 [00:40<00:00, 27.05it/s]
  check_for_updates()
  self.pid = os.fork()


[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1, 4.0), tile_grid_size=(8, 8))


[34m[1mval: [0mScanning /kaggle/working/yamloutput/labels/val.cache... 136 images, 0 backgrounds, 0 corrupt: 100%|██████████| 136/136 [00:00<?, ?it/s]




[34m[1mval: [0mCaching images (1.0GB RAM): 100%|██████████| 136/136 [00:10<00:00, 13.29it/s]
  self.pid = os.fork()


[34m[1moptimizer:[0m AdamW(lr=0.002, momentum=0.937) with parameter groups 106 weight(decay=0.0), 113 weight(decay=0.0001), 112 bias(decay=0.0)
[34m[1mTensorBoard: [0mmodel graph visualization added ✅
Image sizes 1920 train, 1920 val
Using 4 dataloader workers
Logging results to [1mdeepscores_detection_stems_1_11x/yolo11m1900_stem_classes[0m
Starting training for 50 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


  check_for_updates()
       1/50      15.5G      2.044       6.25       1.08        192       1728:  53%|█████▎    | 289/545 [03:22<02:06,  2.02it/s]



       1/50      15.5G      1.997      4.631      1.085        149       2784: 100%|██████████| 545/545 [06:06<00:00,  1.49it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 68/68 [00:16<00:00,  4.16it/s]


                   all        136      23488    0.00135    0.00234   0.000676   0.000144

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       2/50      13.6G      1.781      2.158      1.051         13       1376:  27%|██▋       | 149/545 [01:27<03:52,  1.70it/s]



       2/50      14.5G      1.814      2.165      1.059        173       1952:  98%|█████████▊| 536/545 [05:19<00:04,  1.95it/s]



       2/50      14.6G      1.824      2.166       1.06        235       2752: 100%|██████████| 545/545 [05:35<00:00,  1.62it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 68/68 [00:15<00:00,  4.45it/s]


                   all        136      23488     0.0149     0.0258    0.00765      0.002

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       3/50      14.6G      1.788       2.05      1.044          1       2464: 100%|██████████| 545/545 [05:26<00:00,  1.67it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 68/68 [00:15<00:00,  4.41it/s]


                   all        136      23488     0.0405      0.062     0.0233     0.0064

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       4/50      14.1G      1.698      1.558      1.019         72       1632: 100%|██████████| 545/545 [05:15<00:00,  1.73it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 68/68 [00:15<00:00,  4.40it/s]


                   all        136      23488     0.0345     0.0577     0.0197     0.0051

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       5/50      14.5G      1.598      1.522     0.9531        180       1280:  91%|█████████ | 495/545 [04:57<00:31,  1.58it/s]



       5/50      14.5G      1.608      1.523     0.9551          0       1536: 100%|██████████| 545/545 [05:42<00:00,  1.59it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 68/68 [00:15<00:00,  4.46it/s]


                   all        136      23488     0.0296     0.0421      0.021    0.00525

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       6/50      13.9G      1.612      1.533     0.9822         10       2272: 100%|██████████| 545/545 [05:06<00:00,  1.78it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 68/68 [00:15<00:00,  4.43it/s]


                   all        136      23488     0.0426     0.0486     0.0248    0.00677

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       7/50      14.1G      1.555      1.307      1.014         18       2848: 100%|██████████| 545/545 [05:21<00:00,  1.69it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 68/68 [00:15<00:00,  4.42it/s]


                   all        136      23488     0.0338     0.0568     0.0189    0.00509

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       8/50      14.7G      1.648      1.421     0.9944          0       2848: 100%|██████████| 545/545 [05:13<00:00,  1.74it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 68/68 [00:15<00:00,  4.43it/s]


                   all        136      23488    0.00829     0.0141     0.0043   0.000857

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       9/50      15.5G      1.637      1.515     0.9752         24       2496: 100%|██████████| 545/545 [05:08<00:00,  1.77it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 68/68 [00:15<00:00,  4.42it/s]


                   all        136      23488     0.0288     0.0428     0.0158    0.00403

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      10/50      14.1G      1.477      1.357     0.9642         78       1792: 100%|██████████| 545/545 [05:24<00:00,  1.68it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 68/68 [00:15<00:00,  4.39it/s]


                   all        136      23488     0.0362     0.0541     0.0192     0.0049

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      11/50      15.1G        1.5      1.452     0.9633        107       2304: 100%|██████████| 545/545 [05:10<00:00,  1.75it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 68/68 [00:15<00:00,  4.41it/s]


                   all        136      23488     0.0515     0.0827      0.031    0.00902

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      12/50      14.4G      1.377      1.051     0.9322         86       2592: 100%|██████████| 545/545 [05:09<00:00,  1.76it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 68/68 [00:15<00:00,  4.40it/s]


                   all        136      23488     0.0563     0.0435     0.0173    0.00405

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      13/50      15.2G      1.326     0.9783     0.9363        120       1536: 100%|██████████| 545/545 [05:11<00:00,  1.75it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 68/68 [00:15<00:00,  4.41it/s]


                   all        136      23488     0.0613     0.0447     0.0176    0.00433

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      14/50      15.4G       1.31       1.04     0.9297          1       2368: 100%|██████████| 545/545 [05:11<00:00,  1.75it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 68/68 [00:15<00:00,  4.40it/s]


                   all        136      23488     0.0628     0.0608     0.0238    0.00642

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      15/50      14.9G      1.304      1.066     0.9023        258       2304: 100%|██████████| 545/545 [05:15<00:00,  1.73it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 68/68 [00:15<00:00,  4.42it/s]


                   all        136      23488     0.0251     0.0414     0.0139    0.00352

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      16/50      13.6G      1.306     0.9566     0.9225         82       1408: 100%|██████████| 545/545 [05:05<00:00,  1.78it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 68/68 [00:15<00:00,  4.43it/s]


                   all        136      23488     0.0333      0.047     0.0186    0.00453

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      17/50        15G      1.292     0.9572     0.9615          0       2400:  90%|████████▉ | 488/545 [04:42<00:40,  1.42it/s]



      17/50      14.1G      1.294      0.966     0.9609        129       2240: 100%|██████████| 545/545 [05:36<00:00,  1.62it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 68/68 [00:15<00:00,  4.46it/s]


                   all        136      23488     0.0328     0.0531     0.0187    0.00487
[34m[1mEarlyStopping: [0mTraining stopped early as no improvement observed in last 6 epochs. Best results observed at epoch 11, best model saved as best.pt.
To update EarlyStopping(patience=6) pass a new patience value, i.e. `patience=300` or use `patience=0` to disable EarlyStopping.

17 epochs completed in 1.599 hours.
Optimizer stripped from deepscores_detection_stems_1_11x/yolo11m1900_stem_classes/weights/last.pt, 41.0MB
Optimizer stripped from deepscores_detection_stems_1_11x/yolo11m1900_stem_classes/weights/best.pt, 41.0MB

Validating deepscores_detection_stems_1_11x/yolo11m1900_stem_classes/weights/best.pt...
Ultralytics 8.3.74 🚀 Python-3.10.14 torch-2.4.0 CUDA:0 (Tesla T4, 15095MiB)
                                                 CUDA:1 (Tesla T4, 15095MiB)
YOLO11m summary (fused): 303 layers, 20,134,888 parameters, 0 gradients, 68.2 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 68/68 [00:10<00:00,  6.39it/s]


                   all        136      23488     0.0515     0.0828      0.031    0.00903
                 brace        135      23488     0.0515     0.0828      0.031    0.00903
Speed: 1.2ms preprocess, 72.3ms inference, 0.0ms loss, 1.2ms postprocess per image
Training completed successfully!


In [None]:
from ultralytics import YOLO
import torch
from pathlib import Path
import yaml
import gc
import cv2
import numpy as np
from PIL import Image
import albumentations as A
from torch.utils.data import Dataset, DataLoader

class TiledDeepScoresDataset(Dataset):
    def __init__(self, img_dir, label_dir, tile_size=640, overlap=0.2):
        self.img_dir = Path(img_dir)
        self.label_dir = Path(label_dir)
        self.tile_size = tile_size
        self.overlap = overlap
        self.img_files = list(self.img_dir.glob('*.jpg')) + list(self.img_dir.glob('*.png'))
        
        # Pre-calculate tiles for each image
        self.tiles = []
        for img_file in self.img_files:
            img = cv2.imread(str(img_file))
            height, width = img.shape[:2]
            
            # Calculate stride (distance between tile starts)
            stride = int(tile_size * (1 - overlap))
            
            # Generate tile coordinates
            for y in range(0, height - tile_size + 1, stride):
                for x in range(0, width - tile_size + 1, stride):
                    self.tiles.append({
                        'img_file': img_file,
                        'x': x,
                        'y': y
                    })

    def __len__(self):
        return len(self.tiles)

    def __getitem__(self, idx):
        tile_info = self.tiles[idx]
        img_file = tile_info['img_file']
        x, y = tile_info['x'], tile_info['y']
        
        # Load image and extract tile
        img = cv2.imread(str(img_file))
        tile = img[y:y+self.tile_size, x:x+self.tile_size]
        
        # Load corresponding label file
        label_file = self.label_dir / (img_file.stem + '.txt')
        if label_file.exists():
            labels = np.loadtxt(str(label_file)).reshape(-1, 5)
            
            # Transform labels to tile coordinates
            tile_labels = []
            for label in labels:
                class_id, x_center, y_center, w, h = label
                
                # Convert normalized coordinates to absolute
                img_height, img_width = img.shape[:2]
                abs_x = x_center * img_width
                abs_y = y_center * img_height
                abs_w = w * img_width
                abs_h = h * img_height
                
                # Check if object is in tile
                if (abs_x + abs_w/2 > tile_info['x'] and 
                    abs_x - abs_w/2 < tile_info['x'] + self.tile_size and
                    abs_y + abs_h/2 > tile_info['y'] and 
                    abs_y - abs_h/2 < tile_info['y'] + self.tile_size):
                    
                    # Transform coordinates relative to tile
                    tile_x = (abs_x - tile_info['x']) / self.tile_size
                    tile_y = (abs_y - tile_info['y']) / self.tile_size
                    tile_w = abs_w / self.tile_size
                    tile_h = abs_h / self.tile_size
                    
                    # Clip coordinates to tile boundaries
                    tile_x = np.clip(tile_x, 0, 1)
                    tile_y = np.clip(tile_y, 0, 1)
                    tile_w = np.clip(tile_w, 0, 1)
                    tile_h = np.clip(tile_h, 0, 1)
                    
                    tile_labels.append([class_id, tile_x, tile_y, tile_w, tile_h])
            
            return tile, np.array(tile_labels)
        return tile, np.array([])

def train_yolo_deepscores(
    yaml_path,
    model_size='m',
    epochs=50,
    batch_size=2,
    imgsz=1900,
    workers=8,
    selected_classes=['augmentationDot'],
    tile_size=640,
    tile_overlap=0.2
):
    """
    Memory-optimized YOLOv8 training on DeepScores dataset with tiling
    """
    # Clear GPU cache before starting
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        gc.collect()
    
    print(f"Training YOLOv8{model_size} on DeepScores dataset with tiling...")
    
    # Load dataset configuration
    with open(yaml_path, 'r') as f:
        dataset_config = yaml.safe_load(f)
    
    # Get indices of selected classes
    try:
        selected_indices = [dataset_config['names'].index(cls) for cls in selected_classes]
    except ValueError as e:
        print(f"Error: Class not found in dataset. {e}")
        print("Available classes:", dataset_config['names'])
        raise
    
    # Initialize model
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    model = YOLO(f'yolov8{model_size}.pt')
    
    # Calculate optimal batch size
    if torch.cuda.is_available():
        gpu_memory = torch.cuda.get_device_properties(0).total_memory
        memory_per_image = (tile_size * tile_size * 3 * 4) / (1024 * 1024 * 1024)
        optimal_batch = min(batch_size, int(gpu_memory / (memory_per_image * 1024 * 1024 * 1024)))
        batch_size = max(1, optimal_batch)
        print(f"Adjusted batch size to {batch_size} based on available GPU memory")

    # Device setting
    device = ','.join(str(i) for i in range(torch.cuda.device_count())) if torch.cuda.device_count() > 1 else '0'
    
    # Training arguments
    args = {
        'data': yaml_path,
        'epochs': epochs,
        'batch': batch_size,
        'imgsz': tile_size,  # Use tile size instead of full image size
        'workers': min(workers, 8),
        'patience': 6,
        'device': device,
        'project': 'deepscores_detection_dots_1_11x_tiled',
        'name': f'yolov8{model_size}{tile_size}_dots_tiled',
        'classes': selected_indices,
        'single_cls': True,
        
        # Augmentation settings optimized for small objects
        'degrees': 5.0,
        'scale': 0.8,
        'shear': 5.0,
        'flipud': 0.0,
        'fliplr': 0.0,
        'mosaic': 0.0,
        'mixup': 0.0,
        
        # Optimization settings
        'optimizer': 'AdamW',
        'lr0': 0.002,
        'lrf': 0.01,
        'momentum': 0.937,
        'weight_decay': 0.0001,
        'warmup_epochs': 3.0,
        'warmup_momentum': 0.8,
        'warmup_bias_lr': 0.1,
        'box': 7.5,
        'cls': 0.5,
        'dfl': 2.0,
        'amp': True,
        
        # Other settings
        'save': True,
        'save_period': 10,
        'plots': False,
        'rect': False,
        'val': True,
        'cache': 'ram',
        'overlap_mask': False,
        'multi_scale': True,
        'fraction': 1.0,
    }
    
    # Create custom dataset with tiling
    train_dataset = TiledDeepScoresDataset(
        img_dir=dataset_config['train'],
        label_dir=dataset_config['train'].replace('images', 'labels'),
        tile_size=tile_size,
        overlap=tile_overlap
    )
    
    try:
        if hasattr(model.model, 'gradient_checkpointing_enable'):
            model.model.gradient_checkpointing_enable()
        
        results = model.train(**args)
        print("Training completed successfully!")
        
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
            gc.collect()
            
        return results
    except Exception as e:
        print(f"Error during training: {str(e)}")
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
            gc.collect()
        raise

if __name__ == "__main__":
    yaml_path = "/kaggle/working/yamloutput/dataset.yaml"
    
    results = train_yolo_deepscores(
        yaml_path=yaml_path,
        model_size='m',
        epochs=50,
        batch_size=2,
        imgsz=1900,
        workers=8,
        selected_classes=['stem'],
        tile_size=640,
        tile_overlap=0.2
    )

Training YOLOv8m on DeepScores dataset with tiling...
Adjusted batch size to 2 based on available GPU memory
Ultralytics 8.3.70 🚀 Python-3.10.14 torch-2.4.0 CUDA:0 (Tesla T4, 15095MiB)
                                                 CUDA:1 (Tesla T4, 15095MiB)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolov8m.pt, data=/kaggle/working/yamloutput/dataset.yaml, epochs=50, time=None, patience=6, batch=2, imgsz=640, save=True, save_period=10, cache=ram, device=0,1, workers=8, project=deepscores_detection_dots_1_11x_tiled, name=yolov8m640_dots_tiled, exist_ok=False, pretrained=True, optimizer=AdamW, verbose=True, seed=0, deterministic=True, single_cls=True, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=True, overlap_mask=False, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=False, source=None, vid_stri

  check_for_updates()


Overriding model.yaml nc=80 with nc=136

                   from  n    params  module                                       arguments                     
  0                  -1  1      1392  ultralytics.nn.modules.conv.Conv             [3, 48, 3, 2]                 
  1                  -1  1     41664  ultralytics.nn.modules.conv.Conv             [48, 96, 3, 2]                
  2                  -1  2    111360  ultralytics.nn.modules.block.C2f             [96, 96, 2, True]             
  3                  -1  1    166272  ultralytics.nn.modules.conv.Conv             [96, 192, 3, 2]               
  4                  -1  4    813312  ultralytics.nn.modules.block.C2f             [192, 192, 4, True]           
  5                  -1  1    664320  ultralytics.nn.modules.conv.Conv             [192, 384, 3, 2]              
  6                  -1  4   3248640  ultralytics.nn.modules.block.C2f             [384, 384, 4, True]           
  7                  -1  1   1991808  ultralyti

[34m[1mtrain: [0mScanning /kaggle/working/yamloutput/labels/train.cache... 1089 images, 0 backgrounds, 0 corrupt: 100%|██████████| 1089/1089 [00:00<?, ?it/s]




[34m[1mtrain: [0mCaching images (0.9GB RAM): 100%|██████████| 1089/1089 [00:32<00:00, 33.89it/s]
  check_for_updates()
  self.pid = os.fork()
[34m[1mval: [0mScanning /kaggle/working/yamloutput/labels/val.cache... 136 images, 0 backgrounds, 0 corrupt: 100%|██████████| 136/136 [00:00<?, ?it/s]


[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1, 4.0), tile_grid_size=(8, 8))


[34m[1mval: [0mCaching images (0.1GB RAM): 100%|██████████| 136/136 [00:07<00:00, 19.15it/s]
  self.pid = os.fork()


[34m[1moptimizer:[0m AdamW(lr=0.002, momentum=0.937) with parameter groups 77 weight(decay=0.0), 84 weight(decay=0.0001), 83 bias(decay=0.0)
[34m[1mTensorBoard: [0mmodel graph visualization added ✅
Image sizes 640 train, 640 val
Using 4 dataloader workers
Logging results to [1mdeepscores_detection_dots_1_11x_tiled/yolov8m640_dots_tiled[0m
Starting training for 50 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


  check_for_updates()
       1/50      1.94G     0.3128     0.8771    0.06667          0        608: 100%|██████████| 545/545 [01:30<00:00,  5.99it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 68/68 [00:02<00:00, 31.48it/s]


                   all        136       1885          0          0          0          0

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       2/50      1.69G      0.371     0.2333    0.08146          0        576:  65%|██████▍   | 354/545 [00:37<00:20,  9.52it/s]

In [2]:
import os
import torch
import torch.version
import torch.cuda
import torchvision
from torchvision.models.detection import fasterrcnn_resnet50_fpn, FasterRCNN_ResNet50_FPN_Weights
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
import numpy as np
import cv2
from PIL import Image
import json
import matplotlib.pyplot as plt
from torchvision.transforms import functional as F
from torchvision.transforms import transforms as T
from tqdm import tqdm
from torch.cuda.amp import GradScaler
import os
import gc
# Force garbage collection
gc.collect()
torch.cuda.empty_cache()

class DeepScoresV2Dataset(Dataset):
    def __init__(self, root_dir, annotation_file, transforms=None, annotation_set_idx=0):
        self.root_dir = root_dir
        self.transforms = transforms
        self.annotation_set_idx = annotation_set_idx
        
        print(f"Loading annotations from {annotation_file}")
        with open(annotation_file, 'r') as f:
            data = json.load(f)
        
        print(f"JSON structure keys: {list(data.keys())}")
        
        self.annotations = data["annotations"]
        self.images_info = {img["id"]: img for img in data["images"]}
        self.categories = data["categories"]
        self.annotation_sets = data["annotation_sets"]
        
        print(f"Using annotation set: {self.annotation_sets[annotation_set_idx]}")
        
        if len(data["images"]) > 0:
            print(f"Sample image info keys: {list(data['images'][0].keys())}")
            
        self.cat_id_to_idx = {}
        for cat_id, cat_info in self.categories.items():
            self.cat_id_to_idx[cat_id] = len(self.cat_id_to_idx) + 1
        
        self.image_ids = list(self.images_info.keys())
        
        print(f"Total number of images: {len(self.image_ids)}")
        print(f"Total number of annotations: {len(self.annotations)}")
        print(f"Total number of categories: {len(self.categories)}")
        
        print("\nFirst few categories:")
        for i, (cat_id, cat_info) in enumerate(self.categories.items()):
            if i < 5:
                print(f"Category {cat_id}: {cat_info.get('name', 'No name')}")
    
    def __len__(self):
        return len(self.image_ids)
    
    def __getitem__(self, idx):
        img_id = self.image_ids[idx]
        img_info = self.images_info[img_id]
        
        if idx < 3:
            print(f"\nLoading image {idx}:")
            print(f"Image ID: {img_id}")
            print(f"Image Info: {img_info}")
        
        if "file_name" in img_info:
            file_name = img_info["file_name"]
        elif "filename" in img_info:
            file_name = img_info["filename"]
        else:
            file_name = f"{img_id}.png"
            print(f"Warning: No filename found for image {img_id}, trying {file_name}")
        
        img_path = os.path.join(self.root_dir, "images", file_name)
        try:
            image = Image.open(img_path).convert("RGB")
            
            # Resize image to a smaller size
            original_size = image.size
            image = T.Resize((500, 500))(image)
            
            if idx < 3:
                print(f"Successfully loaded image: {file_name}")
                print(f"Original size: {original_size}, New size: {image.size}")
                debug_path = f"debug_image_{idx}.png"
                image.save(debug_path)
                print(f"Saved debug image to: {debug_path}")
                
        except FileNotFoundError:
            print(f"Error: Image file not found at {img_path}")
            image = Image.new('RGB', (100, 100), color='red')
            return image, {"boxes": torch.zeros((0, 4)), "labels": torch.zeros(0, dtype=torch.int64)}
        
        ann_ids = img_info.get("ann_ids", [])
        boxes = []
        labels = []
        
        if idx < 3:
            print(f"Number of annotations for this image: {len(ann_ids)}")
        
        # Scale factors for bbox coordinates
        scale_x = 800 / original_size[0]
        scale_y = 800 / original_size[1]
        
        for ann_id in ann_ids:
            if ann_id not in self.annotations:
                print(f"Warning: annotation {ann_id} not found in annotations")
                continue
                
            ann = self.annotations[ann_id]
            
            if idx < 3:
                print(f"Annotation {ann_id}: {ann}")
            
            if "a_bbox" in ann:
                bbox = ann["a_bbox"]
                
                if any(not isinstance(coord, (int, float)) for coord in bbox):
                    print(f"Warning: Invalid bbox coordinates in annotation {ann_id}: {bbox}")
                    continue
                
                # Scale bbox coordinates
                scaled_bbox = [
                    bbox[0] * scale_x,
                    bbox[1] * scale_y,
                    bbox[2] * scale_x,
                    bbox[3] * scale_y
                ]
                
                for cat_id in ann["cat_id"]:
                    if cat_id is None:
                        continue
                    
                    if cat_id in self.cat_id_to_idx:
                        boxes.append(scaled_bbox)
                        labels.append(self.cat_id_to_idx[cat_id])
                        
                        if idx < 3:
                            print(f"Added bbox: {scaled_bbox} for category {cat_id}")
        
        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        labels = torch.as_tensor(labels, dtype=torch.int64)
        
        image = F.to_tensor(image)
        
        if idx < 3:
            print(f"Final number of boxes: {len(boxes)}")
            print(f"Final number of labels: {len(labels)}")
        
        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["image_id"] = torch.tensor([idx])
        
        if boxes.shape[0] > 0:
            area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        else:
            area = torch.zeros((0,))
        target["area"] = area
        
        target["iscrowd"] = torch.zeros((len(boxes),), dtype=torch.int64)
        
        if self.transforms:
            image, target = self.transforms(image, target)
            
        return image, target

class Compose:
    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, image, target):
        for t in self.transforms:
            image, target = t(image, target)
        return image, target

def get_transform():
    transforms = []
    return Compose(transforms)

def get_model(num_classes):
    weights = FasterRCNN_ResNet50_FPN_Weights.DEFAULT
    model = fasterrcnn_resnet50_fpn(weights=weights)
    
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    
    return model

def train_model(model, data_loader, optimizer, device, num_epochs, gradient_accumulation_steps=8):
    model.train()
    scaler = GradScaler()
    
    for epoch in range(num_epochs):
        print(f"\nEpoch {epoch+1}/{num_epochs}")
        
        running_loss = 0.0
        total_batches = len(data_loader)
        optimizer.zero_grad(set_to_none=True)  # More efficient than zero_grad()
        
        pbar = tqdm(data_loader, total=total_batches)
        
        for batch_idx, (images, targets) in enumerate(pbar):
            # Clear cache every iteration
            torch.cuda.empty_cache()
            
            try:
                images = [image.to(device, non_blocking=True) for image in images]
                targets = [{k: v.to(device, non_blocking=True) for k, v in t.items()} for t in targets]
                
                with torch.amp.autocast(device_type='cuda', dtype=torch.float16):
                    loss_dict = model(images, targets)
                    losses = sum(loss for loss in loss_dict.values())
                    losses = losses / gradient_accumulation_steps
                
                scaler.scale(losses).backward()
                
                if (batch_idx + 1) % gradient_accumulation_steps == 0:
                    scaler.unscale_(optimizer)
                    torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
                    scaler.step(optimizer)
                    scaler.update()
                    optimizer.zero_grad(set_to_none=True)
                
                # Explicitly move data to CPU and delete
                for image in images:
                    image.detach_()
                    del image
                for target in targets:
                    for v in target.values():
                        v.detach_()
                        del v
                    del target
                del images, targets
                
                running_loss += losses.item() * gradient_accumulation_steps
                avg_loss = running_loss / (batch_idx + 1)
                
                pbar.set_postfix({'loss': f'{avg_loss:.4f}'})
                
            except RuntimeError as e:
                if "out of memory" in str(e):
                    print('| WARNING: ran out of memory, skipping batch')
                    if hasattr(torch.cuda, 'empty_cache'):
                        torch.cuda.empty_cache()
                    continue
                else:
                    raise e
            
            # Move data to CPU to free up GPU memory
            images = [image.cpu() for image in images]
            targets = [{k: v.cpu() for k, v in t.items()} for t in targets]
            
            running_loss += losses.item() * gradient_accumulation_steps
            avg_loss = running_loss / (batch_idx + 1)
            
            pbar.set_postfix({'loss': f'{avg_loss:.4f}'})
        
        # Handle remaining gradients
        if (batch_idx + 1) % gradient_accumulation_steps != 0:
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad()
            
        epoch_loss = running_loss / len(data_loader)
        print(f"\nEpoch {epoch+1} Complete - Average Loss: {epoch_loss:.4f}")
        
        # Clear cache at end of epoch
        torch.cuda.empty_cache()
        
        if (epoch + 1) % 5 == 0:
            torch.save({
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'loss': epoch_loss,
            }, f"faster_rcnn_deepscoresv2_epoch_{epoch+1}.pth")

def visualize_sample(dataset, idx):
    image, target = dataset[idx]
    
    if isinstance(image, torch.Tensor):
        image = F.to_pil_image(image)
    
    plt.figure(figsize=(10, 10))
    plt.imshow(image)
    
    boxes = target["boxes"].numpy()
    labels = target["labels"].numpy()
    
    for box, label in zip(boxes, labels):
        x1, y1, x2, y2 = box
        plt.gca().add_patch(plt.Rectangle((x1, y1), x2-x1, y2-y1, 
                                         fill=False, edgecolor='red', linewidth=2))
        
        category_name = "Unknown"
        for cat_id, cat_idx in dataset.cat_id_to_idx.items():
            if cat_idx == label:
                if cat_id in dataset.categories:
                    category_name = dataset.categories[cat_id]["name"]
                break
                
        plt.text(x1, y1, f"{category_name} ({label})", 
                bbox=dict(facecolor='yellow', alpha=0.5))
    
    plt.axis('off')
    plt.title(f"Sample {idx}: {len(boxes)} objects")
    plt.savefig(f"sample_visualization_{idx}.png")
    plt.close()

def debug_dataset(dataset, num_samples=3):
    print("\nDebugging Dataset:")
    print(f"Dataset size: {len(dataset)}")
    
    for i in range(min(num_samples, len(dataset))):
        print(f"\nChecking sample {i}:")
        try:
            image, target = dataset[i]
            
            if isinstance(image, torch.Tensor):
                print(f"Image is tensor with shape: {image.shape}")
            else:
                print(f"Image is PIL with size: {image.size}")
            
            print("Target contents:")
            for k, v in target.items():
                if isinstance(v, torch.Tensor):
                    print(f"{k}: tensor shape {v.shape}, dtype {v.dtype}")
                else:
                    print(f"{k}: {v}")
            
            visualize_sample(dataset, i)
            print(f"Saved visualization for sample {i}")
            
        except Exception as e:
            print(f"Error processing sample {i}: {str(e)}")
            import traceback
            traceback.print_exc()

if __name__ == "__main__":
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    print(f"Using device: {device}")
    
    dataset_root = "/kaggle/input/ds2-dense/ds2_dense"
    annotation_file = "/kaggle/input/ds2-dense/ds2_dense/deepscores_train.json"
    
    dataset = DeepScoresV2Dataset(
        dataset_root, 
        annotation_file, 
        transforms=get_transform(),
        annotation_set_idx=0
    )
    
    train_size = int(0.9 * len(dataset))
    val_size = len(dataset) - train_size
    train_dataset, val_dataset = torch.utils.data.random_split(dataset, [train_size, val_size])
    
    train_loader = DataLoader(
        train_dataset, 
        batch_size=1,
        shuffle=True, 
        collate_fn=lambda x: tuple(zip(*x)),
        num_workers=2,  # Reduced from 4
        pin_memory=True,
        persistent_workers=True,
        prefetch_factor=2
    )
    
    val_loader = DataLoader(
        val_dataset, 
        batch_size=1,  # Reduced batch size
        shuffle=False, 
        collate_fn=lambda x: tuple(zip(*x)),
        num_workers=4,
        pin_memory=True
    )
    
    num_classes = len(dataset.cat_id_to_idx) + 1
    print(f"Training with {num_classes} classes")
    
    model = get_model(num_classes)
    model.to(device)
    
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
    
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)
    
    debug_dataset(dataset)
    
    train_model(model, train_loader, optimizer, device, num_epochs=20)
    
    torch.save(model.state_dict(), "faster_rcnn_deepscoresv2_final.pth")

Exception ignored in: <bound method IPythonKernel._clean_thread_parent_frames of <ipykernel.ipkernel.IPythonKernel object at 0x7cd1a742fe20>>
Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/ipykernel/ipkernel.py", line 775, in _clean_thread_parent_frames
    def _clean_thread_parent_frames(
KeyboardInterrupt: 


Using device: cuda
Loading annotations from /kaggle/input/ds2-dense/ds2_dense/deepscores_train.json
JSON structure keys: ['info', 'annotation_sets', 'categories', 'images', 'annotations']
Using annotation set: deepscores
Sample image info keys: ['id', 'filename', 'width', 'height', 'ann_ids']
Total number of images: 1362
Total number of annotations: 889833
Total number of categories: 208

First few categories:
Category 1: brace
Category 137: brace
Category 2: ledgerLine
Category 138: legerLine
Category 3: repeatDot
Training with 209 classes


Downloading: "https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth" to /root/.cache/torch/hub/checkpoints/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth
100%|██████████| 160M/160M [00:02<00:00, 80.9MB/s] 



Debugging Dataset:
Dataset size: 1362

Checking sample 0:

Loading image 0:
Image ID: 2
Image Info: {'id': 2, 'filename': 'lg-94161796-aug-gonville--page-3.png', 'width': 1960, 'height': 2772, 'ann_ids': ['1113019', '1113020', '1113021', '1113022', '1113023', '1113024', '1113025', '1113026', '1113027', '1113028', '1113029', '1113030', '1113031', '1113032', '1113033', '1113034', '1113035', '1113036', '1113037', '1113038', '1113039', '1113040', '1113041', '1113042', '1113043', '1113044', '1113045', '1113046', '1113047', '1113048', '1113049', '1113050', '1113051', '1113052', '1113053', '1113054', '1113055', '1113056', '1113057', '1113058', '1113059', '1113060', '1113061', '1113062', '1113063', '1113064', '1113065', '1113066', '1113067', '1113068', '1113069', '1113070', '1113071', '1113072', '1113073', '1113074', '1113075', '1113076', '1113077', '1113078', '1113079', '1113080', '1113081', '1113082', '1113083', '1113084', '1113085', '1113086', '1113087', '1113088', '1113089', '1113090', '1

  scaler = GradScaler()


Saved visualization for sample 2

Epoch 1/20


  0%|          | 0/1225 [00:04<?, ?it/s, loss=30.2997]


UnboundLocalError: local variable 'images' referenced before assignment