## Last Try

In [1]:
import os, glob, xml.etree.ElementTree as ET
import tifffile, numpy as np
from PIL import Image
from sklearn.model_selection import train_test_split
from ultralytics import YOLO
import openslide

In [2]:
## Paths & Constants
ROOT        = 'C:/Users/luukn/AIMI_MONKEY2/monkey-training'
IMG_DIRS    = {
    'original':   os.path.join(ROOT, 'images/pas-original'),
    'diagnostic': os.path.join(ROOT, 'images/pas-diagnostic'),
    'cpg':        os.path.join(ROOT, 'images/pas-cpg'),
}
XML_DIR     = os.path.join(ROOT, 'annotations/xml')
OUT_ROOT    = 'thumbs'  # thumbnails+labels output

In [3]:
## 3. Helpers: Thumbnail and Label Conversion
def get_original_image_size():
    wsi_path = r'C:\Users\luukn\AIMI_MONKEY2\monkey-training\images\pas-original\D_P000001_PAS_Original.tif'  # or .svs, .ndpi, etc.
    slide = openslide.OpenSlide(wsi_path)
    original_w, original_h = slide.dimensions  # returns (width, height)

    return original_w, original_h

# 3.1 Downsample TIFF → JPEG thumbnail using tifffile pyramid

def save_thumb(src_tif, dst_jpg):
    with tifffile.TiffFile(src_tif) as tif:
        arr = tif.series[0].levels[-1].asarray()
    # ensure RGB
    if arr.ndim == 2:
        arr = np.stack([arr]*3, axis=-1)
    elif arr.shape[2] > 3:
        arr = arr[:,:,:3]
    img = Image.fromarray(arr)
    img.save(dst_jpg, quality=90)

def xml_to_yolo(xml_path, thumb_path, dst_txt, class_id=0, box_size=32, original_w=100000, original_h=100000):
    if not os.path.exists(thumb_path):
        print(f"⚠ Thumbnail not found: {thumb_path}")
        return
    
    original_w, original_h = get_original_image_size()

    W_thumb, H_thumb = Image.open(thumb_path).size

    scale_x = W_thumb / original_w
    scale_y = H_thumb / original_h

    tree = ET.parse(xml_path)
    root = tree.getroot()
    annotations = root.find('Annotations')
    if annotations is None:
        print(f"⚠ No <Annotations> tag found in {xml_path}")
        return

    lines = []
    for annotation in annotations.findall('Annotation'):
        coords = annotation.find('Coordinates')
        if coords is None:
            continue
        for coord in coords.findall('Coordinate'):
            x = coord.attrib.get('X')
            y = coord.attrib.get('Y')
            if x is None or y is None:
                continue
            x = float(x) * scale_x
            y = float(y) * scale_y

            half = box_size / 2
            x_min = x - half
            y_min = y - half
            x_max = x + half
            y_max = y + half

            # Clip to image bounds
            x_min = max(0, x_min)
            y_min = max(0, y_min)
            x_max = min(W_thumb, x_max)
            y_max = min(H_thumb, y_max)

            x_c = ((x_min + x_max) / 2) / W_thumb
            y_c = ((y_min + y_max) / 2) / H_thumb
            w   = (x_max - x_min) / W_thumb
            h   = (y_max - y_min) / H_thumb

            if w > 0 and h > 0:
                lines.append(f"{class_id} {x_c:.6f} {y_c:.6f} {w:.6f} {h:.6f}")

    os.makedirs(os.path.dirname(dst_txt), exist_ok=True)
    if lines:
        with open(dst_txt, 'w') as f:
            f.write("\n".join(lines))
        print(f"✅ {len(lines)} boxes in {xml_path} → {dst_txt}")
    else:
        print(f"⚠ No valid boxes in {xml_path}")



In [4]:
## 4. Gather Source Paths & Split
# source TIFF paths
original = glob.glob(os.path.join(IMG_DIRS['original'],   '*.tif'))
diagnostic = glob.glob(os.path.join(IMG_DIRS['diagnostic'], '*.tif'))
cpg = glob.glob(os.path.join(IMG_DIRS['cpg'],        '*.tif'))

# train/val split on original+diagnostic
trainval = original + diagnostic
train, val = train_test_split(trainval, test_size=0.2, random_state=42)

splits = {'train': train, 'val': val, 'test': cpg}
for name, paths in splits.items():
    print(f"{name}: {len(paths)} images")

train: 69 images
val: 18 images
test: 81 images


In [5]:
## 5. Generate Thumbnails & YOLO Labels

for split, paths in splits.items():
    for src in paths:
        basename = os.path.basename(src).replace('.tif','')
        slide_id = basename.split('_PAS_')[0]  # e.g. A_P000001
        # paths for output
        img_dir = os.path.join(OUT_ROOT, split, 'images')
        lbl_dir = os.path.join(OUT_ROOT, split, 'labels')
        os.makedirs(img_dir, exist_ok=True)
        os.makedirs(lbl_dir, exist_ok=True)
        # thumbnail
        dst_jpg = os.path.join(img_dir, slide_id + '.jpg')
        if not os.path.exists(dst_jpg):
            save_thumb(src, dst_jpg)
        # label
        xml_path = os.path.join(XML_DIR, slide_id + '.xml')
        dst_txt  = os.path.join(lbl_dir, slide_id + '.txt')
        if os.path.exists(xml_path):
            xml_to_yolo(xml_path, dst_jpg, dst_txt)
        else:
            print(f"⚠️  Missing XML for {slide_id}")

✅ 1488 boxes in C:/Users/luukn/AIMI_MONKEY2/monkey-training\annotations/xml\C_P000027.xml → thumbs\train\labels\C_P000027.txt
✅ 637 boxes in C:/Users/luukn/AIMI_MONKEY2/monkey-training\annotations/xml\D_P000005.xml → thumbs\train\labels\D_P000005.txt
✅ 358 boxes in C:/Users/luukn/AIMI_MONKEY2/monkey-training\annotations/xml\D_P000013.xml → thumbs\train\labels\D_P000013.txt
⚠ No valid boxes in C:/Users/luukn/AIMI_MONKEY2/monkey-training\annotations/xml\A_P000037.xml
⚠ No valid boxes in C:/Users/luukn/AIMI_MONKEY2/monkey-training\annotations/xml\B_P000011.xml
⚠ No valid boxes in C:/Users/luukn/AIMI_MONKEY2/monkey-training\annotations/xml\A_P000035.xml
✅ 1197 boxes in C:/Users/luukn/AIMI_MONKEY2/monkey-training\annotations/xml\D_P000011.xml → thumbs\train\labels\D_P000011.txt
⚠ No valid boxes in C:/Users/luukn/AIMI_MONKEY2/monkey-training\annotations/xml\C_P000038.xml
✅ 248 boxes in C:/Users/luukn/AIMI_MONKEY2/monkey-training\annotations/xml\D_P000006.xml → thumbs\train\labels\D_P000006.t

In [6]:
## 6. Build File Lists & `data.yaml`

import glob

# clear any preexisting label caches
targets = [os.path.join(OUT_ROOT, s, 'labels.cache') for s in ['train','val','test']]
for t in targets:
    if os.path.exists(t):
        os.remove(t)
        print(f"Deleted label cache: {t}")

list_files = {}
for split in ['train','val','test']:
    jpgs = glob.glob(os.path.join(OUT_ROOT, split, 'images','*.jpg'))
    list_files[split] = sorted(jpgs)
    with open(f'{split}_list.txt','w') as f:
        f.write("\n".join(list_files[split]))
    print(f"Wrote {len(jpgs)} paths to {split}_list.txt")

# write data.yaml
data_yaml = f"""
train: {os.path.abspath('train_list.txt')}
val:   {os.path.abspath('val_list.txt')}
test:  {os.path.abspath('test_list.txt')}

nc: 1
names: ['MNL']
"""
open('data.yaml','w').write(data_yaml)
print('Created data.yaml')

Wrote 59 paths to train_list.txt
Wrote 17 paths to val_list.txt
Wrote 81 paths to test_list.txt
Created data.yaml


In [7]:
## 7. Train & Evaluate YOLOv8 Nano
model = YOLO('yolov8n.pt')
model.info()

# train
model.train(
    data='data.yaml',
    epochs=10,
    batch=8,
    imgsz=640,
    project='MONKEY_YOLOv8n',
    cache=False,
    name='run_transfer',
    exist_ok=True
)

Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8n.pt to 'yolov8n.pt'...


100%|██████████| 6.25M/6.25M [00:00<00:00, 35.0MB/s]


YOLOv8n summary: 129 layers, 3,157,200 parameters, 0 gradients, 8.9 GFLOPs
New https://pypi.org/project/ultralytics/8.3.136 available  Update with 'pip install -U ultralytics'
Ultralytics 8.3.135  Python-3.12.0 torch-2.5.1+cpu CPU (AMD Ryzen 7 7735HS with Radeon Graphics)
[34m[1mengine\trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=8, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=data.yaml, degrees=0.0, deterministic=True, device=cpu, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=10, erasing=0.4, exist_ok=True, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolov8n.pt, momentum=0.937, mosaic=1

[34m[1mtrain: [0mScanning thumbs\train\labels... 35 images, 24 backgrounds, 0 corrupt: 100%|██████████| 59/59 [00:00<00:00, 369.70it/s]

[34m[1mtrain: [0mNew cache created: thumbs\train\labels.cache





[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))
[34m[1mval: [0mFast image access  (ping: 0.10.0 ms, read: 340.8153.0 MB/s, size: 36.4 KB)


[34m[1mval: [0mScanning thumbs\val\labels... 11 images, 6 backgrounds, 0 corrupt: 100%|██████████| 17/17 [00:00<00:00, 426.75it/s]

[34m[1mval: [0mNew cache created: thumbs\val\labels.cache





Plotting labels to MONKEY_YOLOv8n\run_transfer\labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.002, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 0 dataloader workers
Logging results to [1mMONKEY_YOLOv8n\run_transfer[0m
Starting training for 10 epochs...
Closing dataloader mosaic
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/10         0G      4.076       5.36      2.349       1946        640: 100%|██████████| 8/8 [00:34<00:00,  4.32s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:12<00:00,  6.23s/it]

                   all         17       9045          0          0          0          0






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       2/10         0G      3.381      7.004      1.697          0        640: 100%|██████████| 8/8 [00:30<00:00,  3.77s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:10<00:00,  5.42s/it]

                   all         17       9045   0.000196   0.000111   9.82e-05   6.87e-05






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       3/10         0G      3.577      4.508      1.794       2346        640: 100%|██████████| 8/8 [00:26<00:00,  3.35s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:10<00:00,  5.39s/it]

                   all         17       9045    0.00137   0.000774    0.00113   0.000289






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       4/10         0G      3.296      4.041      1.617       1622        640: 100%|██████████| 8/8 [00:27<00:00,  3.46s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:10<00:00,  5.49s/it]

                   all         17       9045   0.000588   0.000332   0.000308   5.12e-05






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       5/10         0G      3.453      6.845       1.56          0        640: 100%|██████████| 8/8 [00:29<00:00,  3.74s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:11<00:00,  5.60s/it]

                   all         17       9045   0.000784   0.000442   0.000405    8.1e-05






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       6/10         0G      3.051      4.433      1.535        426        640: 100%|██████████| 8/8 [00:30<00:00,  3.87s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:10<00:00,  5.22s/it]

                   all         17       9045    0.00118   0.000663   0.000621    0.00019






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       7/10         0G      3.095      4.024       1.57        768        640: 100%|██████████| 8/8 [00:30<00:00,  3.76s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:10<00:00,  5.30s/it]

                   all         17       9045    0.00157   0.000884    0.00106   0.000283






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       8/10         0G      3.293      3.979      1.648       1697        640: 100%|██████████| 8/8 [00:28<00:00,  3.54s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:10<00:00,  5.27s/it]

                   all         17       9045    0.00118   0.000663   0.000739   0.000292






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       9/10         0G      3.668       4.63      1.574        688        640: 100%|██████████| 8/8 [00:26<00:00,  3.35s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:10<00:00,  5.18s/it]

                   all         17       9045    0.00137   0.000774   0.000719   0.000261






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      10/10         0G      3.186      4.476      1.512       2654        640: 100%|██████████| 8/8 [00:30<00:00,  3.79s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:10<00:00,  5.31s/it]

                   all         17       9045    0.00235    0.00133     0.0012   0.000325






10 epochs completed in 0.113 hours.
Optimizer stripped from MONKEY_YOLOv8n\run_transfer\weights\last.pt, 6.2MB
Optimizer stripped from MONKEY_YOLOv8n\run_transfer\weights\best.pt, 6.2MB

Validating MONKEY_YOLOv8n\run_transfer\weights\best.pt...
Ultralytics 8.3.135  Python-3.12.0 torch-2.5.1+cpu CPU (AMD Ryzen 7 7735HS with Radeon Graphics)
Model summary (fused): 72 layers, 3,005,843 parameters, 0 gradients, 8.1 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:01<00:00,  1.19it/s]


                   all         17       9045    0.00235    0.00133     0.0012   0.000325
Speed: 1.6ms preprocess, 46.4ms inference, 0.0ms loss, 28.4ms postprocess per image
Results saved to [1mMONKEY_YOLOv8n\run_transfer[0m


ultralytics.utils.metrics.DetMetrics object with attributes:

ap_class_index: array([0])
box: ultralytics.utils.metrics.Metric object
confusion_matrix: <ultralytics.utils.metrics.ConfusionMatrix object at 0x0000018D7BD03590>
curves: ['Precision-Recall(B)', 'F1-Confidence(B)', 'Precision-Confidence(B)', 'Recall-Confidence(B)']
curves_results: [[array([          0,    0.001001,    0.002002,    0.003003,    0.004004,    0.005005,    0.006006,    0.007007,    0.008008,    0.009009,     0.01001,    0.011011,    0.012012,    0.013013,    0.014014,    0.015015,    0.016016,    0.017017,    0.018018,    0.019019,     0.02002,    0.021021,    0.022022,    0.023023,
          0.024024,    0.025025,    0.026026,    0.027027,    0.028028,    0.029029,     0.03003,    0.031031,    0.032032,    0.033033,    0.034034,    0.035035,    0.036036,    0.037037,    0.038038,    0.039039,     0.04004,    0.041041,    0.042042,    0.043043,    0.044044,    0.045045,    0.046046,    0.047047,
          0.0480

In [8]:
# evaluate on test (pas-cpg)
metrics = model.val(data='data.yaml', split='test')
print(metrics)

Ultralytics 8.3.135  Python-3.12.0 torch-2.5.1+cpu CPU (AMD Ryzen 7 7735HS with Radeon Graphics)
Model summary (fused): 72 layers, 3,005,843 parameters, 0 gradients, 8.1 GFLOPs
[34m[1mval: [0mFast image access  (ping: 0.10.0 ms, read: 225.376.6 MB/s, size: 26.0 KB)


[34m[1mval: [0mScanning thumbs\test\labels... 47 images, 34 backgrounds, 0 corrupt: 100%|██████████| 81/81 [00:00<00:00, 449.70it/s]

[34m[1mval: [0mNew cache created: thumbs\test\labels.cache



                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 11/11 [00:05<00:00,  1.85it/s]


                   all         81      41975    0.00259     0.0015    0.00132    0.00047
Speed: 0.7ms preprocess, 32.8ms inference, 0.0ms loss, 16.3ms postprocess per image
Results saved to [1mMONKEY_YOLOv8n\run_transfer[0m
ultralytics.utils.metrics.DetMetrics object with attributes:

ap_class_index: array([0])
box: ultralytics.utils.metrics.Metric object
confusion_matrix: <ultralytics.utils.metrics.ConfusionMatrix object at 0x0000018D6FB37FE0>
curves: ['Precision-Recall(B)', 'F1-Confidence(B)', 'Precision-Confidence(B)', 'Recall-Confidence(B)']
curves_results: [[array([          0,    0.001001,    0.002002,    0.003003,    0.004004,    0.005005,    0.006006,    0.007007,    0.008008,    0.009009,     0.01001,    0.011011,    0.012012,    0.013013,    0.014014,    0.015015,    0.016016,    0.017017,    0.018018,    0.019019,     0.02002,    0.021021,    0.022022,    0.023023,
          0.024024,    0.025025,    0.026026,    0.027027,    0.028028,    0.029029,     0.03003,    0.031031