In [1]:
import os
import shutil
import pandas as pd
import yaml
from ultralytics import YOLO

In [2]:
import os
import shutil
import pandas as pd

# Define dataset paths
base_dir = os.path.abspath(".")  # Current working directory: D:\Sem 6 project
data_dir = os.path.join(base_dir, "MURA-v1.1")  # D:\Sem 6 project\MURA-v1.1
yolo_data_dir = os.path.join(base_dir, "MURA_YOLO")  # D:\Sem 6 project\MURA_YOLO

def prepare_yolo_dataset(csv_file, split, label_df):
    """ Converts dataset into YOLO format with improved debugging and label creation. """
    
    # Load CSV with correct column names
    df = pd.read_csv(csv_file, header=None, names=["image_path"])
    
    print(f"\n📊 CSV Analysis for {split}:")
    print(f"   - Total entries in CSV: {len(df)}")
    
    # Check for duplicate entries
    duplicates = df.duplicated().sum()
    print(f"   - Duplicate entries: {duplicates}")
    
    # Create YOLO directory structure
    images_dir = os.path.join(yolo_data_dir, split, "images")
    labels_dir = os.path.join(yolo_data_dir, split, "labels")
    os.makedirs(images_dir, exist_ok=True)
    os.makedirs(labels_dir, exist_ok=True)

    processed_count = 0
    missing_count = 0
    
    # Track unique filenames to avoid duplicates
    copied_filenames = set()
    
    # Map studies to labels
    study_to_label = dict(zip(label_df["study_path"], label_df["label"]))
    
    # Define body parts and class mapping
    body_parts = ["XR_ELBOW", "XR_FINGER", "XR_FOREARM", "XR_HAND", "XR_HUMERUS", "XR_SHOULDER", "XR_WRIST"]
    class_names = [f"{bp}_{cond}" for bp in body_parts for cond in ["negative", "positive"]]
    class_to_id = {name: idx for idx, name in enumerate(class_names)}

    for index, row in df.iterrows():
        # Construct full image path
        image_rel_path = row["image_path"]  # e.g., MURA-v1.1/train/XR_SHOULDER/...
        image_path = os.path.join(base_dir, image_rel_path)  # D:\Sem 6 project\MURA-v1.1\train\...
        image_path = os.path.normpath(image_path)
        
        filename = os.path.basename(image_path)
        
        if index < 5:
            print(f"🔹 Checking file: {image_path}")
            print(f"   - Exists: {os.path.exists(image_path)}")
        
        if not os.path.exists(image_path):
            if missing_count < 10:
                print(f"⚠️ Missing file: {image_path}")
            missing_count += 1
            continue  

        if filename in copied_filenames:
            base, ext = os.path.splitext(filename)
            filename = f"{base}_{index}{ext}"
        
        dest_image_path = os.path.join(images_dir, filename)
        copied_filenames.add(filename)
        
        shutil.copy(image_path, dest_image_path)
        processed_count += 1
        
        # Create label file
        # Get study path from image path
        study_path = "/".join(image_rel_path.split("/")[:-1]) + "/"  # e.g., MURA-v1.1/train/XR_SHOULDER/patient00001/study1_positive/
        label = study_to_label.get(study_path, None)
        if label is None:
            print(f"Warning: No label found for study {study_path}")
            continue
        
        # Extract body part from image path
        body_part = [bp for bp in body_parts if bp in image_rel_path][0]
        condition = "positive" if label == 1 else "negative"
        class_name = f"{body_part}_{condition}"
        class_id = class_to_id[class_name]
        
        lbl_name = filename.replace(".png", ".txt")
        dest_label_path = os.path.join(labels_dir, lbl_name)
        with open(dest_label_path, "w") as f:
            f.write(f"{class_id} 0.5 0.5 1.0 1.0\n")
        
        if processed_count % 1000 == 0:
            print(f"✅ {processed_count} images processed...")

    print(f"📸 Total {processed_count} images processed for {split}.")
    print(f"🚨 Total {missing_count} missing images in {split} set.")

print("🔄 Preparing YOLO dataset...")
print(f"Current working directory: {os.getcwd()}")

if not os.path.exists(data_dir):
    print(f"❌ Data directory not found: {data_dir}")
    exit(1)

train_csv = os.path.join(data_dir, "train_image_paths.csv")
valid_csv = os.path.join(data_dir, "valid_image_paths.csv")
train_label_df = pd.read_csv(os.path.join(data_dir, "train_labeled_studies.csv"), header=None, names=["study_path", "label"])
valid_label_df = pd.read_csv(os.path.join(data_dir, "valid_labeled_studies.csv"), header=None, names=["study_path", "label"])

if not os.path.exists(train_csv):
    print(f"❌ Training CSV file not found: {train_csv}")
else:
    print(f"✅ Training CSV found: {train_csv}")
    try:
        train_df = pd.read_csv(train_csv, header=None, nrows=5)
        print("Sample training CSV content:")
        print(train_df)
    except Exception as e:
        print(f"Error reading CSV: {e}")
    prepare_yolo_dataset(train_csv, "train", train_label_df)

if not os.path.exists(valid_csv):
    print(f"❌ Validation CSV file not found: {valid_csv}")
else:
    print(f"✅ Validation CSV found: {valid_csv}")
    prepare_yolo_dataset(valid_csv, "valid", valid_label_df)

print("✅ Dataset preparation complete!")

🔄 Preparing YOLO dataset...
Current working directory: D:\Sem 6 project
✅ Training CSV found: D:\Sem 6 project\MURA-v1.1\train_image_paths.csv
Sample training CSV content:
                                                   0
0  MURA-v1.1/train/XR_SHOULDER/patient00001/study...
1  MURA-v1.1/train/XR_SHOULDER/patient00001/study...
2  MURA-v1.1/train/XR_SHOULDER/patient00001/study...
3  MURA-v1.1/train/XR_SHOULDER/patient00002/study...
4  MURA-v1.1/train/XR_SHOULDER/patient00002/study...

📊 CSV Analysis for train:
   - Total entries in CSV: 36808
   - Duplicate entries: 0
🔹 Checking file: D:\Sem 6 project\MURA-v1.1\train\XR_SHOULDER\patient00001\study1_positive\image1.png
   - Exists: True
🔹 Checking file: D:\Sem 6 project\MURA-v1.1\train\XR_SHOULDER\patient00001\study1_positive\image2.png
   - Exists: True
🔹 Checking file: D:\Sem 6 project\MURA-v1.1\train\XR_SHOULDER\patient00001\study1_positive\image3.png
   - Exists: True
🔹 Checking file: D:\Sem 6 project\MURA-v1.1\train\XR_SHOULDER\pa

KeyboardInterrupt: 

In [3]:
from ultralytics import YOLO
import torch

# Load YOLO model
model = YOLO("yolov8n.pt")

# Train with checkpoint saving after every epoch
model.train(
    data="MURA_YOLO/mura.yaml",  # Dataset YAML file
    epochs=25,                   # Number of epochs
    imgsz=640,                   # Image size
    batch=16,                     # Batch size
    project="runs/train",        # Project directory
    name="mura_yolov8(25 epoch)7", # Run name
    device=0 if torch.cuda.is_available() else "cpu",  # Use GPU if available
    save=True,                    # Enable checkpoint saving
    save_period=1                  # Save model weights after every epoch
)
model.resume()

New https://pypi.org/project/ultralytics/8.3.96 available  Update with 'pip install -U ultralytics'
Ultralytics 8.3.94  Python-3.10.11 torch-2.5.1+cu121 CUDA:0 (NVIDIA GeForce RTX 3060, 12288MiB)
[34m[1mengine\trainer: [0mtask=detect, mode=train, model=yolov8n.pt, data=MURA_YOLO/mura.yaml, epochs=25, time=None, patience=100, batch=16, imgsz=640, save=True, save_period=1, cache=False, device=0, workers=8, project=runs/train, name=mura_yolov8(25 epoch)73, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=Fals

[34m[1mtrain: [0mScanning D:\Sem 6 project\MURA_YOLO\train\labels... 36806 images, 0 backgrounds, 2 corrupt: 100%|██████████| 368[0m






[34m[1mtrain: [0mNew cache created: D:\Sem 6 project\MURA_YOLO\train\labels.cache


[34m[1mval: [0mScanning D:\Sem 6 project\MURA_YOLO\valid\labels.cache... 3197 images, 0 backgrounds, 0 corrupt: 100%|██████████| [0m


Plotting labels to runs\train\mura_yolov8(25 epoch)73\labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m SGD(lr=0.01, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 8 dataloader workers
Logging results to [1mruns\train\mura_yolov8(25 epoch)73[0m
Starting training for 25 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/25      2.04G     0.2303      2.047     0.9795         18        640: 100%|██████████| 2301/2301 [05:04<00:00,
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 100/100 [00:


                   all       3197       3197      0.526      0.815      0.682      0.674

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       2/25      3.02G      0.176      1.241     0.9146         20        640: 100%|██████████| 2301/2301 [04:43<00:00,
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 100/100 [00:

                   all       3197       3197        0.6      0.775      0.736      0.728






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       3/25      3.02G     0.1906      1.081     0.9191         18        640: 100%|██████████| 2301/2301 [04:51<00:00,
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 100/100 [00:

                   all       3197       3197       0.66      0.744      0.743      0.727






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       4/25      3.02G     0.1708     0.9863     0.9112         18        640: 100%|██████████| 2301/2301 [04:44<00:00,
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 100/100 [00:

                   all       3197       3197      0.652      0.761      0.761      0.753






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       5/25      3.02G     0.1394     0.9163     0.8994         21        640: 100%|██████████| 2301/2301 [04:38<00:00,
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 100/100 [00:

                   all       3197       3197       0.68       0.79      0.789      0.784






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       6/25      3.02G     0.1198     0.8709     0.8935         18        640: 100%|██████████| 2301/2301 [04:39<00:00,
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 100/100 [00:

                   all       3197       3197      0.742      0.774      0.808      0.804






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       7/25      3.02G     0.1079     0.8467     0.8893         19        640: 100%|██████████| 2301/2301 [04:38<00:00,
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 100/100 [00:

                   all       3197       3197      0.712      0.784      0.805      0.801






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       8/25      3.02G    0.09933     0.8195     0.8872         18        640: 100%|██████████| 2301/2301 [04:36<00:00,
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 100/100 [00:

                   all       3197       3197      0.765      0.778      0.825      0.821






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       9/25      3.02G    0.09196     0.8029     0.8841         22        640: 100%|██████████| 2301/2301 [04:36<00:00,
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 100/100 [00:

                   all       3197       3197      0.784      0.772      0.835      0.832






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      10/25      3.02G    0.08684     0.7856     0.8832         18        640: 100%|██████████| 2301/2301 [04:37<00:00,
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 100/100 [00:

                   all       3197       3197      0.763        0.8      0.843      0.841






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      11/25      3.02G    0.08106     0.7709     0.8808         22        640: 100%|██████████| 2301/2301 [04:42<00:00,
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 100/100 [00:

                   all       3197       3197       0.75      0.821      0.841       0.84






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      12/25      3.02G     0.0768     0.7602     0.8794         18        640: 100%|██████████| 2301/2301 [04:40<00:00,
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 100/100 [00:

                   all       3197       3197      0.771       0.81      0.848      0.847






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      13/25      3.02G    0.07329      0.747     0.8804         19        640: 100%|██████████| 2301/2301 [04:35<00:00,
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 100/100 [00:

                   all       3197       3197      0.774      0.813      0.849      0.848






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      14/25      3.02G     0.0689     0.7348     0.8772         17        640: 100%|██████████| 2301/2301 [04:32<00:00,
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 100/100 [00:

                   all       3197       3197      0.785      0.807       0.85      0.848






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      15/25      3.02G    0.06634     0.7294     0.8772         18        640: 100%|██████████| 2301/2301 [04:31<00:00,
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 100/100 [00:


                   all       3197       3197       0.78      0.813      0.854      0.853
Closing dataloader mosaic

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      16/25      3.02G    0.05202     0.5618     0.8639          6        640: 100%|██████████| 2301/2301 [05:14<00:00,
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 100/100 [00:

                   all       3197       3197      0.793      0.798      0.854      0.854






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      17/25      3.02G        inf     0.5436     0.8593          6        640: 100%|██████████| 2301/2301 [04:29<00:00,
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 100/100 [00:

                   all       3197       3197      0.785      0.809      0.856      0.856






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      18/25      3.02G    0.03939     0.5286     0.8586          6        640: 100%|██████████| 2301/2301 [04:31<00:00,
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 100/100 [00:

                   all       3197       3197      0.798      0.805      0.859      0.859






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      19/25      3.02G    0.03421     0.5147     0.8571          6        640: 100%|██████████| 2301/2301 [04:30<00:00,
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 100/100 [00:

                   all       3197       3197      0.807      0.801      0.862      0.862






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      20/25      3.02G    0.03178     0.5007     0.8564          6        640: 100%|██████████| 2301/2301 [04:29<00:00,
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 100/100 [00:

                   all       3197       3197      0.793      0.816      0.865      0.865






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      21/25      3.02G    0.02898     0.4902     0.8547          6        640: 100%|██████████| 2301/2301 [04:29<00:00,
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 100/100 [00:

                   all       3197       3197      0.787      0.827      0.866      0.866






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      22/25      3.02G    0.02687      0.474     0.8558          6        640: 100%|██████████| 2301/2301 [04:28<00:00,
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 100/100 [00:

                   all       3197       3197      0.789      0.828      0.866      0.866






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      23/25      3.02G    0.02472     0.4624      0.857          6        640: 100%|██████████| 2301/2301 [04:29<00:00,
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 100/100 [00:

                   all       3197       3197      0.797      0.823      0.867      0.867






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      24/25      3.02G        inf     0.4466     0.8547          6        640: 100%|██████████| 2301/2301 [04:30<00:00,
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 100/100 [00:

                   all       3197       3197      0.801      0.819      0.867      0.867






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      25/25      3.02G        inf      0.434      0.853          6        640: 100%|██████████| 2301/2301 [04:37<00:00,
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 100/100 [00:

                   all       3197       3197      0.803      0.818      0.867      0.867






25 epochs completed in 2.042 hours.
Optimizer stripped from runs\train\mura_yolov8(25 epoch)73\weights\last.pt, 6.2MB
Optimizer stripped from runs\train\mura_yolov8(25 epoch)73\weights\best.pt, 6.2MB

Validating runs\train\mura_yolov8(25 epoch)73\weights\best.pt...
Ultralytics 8.3.94  Python-3.10.11 torch-2.5.1+cu121 CUDA:0 (NVIDIA GeForce RTX 3060, 12288MiB)
Model summary (fused): 72 layers, 3,008,378 parameters, 0 gradients, 8.1 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 100/100 [00:


                   all       3197       3197      0.804      0.817      0.867      0.867
     XR_ELBOW_negative        235        235      0.748      0.962      0.883      0.883
     XR_ELBOW_positive        230        230       0.87      0.756      0.904      0.904
    XR_FINGER_negative        214        214      0.707      0.893      0.814      0.814
    XR_FINGER_positive        247        247      0.865       0.75        0.9        0.9
   XR_FOREARM_negative        150        150      0.735      0.852      0.848      0.848
   XR_FOREARM_positive        151        151      0.853      0.615      0.833      0.833
      XR_HAND_negative        271        271      0.742      0.956      0.874      0.874
      XR_HAND_positive        189        189      0.813      0.603      0.804      0.804
   XR_HUMERUS_negative        148        148      0.875      0.797      0.889      0.889
   XR_HUMERUS_positive        140        140      0.844      0.893      0.902      0.902
  XR_SHOULDER_negativ

AttributeError: 'DetectionModel' object has no attribute 'resume'

In [4]:
import os

valid_labels_dir = "D:/Sem 6 project/MURA_YOLO/valid/labels"
y_true = []

# Iterate through label files
for label_file in os.listdir(valid_labels_dir):
    if label_file.endswith(".txt"):
        with open(os.path.join(valid_labels_dir, label_file), "r") as f:
            line = f.readline().strip()
            class_id = int(line.split()[0])  # Extract class ID (first value)
            y_true.append(class_id)

print(f"Extracted {len(y_true)} true labels.")

Extracted 3197 true labels.


In [11]:
import numpy as np
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score, cohen_kappa_score,
    roc_auc_score, roc_curve, precision_recall_curve, matthews_corrcoef, confusion_matrix, log_loss
)
import matplotlib.pyplot as plt
from ultralytics import YOLO
import os
import torch

# Clear GPU memory
torch.cuda.empty_cache()

# Paths
VALID_IMAGES_DIR = "D:/Sem 6 project/MURA_YOLO/valid/images"
VALID_LABELS_DIR = "D:/Sem 6 project/MURA_YOLO/valid/labels"
MODEL_PATH = "D:/Sem 6 project/runs/train/mura_yolov8(25 epoch)7/weights/best.pt"

# Step 1: Extract ground truth
y_true = []
label_files = sorted([f for f in os.listdir(VALID_LABELS_DIR) if f.endswith(".txt")])
for label_file in label_files:
    with open(os.path.join(VALID_LABELS_DIR, label_file), "r") as f:
        class_id = int(f.readline().strip().split()[0])
        y_true.append(class_id)

# Step 2: Extract predictions and scores
model = YOLO(MODEL_PATH)
image_files = sorted([os.path.join(VALID_IMAGES_DIR, f) for f in os.listdir(VALID_IMAGES_DIR) if f.endswith(".png")])
batch_size = 8
y_pred = []
y_scores = []
y_scores_full = []

for i in range(0, len(image_files), batch_size):
    batch_files = image_files[i:i + batch_size]
    results = model.predict(batch_files, save=False, imgsz=640, conf=0.1, device="cuda" if torch.cuda.is_available() else "cpu", verbose=False)
    
    for result in results:
        if len(result.boxes) > 0:
            box = result.boxes[0]  # Highest confidence detection
            pred_class = int(box.cls.item())
            pred_score = box.conf.item()
            y_pred.append(pred_class)
            y_scores.append(pred_score)
            scores = [pred_score if j == pred_class else (1 - pred_score) / (13) for j in range(14)]
            y_scores_full.append(scores / np.sum(scores))
        else:
            print(f"Warning: No detection for image {result.path}")
            y_pred.append(0)
            y_scores.append(0.0)
            y_scores_full.append([1.0 / 14] * 14)
    
    torch.cuda.empty_cache()

# Verify lengths
print(f"Length of y_true: {len(y_true)}, Length of y_pred: {len(y_pred)}")
assert len(y_true) == len(y_pred), "Mismatch in lengths! Check image-label pairing."

# Compute metrics
accuracy = accuracy_score(y_true, y_pred)

# Explicitly extract scalar values
precision = precision_score(y_true, y_pred, average="macro", zero_division=0)
precision = precision.item() if hasattr(precision, 'item') else float(precision)

recall = recall_score(y_true, y_pred, average="macro", zero_division=0)
recall = recall.item() if hasattr(recall, 'item') else float(recall)

f1 = f1_score(y_true, y_pred, average="macro", zero_division=0)
f1 = f1.item() if hasattr(f1, 'item') else float(f1)

kappa = cohen_kappa_score(y_true, y_pred)
mcc = matthews_corrcoef(y_true, y_pred)

# Specificity and G-mean
cm = confusion_matrix(y_true, y_pred)
specificities = []
for i in range(cm.shape[0]):
    tn = cm.sum() - (cm[i, :].sum() + cm[:, i].sum() - cm[i, i])
    fp = cm[:, i].sum() - cm[i, i]
    specificity = tn / (tn + fp) if (tn + fp) > 0 else 0
    specificities.append(specificity)
specificity_avg = np.mean(specificities)
g_mean = np.sqrt(recall * specificity_avg)

# ROC AUC and Log Loss
n_classes = 14
class_names = ['XR_ELBOW_neg', 'XR_ELBOW_pos', 'XR_FINGER_neg', 'XR_FINGER_pos', 
               'XR_FOREARM_neg', 'XR_FOREARM_pos', 'XR_HAND_neg', 'XR_HAND_pos',
               'XR_HUMERUS_neg', 'XR_HUMERUS_pos', 'XR_SHOULDER_neg', 'XR_SHOULDER_pos',
               'XR_WRIST_neg', 'XR_WRIST_pos']
y_true_bin = np.array([np.eye(n_classes)[label] for label in y_true])
roc_auc = roc_auc_score(y_true_bin, y_scores_full, multi_class="ovr", average="macro")
logloss = log_loss(y_true_bin, y_scores_full)

# ROC Curves
plt.figure(figsize=(10, 8))
for i in range(n_classes):
    fpr, tpr, _ = roc_curve(y_true_bin[:, i], [score[i] for score in y_scores_full])
    plt.plot(fpr, tpr, label=f'{class_names[i]} (AUC = {roc_auc_score(y_true_bin[:, i], [score[i] for score in y_scores_full]):.2f})')
plt.plot([0, 1], [0, 1], 'k--')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curves (One-vs-Rest)')
plt.legend(loc='best', bbox_to_anchor=(1, 1))
plt.tight_layout()
plt.show()

# PR Curves
plt.figure(figsize=(10, 8))
for i in range(n_classes):
    precision, recall_pr, _ = precision_recall_curve(y_true_bin[:, i], [score[i] for score in y_scores_full])
    plt.plot(recall_pr, precision, label=f'{class_names[i]}')
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.title('Precision-Recall Curves (One-vs-Rest)')
plt.legend(loc='best', bbox_to_anchor=(1, 1))
plt.tight_layout()
plt.show()

# Print metrics
print(f"Accuracy: {accuracy:.4f}")

print(f"Specificity (Avg): {specificity_avg:.4f}")
print(f"G-Mean: {g_mean:.4f}")
print(f"AUC (ROC-AUC): {roc_auc:.4f}")
print(f"MCC: {mcc:.4f}")
print(f"Cohen's Kappa: {kappa:.4f}")
print(f"Log Loss: {logloss:.4f}")

Length of y_true: 3197, Length of y_pred: 3197


<Figure size 1000x800 with 1 Axes>

<Figure size 1000x800 with 1 Axes>

Accuracy: 0.5837
Specificity (Avg): 0.9676
G-Mean: 0.7322
AUC (ROC-AUC): 0.8115
MCC: 0.5553
Cohen's Kappa: 0.5469
Log Loss: 1.7554


In [14]:
import numpy as np
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score, cohen_kappa_score,
    roc_auc_score, roc_curve, precision_recall_curve, matthews_corrcoef, confusion_matrix, log_loss
)
import matplotlib.pyplot as plt
from ultralytics import YOLO
import os
import torch

# Clear GPU memory
torch.cuda.empty_cache()

# Paths
VALID_IMAGES_DIR = "D:/Sem 6 project/MURA_YOLO/valid/images"
VALID_LABELS_DIR = "D:/Sem 6 project/MURA_YOLO/valid/labels"
MODEL_PATH = "D:/Sem 6 project/runs/train/mura_yolov8(25 epoch)7/weights/best.pt"

# Step 1: Extract ground truth
y_true = []
label_files = sorted([f for f in os.listdir(VALID_LABELS_DIR) if f.endswith(".txt")])
for label_file in label_files:
    with open(os.path.join(VALID_LABELS_DIR, label_file), "r") as f:
        class_id = int(f.readline().strip().split()[0])
        y_true.append(class_id)

# Step 2: Extract predictions and scores
model = YOLO(MODEL_PATH)
image_files = sorted([os.path.join(VALID_IMAGES_DIR, f) for f in os.listdir(VALID_IMAGES_DIR) if f.endswith(".png")])
batch_size = 8
y_pred = []
y_scores = []
y_scores_full = []

for i in range(0, len(image_files), batch_size):
    batch_files = image_files[i:i + batch_size]
    results = model.predict(batch_files, save=False, imgsz=640, conf=0.1, device="cuda" if torch.cuda.is_available() else "cpu", verbose=False)
    
    for result in results:
        if len(result.boxes) > 0:
            box = result.boxes[0]  # Highest confidence detection
            pred_class = int(box.cls.item())
            pred_score = box.conf.item()
            y_pred.append(pred_class)
            y_scores.append(pred_score)
            scores = [pred_score if j == pred_class else (1 - pred_score) / (13) for j in range(14)]
            y_scores_full.append(scores / np.sum(scores))
        else:
            print(f"Warning: No detection for image {result.path}")
            y_pred.append(0)
            y_scores.append(0.0)
            y_scores_full.append([1.0 / 14] * 14)
    
    torch.cuda.empty_cache()

# Verify lengths
print(f"Length of y_true: {len(y_true)}, Length of y_pred: {len(y_pred)}")
assert len(y_true) == len(y_pred), "Mismatch in lengths! Check image-label pairing."

# Compute metrics
accuracy = accuracy_score(y_true, y_pred)

# Explicitly convert to float
precision = float(precision_score(y_true, y_pred, average="macro", zero_division=0))
recall = float(recall_score(y_true, y_pred, average="macro", zero_division=0))
f1 = float(f1_score(y_true, y_pred, average="macro", zero_division=0))
kappa = cohen_kappa_score(y_true, y_pred)
mcc = matthews_corrcoef(y_true, y_pred)

# Specificity and G-mean
cm = confusion_matrix(y_true, y_pred)
specificities = []
for i in range(cm.shape[0]):
    tn = cm.sum() - (cm[i, :].sum() + cm[:, i].sum() - cm[i, i])
    fp = cm[:, i].sum() - cm[i, i]
    specificity = tn / (tn + fp) if (tn + fp) > 0 else 0
    specificities.append(specificity)
specificity_avg = np.mean(specificities)
g_mean = np.sqrt(recall * specificity_avg)

# ROC AUC and Log Loss
n_classes = 14
class_names = ['XR_ELBOW_neg', 'XR_ELBOW_pos', 'XR_FINGER_neg', 'XR_FINGER_pos', 
               'XR_FOREARM_neg', 'XR_FOREARM_pos', 'XR_HAND_neg', 'XR_HAND_pos',
               'XR_HUMERUS_neg', 'XR_HUMERUS_pos', 'XR_SHOULDER_neg', 'XR_SHOULDER_pos',
               'XR_WRIST_neg', 'XR_WRIST_pos']
y_true_bin = np.array([np.eye(n_classes)[label] for label in y_true])
roc_auc = float(roc_auc_score(y_true_bin, y_scores_full, multi_class="ovr", average="macro"))
logloss = float(log_loss(y_true_bin, y_scores_full))

# ROC Curves
plt.figure(figsize=(10, 8))
for i in range(n_classes):
    fpr, tpr, _ = roc_curve(y_true_bin[:, i], [score[i] for score in y_scores_full])
    plt.plot(fpr, tpr, label=f'{class_names[i]} (AUC = {float(roc_auc_score(y_true_bin[:, i], [score[i] for score in y_scores_full])):.2f})')
plt.plot([0, 1], [0, 1], 'k--')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curves (One-vs-Rest)')
plt.legend(loc='best', bbox_to_anchor=(1, 1))
plt.tight_layout()
plt.show()

# PR Curves
plt.figure(figsize=(10, 8))
for i in range(n_classes):
    precision, recall_pr, _ = precision_recall_curve(y_true_bin[:, i], [score[i] for score in y_scores_full])
    plt.plot(recall_pr, precision, label=f'{class_names[i]}')
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.title('Precision-Recall Curves (One-vs-Rest)')
plt.legend(loc='best', bbox_to_anchor=(1, 1))
plt.tight_layout()
plt.show()

# Print metrics
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")
print(f"Specificity (Avg): {specificity_avg:.4f}")
print(f"G-Mean: {g_mean:.4f}")
print(f"AUC (ROC-AUC): {roc_auc:.4f}")
print(f"MCC: {mcc:.4f}")
print(f"Cohen's Kappa: {kappa:.4f}")
print(f"Log Loss: {logloss:.4f}")

Length of y_true: 3197, Length of y_pred: 3197


<Figure size 1000x800 with 1 Axes>

<Figure size 1000x800 with 1 Axes>

Accuracy: 0.5837
Precision: [   0.092274    0.092303    0.092332 ...           1           1           1]
Recall: 0.5539817196650395
F1 Score: 0.5175998154250524
Specificity (Avg): 0.9676
G-Mean: 0.7322
AUC (ROC-AUC): 0.8115
MCC: 0.5553
Cohen's Kappa: 0.5469
Log Loss: 1.7554
