In [1]:
import os
from ultralytics import YOLO
import pandas as pd
from tqdm import tqdm

In [2]:
# Collect the 'images' sub-folder of every patient directory under DIR.
# Plain files that live next to the patient directories are skipped by the isdir check.

DIR = '../data/detection/patients'
patient_folders = sorted(
    os.path.join(DIR, name, 'images')
    for name in os.listdir(DIR)
    if os.path.isdir(os.path.join(DIR, name))
)
patient_folders

['../data/detection/patients/patient 01/images',
 '../data/detection/patients/patient 02/images',
 '../data/detection/patients/patient 03/images',
 '../data/detection/patients/patient 04/images',
 '../data/detection/patients/patient 05/images',
 '../data/detection/patients/patient 06/images',
 '../data/detection/patients/patient 07/images',
 '../data/detection/patients/patient 08/images',
 '../data/detection/patients/patient 09/images',
 '../data/detection/patients/patient 10/images']

In [3]:
# Confidence threshold for our final model, obtained during validation.
CONF = 0.532
# Load the weights of our final model.
model = YOLO('weights.pt')

In [4]:
# Per-patient rows of the form:
# [patient, plasma_cells, non_plasma_cells, total, percentage, diagnosis]
results = []

# Minimum fraction of plasma cells for a patient to be classified as 'diseased'.
DISEASE_THRESHOLD = 0.1

# Iterate through the folders of each patient.
for patient_folder in tqdm(patient_folders):

    # Get the list of image files within the current patient's folder.
    image_files = os.listdir(patient_folder)

    # Extract the patient's identifier from the folder path ('.../patient 07/images' -> 7).
    # os.path is used instead of splitting on '/' so this also works when the path
    # was joined with a different separator (e.g. backslashes on Windows).
    patient_name = os.path.basename(os.path.dirname(os.path.normpath(patient_folder)))
    patient = int(patient_name.split(' ')[-1])

    # Initialize counters for plasma cells and non-plasma cells.
    plasma_cells = 0
    non_plasma_cells = 0

    # Iterate through each image file in the current patient's folder.
    for image_file in image_files:
        # Use the model to make predictions on the image; keep only the predicted class ids.
        detections = model(os.path.join(patient_folder, image_file), conf=CONF, verbose=False)[0].boxes.cls

        # Count the number of plasma cells (class 0) and non-plasma cells (class 1).
        plasma_cells += int((detections == 0).sum())
        non_plasma_cells += int((detections == 1).sum())

    # Calculate the total number of cells and the fraction of plasma cells.
    total = plasma_cells + non_plasma_cells

    if total == 0:
        # No cells were detected for this patient: the fraction is undefined, so report
        # that explicitly instead of failing with ZeroDivisionError.
        percentage = float('nan')
        diagnosis = 'undetermined'
    else:
        ratio = plasma_cells / total
        # The rounded value is only for reporting.
        percentage = round(ratio, 3)

        # Determine the diagnosis from the unrounded fraction: at least 10% plasma cells
        # means 'diseased', otherwise 'normal'. Comparing the rounded value would label
        # e.g. 25/251 (~0.0996, rounds to 0.1) as 'diseased' although it is below 10%.
        diagnosis = 'diseased' if ratio >= DISEASE_THRESHOLD else 'normal'

    # Append the results for the current patient to the results list.
    results.append([patient, plasma_cells, non_plasma_cells, total, percentage, diagnosis])

100%|██████████| 10/10 [00:24<00:00,  2.48s/it]


In [5]:
# Load and display the ground truth data from a CSV file containing patient diagnoses.
ground_truth = pd.read_csv('../data/detection/patients/diagnosis.csv')
ground_truth

Unnamed: 0,patient,plasma_cells,non_plasma_cells,total,percentage,diagnosis
0,1,82,120,202,0.406,diseased
1,2,78,127,205,0.374,diseased
2,3,72,128,200,0.36,diseased
3,4,82,116,198,0.414,diseased
4,5,71,129,200,0.355,diseased
5,6,15,185,200,0.075,normal
6,7,18,187,205,0.088,normal
7,8,9,199,208,0.043,normal
8,9,14,188,202,0.069,normal
9,10,15,188,203,0.074,normal


In [None]:
# Turn the per-patient rows collected by the model into a table and show it.
# NOTE(review): the label 'non_plasma_cell' is singular here, while the ground-truth
# table uses 'non_plasma_cells' - confirm before comparing the two frames by column name.
results = pd.DataFrame(
    results,
    columns=[
        'patient',
        'plasma_cells',
        'non_plasma_cell',
        'total',
        'percentage',
        'diagnosis',
    ],
)
results

Unnamed: 0,patient,plasma_cells,non_plasma_cell,total,percentage,diagnosis
0,1,77,118,195,0.395,diseased
1,2,82,122,204,0.402,diseased
2,3,76,124,200,0.38,diseased
3,4,71,99,170,0.418,diseased
4,5,70,108,178,0.393,diseased
5,6,18,197,215,0.084,normal
6,7,26,208,234,0.111,diseased
7,8,15,238,253,0.059,normal
8,9,15,208,223,0.067,normal
9,10,14,197,211,0.066,normal
