## Imports

In [1]:
# Standard Python Libraries
import os
import glob
import json
import pickle
import random
import xml.etree.ElementTree as ET
import gc
import csv

# Data Processing
import numpy as np
import pandas as pd

# Image Processing and Visualization
import cv2
import pydicom
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import matplotlib.patches as patches
import seaborn as sns
from PIL import Image

# PyTorch and Deep Learning
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
from torchvision import datasets, transforms, models
from torchvision.models import resnet50, ResNet50_Weights

# Evaluation and Metrics
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    precision_score,
    recall_score,
    f1_score,
    accuracy_score,
    precision_recall_fscore_support,
    classification_report
)

# External Libraries and Tools
from ultralytics import YOLO
import albumentations as A
import optuna
import supervision as sv
from tqdm import tqdm

# Custom Utilities
from VisualizationTools.get_data_from_XML import XML_preprocessor, get_category
from VisualizationTools.get_gt import get_gt
from VisualizationTools.getUID import getUID_path
from VisualizationTools.utils import loadFileInformation

# Matplotlib Configuration
%matplotlib inline


  check_for_updates()


## Test (LUNG-PET-Dx)

### YOLOv8n - 16 batch (LUNG-PET-Dx)

In [20]:
%%time

# Load the YOLOv8 model
model_path = '/Users/catarinasilva/Desktop/Master Thesis/lung cancer/runs/detect/lung_cancer_detection_split4(16batch)/weights/best.pt'
model = YOLO(model_path)

# Check the device the model is running on
print(f"Model is on device: {model.device}")

# Path to the dataset YAML configuration file
data_config = '/Users/catarinasilva/Desktop/Master Thesis/lung cancer/YOLOv8 model config/lung_cancer_config.yml'

# Validate the model on the test dataset
results_test = model.val(data=data_config, 
                    split='test',
                    max_det = 1,
                    name='test_YOLO_orig(16batch)',
                    imgsz=512)

Model is on device: cpu
Ultralytics 8.3.63 🚀 Python-3.11.4 torch-2.2.2 CPU (Apple M1)
Model summary (fused): 168 layers, 3,006,428 parameters, 0 gradients, 8.1 GFLOPs


[34m[1mval: [0mScanning /Users/catarinasilva/Desktop/Master Thesis/lung cancer/YOLOv8/labe[0m


[34m[1mval: [0mNew cache created: /Users/catarinasilva/Desktop/Master Thesis/lung cancer/YOLOv8/labels/test.cache


                 Class     Images  Instances      Box(P          R      mAP50  m


                   all       1358       1358      0.322      0.312      0.271      0.137
        Adenocarcinoma        475        475      0.646      0.507      0.613      0.307
  Small Cell Carcinoma        354        354      0.277      0.189      0.184      0.103
  Large Cell Carcinoma         43         43          0          0          0          0
Squamous Cell Carcinoma        486        486      0.364      0.551      0.288      0.137
Speed: 0.6ms preprocess, 128.3ms inference, 0.0ms loss, 0.2ms postprocess per image
Results saved to [1mruns/detect/test_results_lung_cancer_detection_split4(16batch)[0m
CPU times: user 13min 26s, sys: 5min 32s, total: 18min 59s
Wall time: 3min


### YOLOv8s - 16 batch (LUNG-PET-Dx)

In [4]:
%%time

# Load the YOLOv8 model
model_path = '/Users/catarinasilva/Desktop/Master Thesis/lung_cancer/runs/detect/YOLOorig_small(16batch)/weights/best.pt'
model = YOLO(model_path)

# Check the device the model is running on
print(f"Model is on device: {model.device}")

# Path to the dataset YAML configuration file
data_config = '/Users/catarinasilva/Desktop/Master Thesis/lung_cancer/YOLOv8_model_config/lung_cancer_config.yml'

# Validate the model on the test dataset
results_test = model.val(data=data_config, 
                    split='test',
                    max_det = 1,
                    name='test_YOLO_orig_small(16batch)',
                    imgsz=512)

Model is on device: cpu
Ultralytics 8.3.77 🚀 Python-3.11.4 torch-2.6.0 CPU (Apple M1)
Model summary (fused): 72 layers, 11,127,132 parameters, 0 gradients, 28.4 GFLOPs


[34m[1mval: [0mScanning /Users/catarinasilva/Desktop/Master Thesis/lung_cancer/YOLOv8/labe[0m
                 Class     Images  Instances      Box(P          R      mAP50  m


                   all       1358       1358      0.325      0.298      0.249      0.116
        Adenocarcinoma        475        475       0.55      0.532      0.461        0.2
  Small Cell Carcinoma        354        354      0.369      0.116      0.214      0.117
  Large Cell Carcinoma         43         43          0          0          0          0
Squamous Cell Carcinoma        486        486      0.381      0.545      0.319      0.145
Speed: 0.5ms preprocess, 273.8ms inference, 0.0ms loss, 0.2ms postprocess per image
Results saved to [1mruns/detect/test_YOLO_orig_small(16batch)[0m
CPU times: user 30min 28s, sys: 11min 40s, total: 42min 9s
Wall time: 6min 19s


### YOLOv8n + ResNet50 (LUNG-PET-Dx) - 4 frozen layers + default parameters

In [5]:
%%time

# Load the YOLOv8 model
model_path = '/Users/catarinasilva/Desktop/Master Thesis/lung_cancer/runs/detect/lung_cancer_detection_ResNet50/weights/best.pt'
model = YOLO(model_path)

# Check the device the model is running on
print(f"Model is on device: {model.device}")

# Path to the dataset YAML configuration file
data_config = '/Users/catarinasilva/Desktop/Master Thesis/lung_cancer/YOLOv8_model_config/lung_cancer_config.yml'

# Validate the model on the test dataset
results_test = model.val(data=data_config, 
                    split='test',
                    max_det = 1,
                    name='test_lung_cancer_detection_ResNet50',
                    imgsz=512)

Model is on device: cpu
Ultralytics 8.3.77 🚀 Python-3.11.4 torch-2.6.0 CPU (Apple M1)
lung_cancer_config_ summary (fused): 146 layers, 64,565,468 parameters, 0 gradients, 163.1 GFLOPs


[34m[1mval: [0mScanning /Users/catarinasilva/Desktop/Master Thesis/lung_cancer/YOLOv8/labe[0m
                 Class     Images  Instances      Box(P          R      mAP50  m


                   all       1358       1358      0.317      0.239      0.239       0.11
        Adenocarcinoma        475        475      0.535      0.362      0.404      0.164
  Small Cell Carcinoma        354        354      0.378      0.285      0.296       0.15
  Large Cell Carcinoma         43         43          0          0          0          0
Squamous Cell Carcinoma        486        486      0.355      0.309      0.256      0.125
Speed: 0.7ms preprocess, 1317.9ms inference, 0.0ms loss, 0.3ms postprocess per image
Results saved to [1mruns/detect/test_lung_cancer_detection_ResNet50[0m
CPU times: user 2h 32min 51s, sys: 35min 12s, total: 3h 8min 3s
Wall time: 29min 57s


## Test (LUNG-PET-Dx + NSCLC-Radiomics)

### YOLOv8n - 16 batch (LUNG-PET-Dx + NSCLC-Radiomics) E + G patients

In [4]:
# Old split + new patients E
#
# Evaluate the YOLOv8n (16-batch, E + G run) on the test split of the
# '_E' dataset config.
# Note: this cell loads `last.pt`, unlike the other test cells, which load
# `best.pt`; the '_last' suffix in `name` reflects that.

# Inputs: trained weights and the dataset YAML that defines the 'test' split.
model_path = '/Users/catarinasilva/Desktop/Master Thesis/lung_cancer/runs/detect/YOLOorig_nano(16batch)_E_G/weights/last.pt'
data_config = '/Users/catarinasilva/Desktop/Master Thesis/lung_cancer/YOLOv8_model_config/lung_cancer_config_E.yaml'

# Load the checkpoint, then print the device the model reports.
model = YOLO(model_path)
print(f"Model is on device: {model.device}")

# Validation on the test split: 512 px input, max_det=1, with explicit
# confidence (0.05) and IoU (0.6) thresholds.
results_test = model.val(
    data=data_config,
    split='test',
    imgsz=512,
    max_det=1,
    conf=0.05,
    iou=0.6,
    name='test_YOLOorig_nano(16batch)_E_G_last',
)

Model is on device: cpu
Ultralytics 8.3.77 🚀 Python-3.11.4 torch-2.6.0 CPU (Apple M1)
Model summary (fused): 72 layers, 3,006,428 parameters, 0 gradients, 8.1 GFLOPs


[34m[1mval: [0mScanning /Users/catarinasilva/Desktop/Master Thesis/lung_cancer/YOLOv8 E/la[0m
                 Class     Images  Instances      Box(P          R      mAP50  m


                   all       1687       1687      0.483      0.425      0.418      0.213
        Adenocarcinoma        475        475      0.534        0.6      0.499      0.219
  Small Cell Carcinoma        354        354      0.429      0.206      0.317      0.163
  Large Cell Carcinoma        302        302      0.519       0.45      0.488       0.28
Squamous Cell Carcinoma        556        556      0.451      0.444      0.367       0.19
Speed: 0.7ms preprocess, 199.9ms inference, 0.0ms loss, 0.4ms postprocess per image
Results saved to [1mruns/detect/test_YOLOorig_nano(16batch)_E_G_last[0m


### YOLOv8s - 16 batch (LUNG-PET-Dx + NSCLC-Radiomics) E + G patients

In [56]:
# Old split + new patients E
#
# Evaluate the YOLOv8s (16-batch, E + G run) on the test split of the
# '_E' dataset config.
# NOTE(review): this cell uses iou=0.5 / conf=0.15, while the sibling E + G
# test cells use iou=0.6 / conf=0.05 — the reported metrics are therefore not
# produced under the same thresholds; confirm this is intentional.

# Inputs: trained weights and the dataset YAML that defines the 'test' split.
model_path = '/Users/catarinasilva/Desktop/Master Thesis/lung_cancer/runs/detect/YOLOorig_small(16batch)_E_G/weights/best.pt'
data_config = '/Users/catarinasilva/Desktop/Master Thesis/lung_cancer/YOLOv8_model_config/lung_cancer_config_E.yaml'

# Load the checkpoint, then print the device the model reports.
model = YOLO(model_path)
print(f"Model is on device: {model.device}")

# Validation on the test split: 512 px input, max_det=1, with explicit
# confidence (0.15) and IoU (0.5) thresholds.
results_test = model.val(
    data=data_config,
    split='test',
    imgsz=512,
    max_det=1,
    conf=0.15,
    iou=0.5,
    name='test_YOLOorig_small(16batch)_E_G',
)

Model is on device: cpu
Ultralytics 8.3.77 🚀 Python-3.11.4 torch-2.6.0 CPU (Apple M1)
Model summary (fused): 72 layers, 11,127,132 parameters, 0 gradients, 28.4 GFLOPs


[34m[1mval: [0mScanning /Users/catarinasilva/Desktop/Master Thesis/lung_cancer/YOLOv8 E/la[0m
                 Class     Images  Instances      Box(P          R      mAP50  m


                   all       1687       1687      0.455      0.387      0.382        0.2
        Adenocarcinoma        475        475      0.603      0.549      0.492      0.246
  Small Cell Carcinoma        354        354      0.341      0.175      0.238      0.119
  Large Cell Carcinoma        302        302      0.564      0.467      0.531      0.302
Squamous Cell Carcinoma        556        556      0.312      0.358      0.268      0.133
Speed: 1.2ms preprocess, 407.2ms inference, 0.0ms loss, 0.6ms postprocess per image
Results saved to [1mruns/detect/test_YOLOorig_small(16batch)_E_G[0m


### YOLOv8n - 32 batch (LUNG-PET-Dx + NSCLC-Radiomics) E + G patients

In [4]:
# Old split + new patients E
#
# Evaluate the YOLOv8n (32-batch, E + G run) on the test split of the
# '_E' dataset config.

# Inputs: trained weights and the dataset YAML that defines the 'test' split.
model_path = '/Users/catarinasilva/Desktop/Master Thesis/lung_cancer/runs/detect/YOLOorig_nano(32batch)_E_G/weights/best.pt'
data_config = '/Users/catarinasilva/Desktop/Master Thesis/lung_cancer/YOLOv8_model_config/lung_cancer_config_E.yaml'

# Load the checkpoint, then print the device the model reports.
model = YOLO(model_path)
print(f"Model is on device: {model.device}")

# Validation on the test split: 512 px input, max_det=1, with explicit
# confidence (0.05) and IoU (0.6) thresholds.
results_test = model.val(
    data=data_config,
    split='test',
    imgsz=512,
    max_det=1,
    conf=0.05,
    iou=0.6,
    name='test_YOLOorig_nano(32batch)_E_G',
)

Model is on device: cpu
Ultralytics 8.3.77 🚀 Python-3.11.4 torch-2.6.0 CPU (Apple M1)
Model summary (fused): 72 layers, 3,006,428 parameters, 0 gradients, 8.1 GFLOPs


python(39498) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(39499) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(39500) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
[34m[1mval: [0mScanning /Users/catarinasilva/Desktop/Master Thesis/lung_cancer/YOLOv8 E/la[0m
                 Class     Images  Instances      Box(P          R      mAP50  m


                   all       1687       1687      0.485      0.374      0.383      0.195
        Adenocarcinoma        475        475      0.554      0.364      0.428      0.203
  Small Cell Carcinoma        354        354      0.495      0.274      0.319      0.136
  Large Cell Carcinoma        302        302      0.555      0.387       0.48      0.278
Squamous Cell Carcinoma        556        556      0.337      0.471      0.305      0.163
Speed: 0.9ms preprocess, 185.2ms inference, 0.0ms loss, 0.8ms postprocess per image
Results saved to [1mruns/detect/test_YOLOorig_nano(32batch)_E_G[0m


### YOLOv8s - 32 batch (LUNG-PET-Dx + NSCLC-Radiomics) E + G patients

In [5]:
# Old split + new patients E
#
# Evaluate the YOLOv8s (32-batch, E + G run) on the test split of the
# '_E' dataset config.

# Inputs: trained weights and the dataset YAML that defines the 'test' split.
model_path = '/Users/catarinasilva/Desktop/Master Thesis/lung_cancer/runs/detect/YOLOorig_small(32batch)_E_G/weights/best.pt'
data_config = '/Users/catarinasilva/Desktop/Master Thesis/lung_cancer/YOLOv8_model_config/lung_cancer_config_E.yaml'

# Load the checkpoint, then print the device the model reports.
model = YOLO(model_path)
print(f"Model is on device: {model.device}")

# Validation on the test split: 512 px input, max_det=1, with explicit
# confidence (0.05) and IoU (0.6) thresholds.
results_test = model.val(
    data=data_config,
    split='test',
    imgsz=512,
    max_det=1,
    conf=0.05,
    iou=0.6,
    name='test_YOLOorig_small(32batch)_E_G',
)

Model is on device: cpu
Ultralytics 8.3.77 🚀 Python-3.11.4 torch-2.6.0 CPU (Apple M1)
Model summary (fused): 72 layers, 11,127,132 parameters, 0 gradients, 28.4 GFLOPs


[34m[1mval: [0mScanning /Users/catarinasilva/Desktop/Master Thesis/lung_cancer/YOLOv8 E/la[0m
                 Class     Images  Instances      Box(P          R      mAP50  m


                   all       1687       1687      0.432      0.326      0.338      0.175
        Adenocarcinoma        475        475      0.667      0.472      0.525      0.249
  Small Cell Carcinoma        354        354      0.225      0.158      0.134      0.063
  Large Cell Carcinoma        302        302      0.547      0.421      0.511      0.291
Squamous Cell Carcinoma        556        556       0.29      0.254      0.182     0.0956
Speed: 0.6ms preprocess, 297.9ms inference, 0.0ms loss, 0.3ms postprocess per image
Results saved to [1mruns/detect/test_YOLOorig_small(32batch)_E_G[0m


## Other trials

### YOLOv8s - 16 batch (LUNG-PET-Dx + NSCLC-Radiomics) only E patients

In [None]:
# New split + new patioents E
%%time

# Load the YOLOv8 model
model_path = '/Users/catarinasilva/Desktop/Master Thesis/lung_cancer/runs/detect/YOLOorig_small(16batch)_2datasets_E/weights/best.pt'
model = YOLO(model_path)

# Check the device the model is running on
print(f"Model is on device: {model.device}")

# Path to the dataset YAML configuration file
data_config = '/Users/catarinasilva/Desktop/Master Thesis/lung_cancer/YOLOv8_model_config/lung_cancer_config_2datasets_E.yml'

# Validate the model on the test dataset
results_test = model.val(data=data_config, 
                    split='test',
                    max_det = 1,
                    name='test_YOLOorig_small(16batch)_2datasets_E',
                    imgsz=512)

### YOLOv8s - 16 batch (LUNG-PET-Dx + NSCLC-Radiomics) only E patients (different split)

In [None]:
%%time

# Load the YOLOv8 model
model_path = '/Users/catarinasilva/Desktop/Master Thesis/lung_cancer/runs/detect/YOLOorig_small(16batch)_2datasets/weights/best.pt'
model = YOLO(model_path)

# Check the device the model is running on
print(f"Model is on device: {model.device}")

# Path to the dataset YAML configuration file
data_config = '/Users/catarinasilva/Desktop/Master Thesis/lung_cancer/YOLOv8_model_config/lung_cancer_config_2datasets.yml'

# Validate the model on the test dataset
results_test = model.val(data=data_config, 
                    split='test',
                    max_det = 1,
                    name='test_YOLOorig_small(16batch)_2datasets',
                    imgsz=512)