In [22]:
%load_ext autoreload
%autoreload 2
import torch
from torch.utils.data import random_split, DataLoader

from prepare.preprocessing import process_all_scans
from prepare.patch_extraction import extract_negative_patches_from_candidates, extract_patches_from_annotations
from prepare.build_training_index import build_training_index, create_balanced_training_csv
from prepare.classes import LunaPatchDataset, Advanced3DAugment

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [7]:
# Root folders: the dataset archive that is read, and the folder this
# notebook writes its artefacts into.
LUNA_PATH = r"D:\archive"
OUTPUT_PATH = r"D:\output"

# Input CSVs located under LUNA_PATH.
ANNOTATION_FILE = rf"{LUNA_PATH}\annotations.csv"
CANDIDATES_FILE = rf"{LUNA_PATH}\candidates_V2\candidates_V2.csv"

# Pipeline artefacts located under OUTPUT_PATH.
PREPROCESSED_OUTPUT = rf"{OUTPUT_PATH}\preprocessed_luna16"
PATCH_OUTPUT = rf"{OUTPUT_PATH}\patches"
TRAINING_FILE = rf"{OUTPUT_PATH}\training_balanced.csv"
METADATA_FILE = rf"{PREPROCESSED_OUTPUT}\preprocessed_metadata.csv"

In [7]:
# --- Stage 1: Preprocess All Scans ---
# Reads scans from LUNA_PATH and, per the recorded output, saves one .npy
# volume per scan into PREPROCESSED_OUTPUT.
# NOTE(review): METADATA_FILE (used by the later stages) is presumably written
# by this call as well — confirm in prepare.preprocessing.
process_all_scans(LUNA_PATH, PREPROCESSED_OUTPUT)

Processing 89 files in subset0/subset0...
Saved: D:\output\preprocessed_luna16\1.3.6.1.4.1.14519.5.2.1.6279.6001.105756658031515062000744821260.npy | Shape: (302, 390, 390)
Saved: D:\output\preprocessed_luna16\1.3.6.1.4.1.14519.5.2.1.6279.6001.108197895896446896160048741492.npy | Shape: (298, 380, 380)
Saved: D:\output\preprocessed_luna16\1.3.6.1.4.1.14519.5.2.1.6279.6001.109002525524522225658609808059.npy | Shape: (201, 281, 281)
Saved: D:\output\preprocessed_luna16\1.3.6.1.4.1.14519.5.2.1.6279.6001.111172165674661221381920536987.npy | Shape: (336, 380, 380)
Saved: D:\output\preprocessed_luna16\1.3.6.1.4.1.14519.5.2.1.6279.6001.122763913896761494371822656720.npy | Shape: (310, 315, 315)
Saved: D:\output\preprocessed_luna16\1.3.6.1.4.1.14519.5.2.1.6279.6001.124154461048929153767743874565.npy | Shape: (351, 370, 370)
Saved: D:\output\preprocessed_luna16\1.3.6.1.4.1.14519.5.2.1.6279.6001.126121460017257137098781143514.npy | Shape: (332, 285, 285)
Saved: D:\output\preprocessed_luna16\1.3.

In [3]:
# --- Stage 2: Extract 3D Patches for Training ---
# Positive samples: patches built from the annotation CSV and the preprocessed
# metadata CSV. Per the recorded output each patch is saved into PATCH_OUTPUT
# as a .npy file whose name ends in "_pos".
# NOTE(review): patch_size=32 is presumably the edge length in voxels, and it
# must match the value used for the negative patches — confirm.
extract_patches_from_annotations(
    annotation_csv=ANNOTATION_FILE,
    metadata_csv=METADATA_FILE,
    output_folder=PATCH_OUTPUT,
    patch_size=32
)

Saved patch: D:\output\patches\1.3.6.1.4.1.14519.5.2.1.6279.6001.100621383016233746780170740405_266_183_96_pos.npy
Saved patch: D:\output\patches\1.3.6.1.4.1.14519.5.2.1.6279.6001.100621383016233746780170740405_252_163_122_pos.npy
Saved patch: D:\output\patches\1.3.6.1.4.1.14519.5.2.1.6279.6001.100621383016233746780170740405_230_140_211_pos.npy
Saved patch: D:\output\patches\1.3.6.1.4.1.14519.5.2.1.6279.6001.100621383016233746780170740405_142_187_210_pos.npy
Saved patch: D:\output\patches\1.3.6.1.4.1.14519.5.2.1.6279.6001.100953483028192176989979435275_191_203_249_pos.npy
Saved patch: D:\output\patches\1.3.6.1.4.1.14519.5.2.1.6279.6001.104562737760173137525888934217_104_215_98_pos.npy
Saved patch: D:\output\patches\1.3.6.1.4.1.14519.5.2.1.6279.6001.106164978370116976238911317774_158_194_157_pos.npy
Saved patch: D:\output\patches\1.3.6.1.4.1.14519.5.2.1.6279.6001.106719103982792863757268101375_186_216_106_pos.npy
Saved patch: D:\output\patches\1.3.6.1.4.1.14519.5.2.1.6279.6001.107351566

In [12]:
# Negative samples: patches taken from the candidates CSV, written into the
# same PATCH_OUTPUT folder as the positive patches.
# The recorded run needed about 20 minutes for 445 iterations, so avoid
# re-running this cell unless the inputs changed.
# NOTE(review): annotations_csv is presumably used to keep candidates that
# coincide with annotated findings out of the negative set — confirm in
# prepare.patch_extraction.
extract_negative_patches_from_candidates(
    candidates_csv=CANDIDATES_FILE,
    annotations_csv=ANNOTATION_FILE,
    metadata_csv=METADATA_FILE,
    output_folder=PATCH_OUTPUT,
    patch_size=32,
    max_negatives_per_scan=5
)

Extracting Negatives: 100%|██████████████████████████████████████████████████████████| 445/445 [20:46<00:00,  2.80s/it]


In [14]:
# --- Stage 3: Build the Balanced Training Index CSV ---
# Indexes the patches in PATCH_OUTPUT and writes a class-balanced listing to
# TRAINING_FILE. In the recorded run the balancing reduced the negatives from
# 1800 to 615 while the positives stayed at 615.
create_balanced_training_csv(
    patch_folder=PATCH_OUTPUT,
    output_csv=TRAINING_FILE,
    oversample_pos=True,
    downsample_neg=True
)

Original: 615 positive, 1800 negative
Balanced dataset saved: D:\output\training_balanced.csv
Final counts → Positive: 615, Negative: 615


In [39]:
# --- Stage 5: Augmentation & DataLoader ---
# A single augmentation pipeline instance, handed to the dataset below
# (previously this object was created but a second, separate instance was
# passed as the transform, leaving `augment_adv` unused).
augment_adv = Advanced3DAugment()

# Dataset over the balanced index written in Stage 3.
# NOTE(review): hu_min / hu_max look like a Hounsfield-unit clipping window and
# zero_center like mean-centring of the normalised intensities — confirm in
# prepare.classes.LunaPatchDataset.
train_dataset = LunaPatchDataset(
    csv_file=TRAINING_FILE,
    transform=augment_adv,
    hu_min=-1000,
    hu_max=400,
    zero_center=True
)

In [None]:
# Debug aid for the "each tensor should be equal size" error: the default
# DataLoader collation stacks the samples of a batch, which only works when
# every sample tensor has the same shape. Print the first shapes to spot the
# odd one out.
# The count is capped at the dataset length so small datasets do not raise
# an IndexError.
for i in range(min(100, len(train_dataset))):
    x, y = train_dataset[i]
    print(f"Sample {i} shape: {x.shape}")

In [40]:
# --- Stage 6: Split Into Train/Validation ---
# Hold out 20% of the samples for validation; the remainder is used for training.
val_percent = 0.2
val_size = int(len(train_dataset) * val_percent)
train_size = len(train_dataset) - val_size

# A seeded generator makes the train/validation membership identical on every
# run; without it each execution produces a different split and the reported
# validation metrics are not comparable between runs.
train_ds, val_ds = random_split(
    train_dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(42),
)

# NOTE(review): both subsets are views onto the same `train_dataset` object, so
# the transform configured on it is also applied to validation samples —
# confirm that augmenting the validation data is intended.
train_loader = DataLoader(train_ds, batch_size=16, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=16, shuffle=False)

In [41]:
import torch
import torch.nn as nn
from model.model import Luna3DCNN, run_training

# Run on the GPU when CUDA is usable, otherwise fall back to the CPU, and
# show which one was picked.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)

cuda


In [42]:
# Build the network on the selected device and train it with Adam
# (learning rate 1e-4) and binary cross-entropy for 20 epochs.
model = Luna3DCNN().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
# NOTE(review): BCELoss expects inputs that are already probabilities in
# [0, 1]; this presumes Luna3DCNN ends in a sigmoid — confirm in model.model.
criterion = torch.nn.BCELoss()

# NOTE(review): in the recorded run the loss stays at ~0.693 (= ln 2) and the
# AUC hovers around 0.5 for all 20 epochs, i.e. the model did not learn. Check
# the input normalisation, the label/target shapes and the model output
# before trusting the saved checkpoint.
# Per the recorded output, a checkpoint is saved whenever the validation AUC
# reaches a new best; presumably it is written to checkpoint_path — confirm.
run_training(
    model,
    train_loader=train_loader,
    val_loader=val_loader,
    optimizer=optimizer,
    criterion=criterion,
    device=device,
    num_epochs=20,
    checkpoint_path="best_model.pt"
)


Epoch 1/20


Training: 100%|████████████████████████████████████████████████████████████████████████| 62/62 [01:10<00:00,  1.14s/it]
Validating: 100%|██████████████████████████████████████████████████████████████████████| 16/16 [00:18<00:00,  1.13s/it]


Train Loss: 0.6935 | Acc: 0.4858 | AUC: 0.4661
Val   Loss: 0.6931 | Acc: 0.4919 | AUC: 0.5658
✅ Saved new best model (AUC: 0.5658)

Epoch 2/20


Training: 100%|████████████████████████████████████████████████████████████████████████| 62/62 [00:54<00:00,  1.15it/s]
Validating: 100%|██████████████████████████████████████████████████████████████████████| 16/16 [00:14<00:00,  1.11it/s]


Train Loss: 0.6933 | Acc: 0.4868 | AUC: 0.4761
Val   Loss: 0.6931 | Acc: 0.4919 | AUC: 0.5253

Epoch 3/20


Training: 100%|████████████████████████████████████████████████████████████████████████| 62/62 [00:53<00:00,  1.15it/s]
Validating: 100%|██████████████████████████████████████████████████████████████████████| 16/16 [00:13<00:00,  1.23it/s]


Train Loss: 0.6933 | Acc: 0.4756 | AUC: 0.4671
Val   Loss: 0.6932 | Acc: 0.4919 | AUC: 0.4694

Epoch 4/20


Training: 100%|████████████████████████████████████████████████████████████████████████| 62/62 [00:47<00:00,  1.29it/s]
Validating: 100%|██████████████████████████████████████████████████████████████████████| 16/16 [00:12<00:00,  1.26it/s]


Train Loss: 0.6935 | Acc: 0.5051 | AUC: 0.4765
Val   Loss: 0.6932 | Acc: 0.4919 | AUC: 0.4796

Epoch 5/20


Training: 100%|████████████████████████████████████████████████████████████████████████| 62/62 [00:47<00:00,  1.32it/s]
Validating: 100%|██████████████████████████████████████████████████████████████████████| 16/16 [00:12<00:00,  1.26it/s]


Train Loss: 0.6932 | Acc: 0.4654 | AUC: 0.4601
Val   Loss: 0.6931 | Acc: 0.5081 | AUC: 0.5320

Epoch 6/20


Training: 100%|████████████████████████████████████████████████████████████████████████| 62/62 [00:47<00:00,  1.32it/s]
Validating: 100%|██████████████████████████████████████████████████████████████████████| 16/16 [00:12<00:00,  1.26it/s]


Train Loss: 0.6932 | Acc: 0.4685 | AUC: 0.4501
Val   Loss: 0.6931 | Acc: 0.5081 | AUC: 0.5127

Epoch 7/20


Training: 100%|████████████████████████████████████████████████████████████████████████| 62/62 [00:46<00:00,  1.32it/s]
Validating: 100%|██████████████████████████████████████████████████████████████████████| 16/16 [00:12<00:00,  1.27it/s]


Train Loss: 0.6932 | Acc: 0.4837 | AUC: 0.4698
Val   Loss: 0.6931 | Acc: 0.5407 | AUC: 0.5571

Epoch 8/20


Training: 100%|████████████████████████████████████████████████████████████████████████| 62/62 [00:47<00:00,  1.29it/s]
Validating: 100%|██████████████████████████████████████████████████████████████████████| 16/16 [00:12<00:00,  1.26it/s]


Train Loss: 0.6932 | Acc: 0.4990 | AUC: 0.4639
Val   Loss: 0.6931 | Acc: 0.4919 | AUC: 0.5531

Epoch 9/20


Training: 100%|████████████████████████████████████████████████████████████████████████| 62/62 [00:47<00:00,  1.31it/s]
Validating: 100%|██████████████████████████████████████████████████████████████████████| 16/16 [00:12<00:00,  1.27it/s]


Train Loss: 0.6932 | Acc: 0.4970 | AUC: 0.4694
Val   Loss: 0.6931 | Acc: 0.4919 | AUC: 0.5361

Epoch 10/20


Training: 100%|████████████████████████████████████████████████████████████████████████| 62/62 [00:47<00:00,  1.31it/s]
Validating: 100%|██████████████████████████████████████████████████████████████████████| 16/16 [00:12<00:00,  1.26it/s]


Train Loss: 0.6932 | Acc: 0.5020 | AUC: 0.4583
Val   Loss: 0.6931 | Acc: 0.4919 | AUC: 0.5224

Epoch 11/20


Training: 100%|████████████████████████████████████████████████████████████████████████| 62/62 [00:47<00:00,  1.32it/s]
Validating: 100%|██████████████████████████████████████████████████████████████████████| 16/16 [00:12<00:00,  1.26it/s]


Train Loss: 0.6932 | Acc: 0.4766 | AUC: 0.4670
Val   Loss: 0.6932 | Acc: 0.4919 | AUC: 0.4312

Epoch 12/20


Training: 100%|████████████████████████████████████████████████████████████████████████| 62/62 [00:47<00:00,  1.31it/s]
Validating: 100%|██████████████████████████████████████████████████████████████████████| 16/16 [00:12<00:00,  1.28it/s]


Train Loss: 0.6932 | Acc: 0.5020 | AUC: 0.4816
Val   Loss: 0.6931 | Acc: 0.4919 | AUC: 0.5457

Epoch 13/20


Training: 100%|████████████████████████████████████████████████████████████████████████| 62/62 [00:46<00:00,  1.32it/s]
Validating: 100%|██████████████████████████████████████████████████████████████████████| 16/16 [00:12<00:00,  1.27it/s]


Train Loss: 0.6932 | Acc: 0.5020 | AUC: 0.4694
Val   Loss: 0.6931 | Acc: 0.4919 | AUC: 0.5318

Epoch 14/20


Training: 100%|████████████████████████████████████████████████████████████████████████| 62/62 [00:47<00:00,  1.30it/s]
Validating: 100%|██████████████████████████████████████████████████████████████████████| 16/16 [00:12<00:00,  1.28it/s]


Train Loss: 0.6932 | Acc: 0.5020 | AUC: 0.4859
Val   Loss: 0.6931 | Acc: 0.4919 | AUC: 0.5269

Epoch 15/20


Training: 100%|████████████████████████████████████████████████████████████████████████| 62/62 [00:47<00:00,  1.30it/s]
Validating: 100%|██████████████████████████████████████████████████████████████████████| 16/16 [00:12<00:00,  1.28it/s]


Train Loss: 0.6932 | Acc: 0.5020 | AUC: 0.4694
Val   Loss: 0.6932 | Acc: 0.4919 | AUC: 0.5247

Epoch 16/20


Training: 100%|████████████████████████████████████████████████████████████████████████| 62/62 [00:46<00:00,  1.32it/s]
Validating: 100%|██████████████████████████████████████████████████████████████████████| 16/16 [00:12<00:00,  1.27it/s]


Train Loss: 0.6932 | Acc: 0.5020 | AUC: 0.4827
Val   Loss: 0.6931 | Acc: 0.4919 | AUC: 0.5703
✅ Saved new best model (AUC: 0.5703)

Epoch 17/20


Training: 100%|████████████████████████████████████████████████████████████████████████| 62/62 [00:46<00:00,  1.32it/s]
Validating: 100%|██████████████████████████████████████████████████████████████████████| 16/16 [00:12<00:00,  1.29it/s]


Train Loss: 0.6932 | Acc: 0.5020 | AUC: 0.4800
Val   Loss: 0.6932 | Acc: 0.4919 | AUC: 0.5047

Epoch 18/20


Training: 100%|████████████████████████████████████████████████████████████████████████| 62/62 [00:46<00:00,  1.33it/s]
Validating: 100%|██████████████████████████████████████████████████████████████████████| 16/16 [00:12<00:00,  1.26it/s]


Train Loss: 0.6932 | Acc: 0.5020 | AUC: 0.4662
Val   Loss: 0.6931 | Acc: 0.4919 | AUC: 0.5576

Epoch 19/20


Training: 100%|████████████████████████████████████████████████████████████████████████| 62/62 [00:47<00:00,  1.30it/s]
Validating: 100%|██████████████████████████████████████████████████████████████████████| 16/16 [00:12<00:00,  1.27it/s]


Train Loss: 0.6932 | Acc: 0.5020 | AUC: 0.4885
Val   Loss: 0.6932 | Acc: 0.4919 | AUC: 0.4772

Epoch 20/20


Training: 100%|████████████████████████████████████████████████████████████████████████| 62/62 [00:47<00:00,  1.31it/s]
Validating: 100%|██████████████████████████████████████████████████████████████████████| 16/16 [00:12<00:00,  1.27it/s]

Train Loss: 0.6933 | Acc: 0.5041 | AUC: 0.4904
Val   Loss: 0.6930 | Acc: 0.5081 | AUC: 0.5749
✅ Saved new best model (AUC: 0.5749)



