In [3]:
import copy
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import glob
from collections import namedtuple
import functools
import csv
import SimpleITK as sitk
from util import XyzTuple, xyz2irc
from util import enumerateWithEstimate
from logconf import logging

from disk import getCache
from desets import LunaDataset
import torch
import torch.cuda
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

# Notebook-level logger, obtained from the project's logconf-provided logging module.
log = logging.getLogger(__name__)
# Switch to logging.WARN to silence the INFO progress lines:
# log.setLevel(logging.WARN)
log.setLevel(logging.INFO)

In [29]:
# Loader settings for the cache-priming pass. These two names are notebook
# globals: num_workers is read again by the loader factories defined later.
num_workers = 0
batch_size = 128

# Loader over the full dataset, ordered by series_uid, used only to walk every
# sample once in the cache-priming cell.
prep_dl = DataLoader(
    LunaDataset(sortby_str="series_uid"), batch_size=batch_size, num_workers=num_workers
)

2024-10-28 16:47:44,625 INFO     pid:33738 desets:202:__init__ <desets.LunaDataset object at 0x7b622264fc90>: 56938 training samples


In [25]:
# Sanity check: display the batch size the cache-priming loader was built with.
prep_dl.batch_size

128

In [5]:
import datetime

# Walk the whole prep loader once and discard the batches; the two timestamps
# bracket the pass so its wall-clock duration can be read off the output.
# NOTE(review): presumably LunaDataset caches each sample on first access, so this
# pass warms that cache for later cells — confirm in desets.
print(datetime.datetime.now())
# enumerateWithEstimate (from util) wraps the loader and logs progress under the
# given label. NOTE(review): the meaning of start_ndx is defined in util — confirm.
batch_iter = enumerateWithEstimate(
    prep_dl,
    "Stuffing cache",
    start_ndx=prep_dl.num_workers,
)
for _ in batch_iter:
    pass
print(datetime.datetime.now())



2024-10-28 16:24:08.872623


2024-10-28 16:24:09,075 INFO     pid:33738 util:245:enumerateWithEstimate Stuffing cache    4/445, done at 2024-10-28 16:24:26, 0:00:17
2024-10-28 16:24:09,539 INFO     pid:33738 util:245:enumerateWithEstimate Stuffing cache   16/445, done at 2024-10-28 16:24:26, 0:00:17
2024-10-28 16:24:11,354 INFO     pid:33738 util:245:enumerateWithEstimate Stuffing cache   64/445, done at 2024-10-28 16:24:25, 0:00:16
2024-10-28 16:24:18,624 INFO     pid:33738 util:245:enumerateWithEstimate Stuffing cache  256/445, done at 2024-10-28 16:24:25, 0:00:16


2024-10-28 16:24:25.673950


In [20]:
import torch
import torch.nn as nn
from torch.optim import SGD, Adam
from model import LunaModel
from torch.utils.tensorboard import SummaryWriter

# Notebook-level logger (same object as the one created in the first cell, since
# both use __name__).
log = logging.getLogger(__name__)
# Switch to logging.WARN to silence the INFO progress lines:
# log.setLevel(logging.WARN)
log.setLevel(logging.INFO)

# Row indices into the per-epoch metrics tensor, which is allocated with
# METRICS_SIZE rows and one column per dataset sample.
METRICS_LABEL_NDX = 0  # row holding the label written for each sample
METRICS_PRED_NDX = 1  # row holding the model's probability output for each sample
METRICS_LOSS_NDX = 2  # row holding the per-sample loss
METRICS_SIZE = 3  # number of rows in the metrics tensor

In [7]:
# Single device used by the model, the batches and the metrics tensors:
# CUDA when available, otherwise CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [22]:
def initValDl():
    """Build the DataLoader over the validation split.

    The dataset is ``LunaDataset(val_stride=10, isValSet_bool=True)``.
    NOTE(review): presumably every 10th sample is held out for validation —
    confirm in desets.

    Reads the notebook globals ``num_workers`` and ``device``.

    Returns:
        A DataLoader over the validation dataset with a batch size of 128.
    """
    val_ds = LunaDataset(
        val_stride=10,
        isValSet_bool=True,
    )
    batch_size = 128

    val_dl = DataLoader(
        val_ds,
        batch_size=batch_size,
        num_workers=num_workers,
        # pin_memory expects a bool. A torch.device object is always truthy, so
        # passing it directly requested pinned memory even on CPU-only runs.
        pin_memory=device.type == "cuda",
    )

    return val_dl


val_dl = initValDl()

2024-10-28 16:44:41,375 INFO     pid:33738 desets:202:__init__ <desets.LunaDataset object at 0x7b62227d9ed0>: 5694 validation samples


In [31]:
def initTrainDl():
    """Build the DataLoader over the training split.

    The dataset is ``LunaDataset(val_stride=10, isValSet_bool=False)``.
    NOTE(review): presumably this is every sample not held out for validation —
    confirm in desets.

    Reads the notebook globals ``num_workers`` and ``device``.

    Returns:
        A DataLoader over the training dataset with a batch size of 128.
    """
    train_ds = LunaDataset(
        val_stride=10,
        isValSet_bool=False,
    )

    batch_size = 128

    train_dl = DataLoader(
        train_ds,
        batch_size=batch_size,
        num_workers=num_workers,
        # pin_memory expects a bool. A torch.device object is always truthy, so
        # passing it directly requested pinned memory even on CPU-only runs.
        pin_memory=device.type == "cuda",
    )

    return train_dl


train_dl = initTrainDl()

2024-10-28 16:49:22,065 INFO     pid:33738 desets:202:__init__ <desets.LunaDataset object at 0x7b621b1a7390>: 51244 training samples


In [19]:
def initModel():
    """Create a LunaModel and place it on the notebook's ``device``.

    When running on CUDA the number of visible GPUs is logged, and the model is
    wrapped in ``nn.DataParallel`` if more than one is present.

    Returns:
        The model (possibly wrapped in DataParallel) moved to ``device``.
    """
    model = LunaModel()
    # A torch.device object is always truthy, so test its type explicitly;
    # otherwise "Using CUDA" is logged on CPU-only machines as well.
    if device.type == "cuda":
        log.info("Using CUDA; {} devices.".format(torch.cuda.device_count()))
        if torch.cuda.device_count() > 1:
            model = nn.DataParallel(model)
    return model.to(device)


model = initModel()
# writer = SummaryWriter("./log_seq")
# writer.add_graph(model, input)
# writer.close()

2024-10-28 16:42:14,713 INFO     pid:33738 __main__:004:initModel Using CUDA; 1 devices.


In [11]:
def initOptimizer():
    """Return an SGD optimizer (lr=0.001, momentum=0.99) over the global model's parameters."""
    sgd = SGD(model.parameters(), lr=0.001, momentum=0.99)
    return sgd


optimizer = initOptimizer()

In [12]:
def computeBatchLoss(batch_ndx, batch_tup, batch_size, metrics_g):
    """Run the global model on one batch, record per-sample metrics, return the mean loss.

    Args:
        batch_ndx: Zero-based index of this batch within the epoch.
        batch_tup: 4-tuple ``(input, label, series list, center list)``; only the
            first two items are used.
        batch_size: Nominal loader batch size, used to locate this batch's columns
            in ``metrics_g``.
        metrics_g: Tensor indexed ``[METRICS_*_NDX, sample]``; columns
            ``batch_ndx * batch_size`` onward are overwritten in place.

    Returns:
        The mean of the per-sample cross-entropy loss for the batch.
    """
    # NOTE(review): column 1 of the label tensor is used as the class-index
    # target, so labels are assumed to be (N, 2) integer one-hot — confirm.
    input_t, label_t, _series_list, _center_list = batch_tup

    input_g = input_t.to(device, non_blocking=True)
    label_g = label_t.to(device, non_blocking=True)

    logits_g, probability_g = model(input_g)

    # reduction="none" keeps one loss value per sample so it can be stored below.
    perSample_loss_g = nn.CrossEntropyLoss(reduction="none")(logits_g, label_g[:, 1])

    # The final batch may be short, so its width comes from the labels actually
    # received rather than from batch_size.
    first_col = batch_ndx * batch_size
    cols = slice(first_col, first_col + label_t.size(0))

    metrics_g[METRICS_LABEL_NDX, cols] = label_g[:, 1].detach()
    metrics_g[METRICS_PRED_NDX, cols] = probability_g[:, 1].detach()
    metrics_g[METRICS_LOSS_NDX, cols] = perSample_loss_g.detach()

    return perSample_loss_g.mean()

In [13]:
def doTraining(epoch_ndx, train_dl, totalTrainingSamples_count=0):
    """Run one training epoch over ``train_dl`` and return its per-sample metrics.

    Puts the global model in training mode, then for each batch zeroes the
    gradients, computes the batch loss, back-propagates and steps the global
    optimizer.

    Args:
        epoch_ndx: Epoch number, used only in the progress label.
        train_dl: DataLoader supplying the training batches.
        totalTrainingSamples_count: Unused; kept so existing callers that pass it
            keep working. It used to be incremented locally, but an int argument
            cannot be updated for the caller that way, so the statement had no
            effect and was removed.

    Returns:
        A ``METRICS_SIZE x len(train_dl.dataset)`` tensor on the CPU.
    """
    model.train()
    trnMetrics_g = torch.zeros(
        METRICS_SIZE,
        len(train_dl.dataset),
        device=device,
    )

    batch_iter = enumerateWithEstimate(
        train_dl,
        "E{} Training".format(epoch_ndx),
        start_ndx=train_dl.num_workers,
    )
    for batch_ndx, batch_tup in batch_iter:
        optimizer.zero_grad()

        loss_var = computeBatchLoss(
            batch_ndx, batch_tup, train_dl.batch_size, trnMetrics_g
        )

        loss_var.backward()
        optimizer.step()

    return trnMetrics_g.to("cpu")


def doValidation(epoch_ndx, val_dl):
    """Evaluate the global model over ``val_dl`` and return its per-sample metrics.

    Runs under ``torch.no_grad()`` with the model in eval mode; the per-batch loss
    value is discarded because only the metrics tensor is needed.

    Args:
        epoch_ndx: Epoch number, used only in the progress label.
        val_dl: DataLoader supplying the validation batches.

    Returns:
        A ``METRICS_SIZE x len(val_dl.dataset)`` tensor on the CPU.
    """
    sample_count = len(val_dl.dataset)

    with torch.no_grad():
        model.eval()
        valMetrics_g = torch.zeros(METRICS_SIZE, sample_count, device=device)

        progress_iter = enumerateWithEstimate(
            val_dl,
            "E{} Validation ".format(epoch_ndx),
            start_ndx=val_dl.num_workers,
        )
        nominal_batch = val_dl.batch_size
        for ndx, tup in progress_iter:
            computeBatchLoss(ndx, tup, nominal_batch, valMetrics_g)

    return valMetrics_g.to("cpu")

In [16]:
def logMetrics(
    epoch_ndx,
    mode_str,
    metrics_t,
    classificationThreshold=0.5,
):
    """Summarise one epoch's per-sample metrics, log them and write them to TensorBoard.

    Args:
        epoch_ndx: Epoch number; shown in the log lines and used as the
            TensorBoard step.
        mode_str: Short phase label (the notebook passes "trn" and "val"). It
            prefixes the log lines and names the TensorBoard run directory.
        metrics_t: Tensor indexed ``[METRICS_*_NDX, sample]`` holding label,
            prediction and loss rows.
        classificationThreshold: Labels and predictions at or below this value
            count as negative; values above it count as positive.

    Note:
        If a class has no samples, its loss is the mean of an empty selection
        and its accuracy is a division by zero, so both are reported as NaN.
    """
    negLabel_mask = metrics_t[METRICS_LABEL_NDX] <= classificationThreshold
    negPred_mask = metrics_t[METRICS_PRED_NDX] <= classificationThreshold

    posLabel_mask = ~negLabel_mask
    posPred_mask = ~negPred_mask

    neg_count = int(negLabel_mask.sum())
    pos_count = int(posLabel_mask.sum())

    # A sample is correct when its label and its prediction fall on the same
    # side of the threshold.
    neg_correct = int((negLabel_mask & negPred_mask).sum())
    pos_correct = int((posLabel_mask & posPred_mask).sum())

    metrics_dict = {}
    metrics_dict["loss/all"] = metrics_t[METRICS_LOSS_NDX].mean()
    metrics_dict["loss/neg"] = metrics_t[METRICS_LOSS_NDX, negLabel_mask].mean()
    metrics_dict["loss/pos"] = metrics_t[METRICS_LOSS_NDX, posLabel_mask].mean()

    metrics_dict["correct/all"] = (
        (pos_correct + neg_correct) / np.float32(metrics_t.shape[1]) * 100
    )
    metrics_dict["correct/neg"] = neg_correct / np.float32(neg_count) * 100
    metrics_dict["correct/pos"] = pos_correct / np.float32(pos_count) * 100

    log.info(
        ("E{} {:8} {loss/all:.4f} loss, " + "{correct/all:-5.1f}% correct, ").format(
            epoch_ndx,
            mode_str,
            **metrics_dict,
        )
    )
    log.info(
        (
            "E{} {:8} {loss/neg:.4f} loss, "
            + "{correct/neg:-5.1f}% correct ({neg_correct:} of {neg_count:})"
        ).format(
            epoch_ndx,
            mode_str + "_neg",
            neg_correct=neg_correct,
            neg_count=neg_count,
            **metrics_dict,
        )
    )
    log.info(
        (
            "E{} {:8} {loss/pos:.4f} loss, "
            + "{correct/pos:-5.1f}% correct ({pos_correct:} of {pos_count:})"
        ).format(
            epoch_ndx,
            mode_str + "_pos",
            pos_correct=pos_correct,
            pos_count=pos_count,
            **metrics_dict,
        )
    )

    # Write the scalars to TensorBoard. Each mode gets its own run directory:
    # "trn" and "val" share tag names and step numbers, so writing both into one
    # run would put two different values on the same tag and step.
    # View with: tensorboard --logdir=logs
    with SummaryWriter("./logs/" + mode_str) as writer:
        for key, value in metrics_dict.items():
            writer.add_scalar(key, value, epoch_ndx)

In [17]:
# Number of passes over the training set.
epochs = 1

for epoch_ndx in range(1, epochs + 1):
    # Report the batch size actually configured on the training loader and the
    # device already chosen for the notebook. The global batch_size belongs to
    # the cache-priming cell and can drift from the loaders' own setting.
    log.info(
        "Epoch {} of {}, {}/{} batches of size {}*{}".format(
            epoch_ndx,
            epochs,
            len(train_dl),
            len(val_dl),
            train_dl.batch_size,
            device,
        )
    )

    # Train for one epoch, then summarise the collected per-sample metrics.
    trnMetrics_t = doTraining(epoch_ndx, train_dl)
    logMetrics(epoch_ndx, "trn", trnMetrics_t)

    # Evaluate on the validation split using the weights just trained.
    valMetrics_t = doValidation(epoch_ndx, val_dl)
    logMetrics(epoch_ndx, "val", valMetrics_t)

2024-10-28 16:32:46,877 INFO     pid:33738 __main__:004:<module> Epoch 1 of 1, 401/45 batches of size 128*cuda
2024-10-28 16:32:47,059 INFO     pid:33738 util:245:enumerateWithEstimate E1 Training    4/401, done at 2024-10-28 16:33:00, 0:00:13
2024-10-28 16:32:51,041 INFO     pid:33738 util:245:enumerateWithEstimate E1 Training   16/401, done at 2024-10-28 16:34:24, 0:01:38
2024-10-28 16:33:08,769 INFO     pid:33738 util:245:enumerateWithEstimate E1 Training   64/401, done at 2024-10-28 16:35:01, 0:02:15
2024-10-28 16:34:20,981 INFO     pid:33738 util:245:enumerateWithEstimate E1 Training  256/401, done at 2024-10-28 16:35:13, 0:02:26
2024-10-28 16:35:17,563 INFO     pid:33738 __main__:031:logMetrics E1 trn      0.0158 loss,  99.8% correct, 
2024-10-28 16:35:17,564 INFO     pid:33738 __main__:038:logMetrics E1 trn_neg  0.0020 loss, 100.0% correct (51135 of 51135)
2024-10-28 16:35:17,564 INFO     pid:33738 __main__:050:logMetrics E1 trn_pos  6.4921 loss,   0.0% correct (0 of 109)
2024-1