In [None]:
from tqdm import tqdm
from monai.networks.nets import UNet
from ply_creation_lib import create_ply
from skimage.measure import block_reduce
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader, random_split
import matplotlib.pyplot as plt
import monai.losses as losses
import torch.optim as optim
import pydicom as dicom
import torchio as tio
import numpy as np
import pickle
import torch
import time
import copy
import json
import os

In [None]:
def get_annotated_data() -> list[dict[str, np.ndarray]]:
    """Load every annotation mask with its DICOM scan, padded into a fixed-size volume.

    Each entry of the ``annotations`` directory (relative to the current working
    directory) is loaded with ``read_annotation``. The scan folder name is built
    from the first two ``_``-separated parts of the annotation file name and read
    with ``read_dicom``. Both arrays are copied into zero-filled
    ``512 x 512 x 512`` volumes; anything past index 512 on the first axis is
    dropped.

    Returns:
        One ``{'scan': ..., 'mask': ...}`` dict per annotation file.
    """
    volume_size = 512  # edge length of the zero-padded cube each sample is copied into

    data = []
    for annotation_file in tqdm(os.listdir('annotations'), desc='scanning files'):
        # os.path.join instead of a hard-coded '\\' so the path also resolves off Windows.
        mask = read_annotation(os.path.join('annotations', annotation_file))
        scan_folder = '_'.join(annotation_file.split('_')[:2])
        scan = read_dicom(scan_folder)

        # Clamp to the shallower of the two volumes: limiting by the mask alone
        # fails with a broadcast error when the scan has fewer slices than the mask.
        depth = min(volume_size, mask.shape[0], scan.shape[0])

        # NOTE(review): the slice assignments below assume the remaining two axes
        # are already 512 x 512 - confirm against the input data.
        normal_size_mask = np.zeros((volume_size, volume_size, volume_size), dtype=mask.dtype)
        normal_size_mask[:depth, :, :] = mask[:depth, :, :]

        normal_size_scan = np.zeros((volume_size, volume_size, volume_size), dtype=scan.dtype)
        normal_size_scan[:depth, :, :] = scan[:depth, :, :]

        data.append({'scan': normal_size_scan, 'mask': normal_size_mask})
    return data

def read_dicom(input_folder: str) -> np.ndarray:
    """Read all ``.dcm`` files of a folder and stack their pixel data into one array.

    Args:
        input_folder: Directory that contains the DICOM files.

    Returns:
        Array built from the per-file pixel arrays (one entry along the first
        axis per ``.dcm`` file), with every negative value replaced by 0.
    """
    # os.listdir returns entries in arbitrary order; sort so the stacking order is
    # deterministic across runs and file systems.
    # NOTE(review): assumes lexicographic file-name order matches the slice order -
    # confirm, or sort on a DICOM attribute such as InstanceNumber instead.
    files: list[str] = sorted(os.listdir(input_folder))
    data = [
        dicom.dcmread(os.path.join(input_folder, file))
        for file in files
        if file.endswith('.dcm')
    ]
    image = np.array([dicom.pixel_array(datum) for datum in data])
    image[image < 0] = 0
    return image

def downscale_data(data: list[dict[str, np.ndarray]], new_size: int) -> list[dict[str, np.ndarray]]:
    """Downscale every scan/mask pair by block-averaging.

    The block edge is the first mask's first-axis length integer-divided by
    ``new_size``, and the same cubic block is used for all samples. Masks are
    re-binarised by thresholding the block mean at 0.5.

    Args:
        data: Samples as ``{'scan': ..., 'mask': ...}`` dicts.
        new_size: Target edge length used to derive the block size.

    Returns:
        A new list of ``{'scan': ..., 'mask': ...}`` dicts; an empty list when
        ``data`` is empty.
    """
    if not data:
        # Nothing to do, and data[0] below would raise IndexError.
        return []

    b_size = data[0]['mask'].shape[0] // new_size
    block_size = (b_size, b_size, b_size)

    new_data = []
    for datum in tqdm(data, desc='downscaling data'):
        # A block becomes foreground when more than half of its voxels were foreground.
        downscaled_mask = downscale(datum['mask'], block_size) > 0.5
        downscaled_scan = downscale(datum['scan'], block_size)

        new_data.append({'scan': downscaled_scan, 'mask': downscaled_mask})
    return new_data

def downscale(input: np.ndarray, block_size: tuple) -> np.ndarray:
    """Reduce ``input`` with ``block_reduce``, taking ``np.mean`` over each ``block_size`` block."""
    averaged = block_reduce(input, block_size=block_size, func=np.mean)
    return averaged

def save_new_data(data: list[dict[str, np.ndarray]]) -> None:
    """Write each sample's scan and mask as ``<index>.npy`` under the ``base`` dataset folder."""
    progress = tqdm(data, desc='saving data')
    for index, sample in enumerate(progress):
        np.save(f'd:\\dicom\\my_dataset\\base\\scans\\{index}.npy', sample['scan'])
        np.save(f'd:\\dicom\\my_dataset\\base\\masks\\{index}.npy', sample['mask'])

def save_augmented_data(data: list[tuple], folder: str = 'augmented') -> None:
    """Write every ``(scan, mask)`` tuple as ``<index>.npy`` under the given dataset sub-folder.

    Masks are cast to ``bool`` before being written.
    """
    progress = tqdm(data, desc='saving data')
    for index, pair in enumerate(progress):
        scan_array = pair[0]
        mask_array = pair[1].astype(bool)
        np.save(f'd:\\dicom\\my_dataset\\{folder}\\scans\\{index}.npy', scan_array)
        np.save(f'd:\\dicom\\my_dataset\\{folder}\\masks\\{index}.npy', mask_array)

def save_augmented_datum(data: tuple, i: int, folder: str = 'augmented', base_dir: str = 'd:\\dicom\\my_dataset') -> None:
    """Save one ``(scan, mask)`` pair as ``<i>.npy`` in the ``scans`` and ``masks`` folders.

    Args:
        data: Tuple whose first item is the scan and second item the mask.
        i: Index used as the file name.
        folder: Dataset sub-folder below ``base_dir``.
        base_dir: Dataset root. Defaults to the location that was previously
            hard-coded, so existing callers are unaffected.
    """
    scan_dir = os.path.join(base_dir, folder, 'scans')
    mask_dir = os.path.join(base_dir, folder, 'masks')

    # np.save does not create parent folders; make sure they exist first.
    os.makedirs(scan_dir, exist_ok=True)
    os.makedirs(mask_dir, exist_ok=True)

    np.save(os.path.join(scan_dir, f'{i}.npy'), data[0])
    # Masks are stored as booleans regardless of the incoming dtype.
    np.save(os.path.join(mask_dir, f'{i}.npy'), data[1].astype(bool))

def get_transformation():
    """Build the full TorchIO augmentation pipeline.

    The pipeline chains a random flip, affine transform, elastic deformation,
    gamma change, noise and bias field; every step is applied with probability 0.5.
    """
    steps = [
        tio.RandomFlip(axes=(1, 2), p=0.5),
        tio.RandomAffine(degrees=(0, 0, 360), scales=(0.9, 1.1), translation=(5, 5, 5), p=0.5),
        tio.RandomElasticDeformation(num_control_points=7, max_displacement=5, p=0.5),
        tio.RandomGamma(p=0.5),
        tio.RandomNoise(mean=0, std=0.1, p=0.5),
        tio.RandomBiasField(p=0.5),
    ]
    return tio.Compose(steps)

def rotation_transformation():
    """Build the reduced pipeline: a random affine transform followed by a random flip.

    Both steps are applied with probability 0.5.
    """
    steps = [
        tio.RandomAffine(degrees=(0, 0, 360), scales=(0.9, 1.1), translation=(5, 5, 5), p=0.5),
        tio.RandomFlip(axes=(1, 2), p=0.5),
    ]
    return tio.Compose(steps)

def get_subject(scan: np.ndarray, mask: np.ndarray) -> tio.Subject:
    """Wrap a scan and its mask in a ``tio.Subject``.

    A leading axis of length 1 is added to both arrays before they are wrapped
    as ``ScalarImage`` (scan) and ``LabelMap`` (mask).
    """
    scan_with_channel = np.expand_dims(scan, axis=0)
    mask_with_channel = np.expand_dims(mask, axis=0)

    scan_image = tio.ScalarImage(tensor=scan_with_channel)
    mask_image = tio.LabelMap(tensor=mask_with_channel)
    return tio.Subject(scan=scan_image, mask=mask_image)

def augment(scan: np.ndarray, mask: np.ndarray, transform: tio.Compose) -> tuple[np.ndarray, np.ndarray]:
    """Apply ``transform`` to one scan/mask pair and return the results as arrays.

    Returns:
        ``(augmented_scan, augmented_mask)`` with the leading axis added by
        ``get_subject`` removed again.
    """
    transformed = transform(get_subject(scan, mask))
    # Index 0 drops the leading axis that get_subject added.
    aug_scan, aug_mask = (transformed[key].numpy()[0] for key in ('scan', 'mask'))
    return (aug_scan, aug_mask)

def data_augmentation(downscaled_data: list[dict[str, np.ndarray]], augment_count: int, nii: bool):
    """Create ``augment_count`` augmented copies of every sample.

    Args:
        downscaled_data: Samples to augment. With ``nii`` set each item is
            unpacked as a ``(scan, mask)`` pair, otherwise it is read as a dict
            with ``'scan'`` and ``'mask'`` keys.
        augment_count: Number of augmented copies produced per sample.
        nii: Selects the item layout and the pipeline: ``rotation_transformation``
            when true, ``get_transformation`` otherwise.

    Returns:
        List of ``(scan, mask)`` tuples, scans cast to ``int16`` and masks to ``bool``.
    """
    if nii:
        transform = rotation_transformation()
    else:
        transform = get_transformation()

    total = len(downscaled_data)
    augmented_data = []
    for position, datum in enumerate(downscaled_data, start=1):
        scan, mask = datum if nii else (datum['scan'], datum['mask'])

        progress = tqdm(range(augment_count), desc=f'augmenting data {position}/{total}')
        for _ in progress:
            new_scan, new_mask = augment(scan, mask, transform)
            augmented_data.append((new_scan.astype(np.int16), new_mask.astype(bool)))
    return augmented_data

def get_prepared_data(folder: str) -> list[tuple[np.ndarray, np.ndarray]]:
    """Load every previously saved scan/mask pair of a dataset sub-folder.

    File names are taken from the ``scans`` folder and ordered by the integer in
    front of the first ``.``; the mask with the same file name is read from the
    ``masks`` folder.

    Args:
        folder: Dataset sub-folder below ``d:\\dicom\\my_dataset``.

    Returns:
        List of ``(scan, mask)`` tuples, both cast to ``int16``. (The annotation
        previously claimed a list of dicts, which never matched the code.)
    """
    scans_dir = os.path.join('d:\\dicom\\my_dataset', folder, 'scans')
    masks_dir = os.path.join('d:\\dicom\\my_dataset', folder, 'masks')

    filenames = os.listdir(scans_dir)
    # Numeric sort so that '10.npy' comes after '9.npy'.
    filenames.sort(key=lambda x: int(x.split('.')[0]))

    data = []
    for filename in tqdm(filenames, desc='loading data'):
        scan = np.load(os.path.join(scans_dir, filename)).astype(np.int16)
        mask = np.load(os.path.join(masks_dir, filename)).astype(np.int16)
        data.append((scan, mask))
    return data

def read_annotation(annotation_file: str) -> np.ndarray:
    """Load and return the array stored in ``annotation_file`` using ``np.load``."""
    annotation = np.load(annotation_file)
    return annotation

def get_data(from_annotations: bool, new_data_size: int, augment_count: int, folder: str = 'nii_augmented'):
    """Return the samples, either rebuilt from the raw annotations or loaded from disk.

    Args:
        from_annotations: When true, run the full pipeline (load, downscale, save,
            augment, save) and return the augmented samples. When false, load the
            already prepared samples from ``folder``.
        new_data_size: Passed to ``downscale_data`` as the target size.
        augment_count: Number of augmented copies per sample.
        folder: Dataset sub-folder read when ``from_annotations`` is false.
    """
    if not from_annotations:
        return get_prepared_data(folder)

    downscaled = downscale_data(get_annotated_data(), new_data_size)
    save_new_data(downscaled)

    augmented_data = data_augmentation(downscaled, augment_count, nii=False)
    save_augmented_data(augmented_data)
    return augmented_data

In [None]:
# Data preparation: target size handed to downscale_data, augmented copies per
# sample, and whether to rebuild everything from the raw annotations.
from_annotations = True
new_data_size = 128
augment_count = 4

# Split fractions: share of all samples used for training, then share of the
# remainder used for testing.
train_size = 0.8
test_size = 0.5

# Trained models and their metrics are written below the current working directory.
model_output_folder = os.getcwd() + '\\models'

In [None]:
# NOTE: this cell used to contain a bare `get_data()` call. `get_data` requires
# `from_annotations`, `new_data_size` and `augment_count`, so that call always
# raised TypeError and stopped "Run All". The following cell performs the real,
# fully parameterised call, so nothing needs to run here.

In [None]:
# Build (or load) the samples according to the configuration cell.
downscaled_data = get_data(
    from_annotations=from_annotations,
    new_data_size=new_data_size,
    augment_count=augment_count,
)

In [None]:
class DataLoaderDataset(Dataset):
    """Dataset that loads one scan/mask pair from ``.npy`` files per access.

    ``data_folder`` is expected to contain ``scans`` and ``masks`` sub-folders
    holding files with identical names.
    """

    def __init__(self, data_folder: str, filenames: list):
        """Remember the dataset root and the file names that make up this split."""
        self.data_folder = data_folder
        self.filenames = filenames

    def __len__(self):
        """Return the number of file names in this split."""
        return len(self.filenames)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(scan, mask)`` as float32 tensors.

        Both tensors get a leading axis of length 1 via ``unsqueeze(0)``.
        """
        filename = self.filenames[idx]
        # os.path.join instead of hard-coded '\\' so the dataset also works off Windows.
        scan = np.load(os.path.join(self.data_folder, 'scans', filename))
        mask = np.load(os.path.join(self.data_folder, 'masks', filename))
        scan = torch.tensor(scan, dtype=torch.float32).unsqueeze(0)
        mask = torch.tensor(mask, dtype=torch.float32).unsqueeze(0)
        return (scan, mask)

# On-disk variant: split the file names and let DataLoaderDataset read them per access.
selected_data_folder = 'nii_augmented'
location_folder = f'D:\\dicom\\my_dataset\\{selected_data_folder}'
filenames = os.listdir(f'{location_folder}\\scans')

# train_test_split returns (first part, second part). The second part is sized to
# train_size and used as the training set; the first part is split again into
# validation and testing.
training_count = int(len(filenames) * train_size)
validation_filenames, training_filenames = train_test_split(
    filenames, test_size=training_count, random_state=11)

testing_count = int(len(validation_filenames) * test_size)
validation_filenames, testing_filenames = train_test_split(
    validation_filenames, test_size=testing_count, random_state=11)

train_dataset = DataLoaderDataset(location_folder, training_filenames)
val_dataset = DataLoaderDataset(location_folder, validation_filenames)
test_dataset = DataLoaderDataset(location_folder, testing_filenames)

# All three loaders share the same settings.
loader_options = dict(batch_size=2, shuffle=True, num_workers=0, pin_memory=True)
train_loader = DataLoader(train_dataset, **loader_options)
val_loader = DataLoader(val_dataset, **loader_options)
test_loader = DataLoader(test_dataset, **loader_options)

print(len(train_dataset))

In [None]:
class TrainingDataset(Dataset):
    """Dataset over an in-memory list of ``(scan, mask)`` pairs."""

    def __init__(self, data_list):
        """Keep a reference to the list of ``(scan, mask)`` pairs."""
        self.data_list = data_list

    def __len__(self):
        """Return the number of stored pairs."""
        return len(self.data_list)

    @staticmethod
    def _as_float_tensor(array):
        """Convert ``array`` to a float32 tensor with a leading axis of length 1."""
        return torch.unsqueeze(torch.tensor(array, dtype=torch.float32), 0)

    def __getitem__(self, idx):
        """Return pair ``idx`` as two float32 tensors, each with a leading axis of length 1."""
        raw_scan, raw_mask = self.data_list[idx]
        return self._as_float_tensor(raw_scan), self._as_float_tensor(raw_mask)

# In-memory variant: split the loaded samples themselves instead of file names.
# train_test_split returns (first part, second part). The second part is sized to
# train_size and used as the training set; the first part is split again.
# Note that `validation_filenames` holds samples here, not file names.
training_count = int(len(downscaled_data) * train_size)
validation_filenames, training_data = train_test_split(
    downscaled_data, test_size=training_count, random_state=11)

testing_count = int(len(validation_filenames) * test_size)
validation_filenames, testing_data = train_test_split(
    validation_filenames, test_size=testing_count, random_state=11)

train_dataset = TrainingDataset(training_data)
val_dataset = TrainingDataset(validation_filenames)
test_dataset = TrainingDataset(testing_data)

# All three loaders share the same settings.
loader_options = dict(batch_size=2, shuffle=True, num_workers=0, pin_memory=True)
train_loader = DataLoader(train_dataset, **loader_options)
val_loader = DataLoader(val_dataset, **loader_options)
test_loader = DataLoader(test_dataset, **loader_options)

In [None]:
# 3D U-Net with one input and one output channel, placed on the GPU.
unet_options = dict(
    spatial_dims=3,
    in_channels=1,
    out_channels=1,
    channels=(16, 32, 64, 128, 256),
    strides=(2, 2, 2, 2),
    num_res_units=1,
)
model = UNet(**unet_options).to('cuda')

# Dice loss with sigmoid=True, optimised with Adam.
criterion = losses.DiceLoss(sigmoid=True)
optimizer = optim.Adam(model.parameters(), lr=1e-4)

In [None]:
def dice_coefficient(pred, target):
    """Return the smoothed Dice coefficient of ``pred`` and ``target``.

    Computed as ``(2 * sum(pred * target) + eps) / (sum(pred) + sum(target) + eps)``
    with ``eps = 1e-6``, so two all-zero inputs give 1 instead of dividing by zero.
    """
    eps = 1e-6
    overlap = (pred * target).sum()
    numerator = 2. * overlap + eps
    denominator = pred.sum() + target.sum() + eps
    return numerator / denominator

num_epochs = 100

# Per-epoch history; one value is appended to every list per epoch.
metrics = {'train_loss': [], 'train_dice': [], 'val_loss': [], 'val_dice': []}
for epoch in range(num_epochs):
    # ---- training pass ----
    model.train()
    epoch_loss = 0.0
    epoch_dice = 0.0

    for scans, masks in tqdm(train_loader, desc=f'Epoch {epoch+1}/{num_epochs}'):
        scans, masks = scans.to('cuda'), masks.to('cuda')

        optimizer.zero_grad()
        outputs = model(scans)

        loss = criterion(outputs, masks)
        loss.backward()
        optimizer.step()

        # The criterion is built with sigmoid=True, i.e. the model output is raw
        # logits. Squash and threshold them before measuring the overlap.
        outputs_bin = torch.sigmoid(outputs.detach()) > 0.5
        dice = dice_coefficient(outputs_bin, masks)

        epoch_loss += loss.item()
        epoch_dice += dice.item()

    # Average over the number of batches.
    epoch_loss /= len(train_loader)
    epoch_dice /= len(train_loader)

    train_results = f'training - loss: {epoch_loss:.4f}, dice: {epoch_dice:.4f}'

    metrics['train_loss'].append(epoch_loss)
    metrics['train_dice'].append(epoch_dice)

    # ---- validation pass ----
    model.eval()
    val_loss = 0.0
    val_dice = 0.0

    with tqdm(val_loader, desc='Validating', leave=False) as vbar:
        with torch.no_grad():
            for test_scans, test_masks in vbar:
                test_scans, test_masks = test_scans.to('cuda'), test_masks.to('cuda')
                test_outputs = model(test_scans)
                val_loss += criterion(test_outputs, test_masks).item()
                # Binarise exactly as in the training pass. Dice on raw logits is
                # not bounded to [0, 1] and is not comparable with train_dice.
                test_outputs_bin = torch.sigmoid(test_outputs) > 0.5
                val_dice += dice_coefficient(test_outputs_bin, test_masks).item()

    val_loss /= len(val_loader)
    val_dice /= len(val_loader)

    val_results = f'validation - loss: {val_loss:.4f}, dice: {val_dice:.4f}'

    metrics['val_loss'].append(val_loss)
    metrics['val_dice'].append(val_dice)

    print(f'{train_results} | {val_results}', end='')

In [None]:
# Runs are stored as '<model_output_folder>\train<N>'; find the highest existing N.
# Create the output folder on first use, ignore entries that are not named
# 'train<digits>', and fall back to 0 when there is no previous run, so the
# first run is saved as 'train1'.
os.makedirs(model_output_folder, exist_ok=True)
existing_run_numbers = [
    int(name[5:])
    for name in os.listdir(model_output_folder)
    if name.startswith('train') and name[5:].isdecimal()
]
last_model_number = max(existing_run_numbers, default=0)
selected_folder = f'{model_output_folder}\\train{last_model_number+1}'
os.mkdir(selected_folder)

# Persist the weights and the per-epoch metric history side by side.
torch.save(model.state_dict(), f'{selected_folder}\\model.pth')
with open(f'{selected_folder}\\metrics.json', 'w') as f:
    json.dump(metrics, f, indent=4)

print(f'saved as \"{selected_folder}\"')

In [None]:
# Reload the weights and the metric history from the selected run folder.
model_path = f'{selected_folder}\\model.pth'
metrics_path = f'{selected_folder}\\metrics.json'

print(model_path)
print(metrics)

state_dict = torch.load(model_path)
model.load_state_dict(state_dict)
with open(metrics_path, 'r') as f:
    metrics = json.load(f)

In [None]:
# Plot every recorded metric series on one figure, one line per metrics key.
plt.figure(figsize=(10, 8))
for series in metrics.values():
    plt.plot(series)

plt.grid(which='both')
plt.ylim([-0.1, 1.1])
plt.legend(metrics.keys())
plt.show()

In [None]:
# Evaluate the model on the held-out test loader.
model.eval()
test_loss = 0.0
test_dice = 0.0

with tqdm(test_loader, desc='Testing', leave=False) as vbar:
    with torch.no_grad():
        for test_scans, test_masks in vbar:
            test_scans, test_masks = test_scans.to('cuda'), test_masks.to('cuda')
            test_outputs = model(test_scans)
            test_loss += criterion(test_outputs, test_masks).item()
            # The model emits logits (the criterion applies the sigmoid itself).
            # Squash and threshold before computing Dice; Dice on raw logits is
            # not bounded to [0, 1].
            test_outputs_bin = torch.sigmoid(test_outputs) > 0.5
            test_dice += dice_coefficient(test_outputs_bin, test_masks).item()

# Report the averages over the number of test batches.
print(f'loss: {test_loss/len(test_loader)}')
print(f'dice: {test_dice/len(test_loader)}')