# Training Unet & Attention Unet


## Dependencies
Install, load, and initialize all required dependencies for this experiment.

### Install Dependencies

In [None]:
import sys
# Install the local utils package in editable mode (-e), quietly (-q), using
# the pip module of the interpreter that runs this kernel (sys.executable),
# so the package ends up in the same environment the notebook imports from.
!{sys.executable} -m pip install -q -e ../../utils/

### Import Dependencies

# System libraries

In [None]:
from __future__ import absolute_import, division, print_function
import logging, os, sys

# Enable logging: INFO and above go to stdout, prefixed with the level name
logging.basicConfig(
    stream=sys.stdout,
    level=logging.INFO,
    format='[%(levelname)s] %(message)s',
)

# Re-import packages if they change

%load_ext autoreload
%autoreload 2

# Recursion Depth
# NOTE(review): presumably some downstream code recurses deeply; confirm the
# limit really has to be this high.
sys.setrecursionlimit(1000000000)

# Initialize tqdm to always use the notebook progress bar.
# This rebinds the attribute on the tqdm module, so later `tqdm.tqdm` lookups
# resolve to the notebook variant.
# NOTE(review): `tqdm_notebook` may be deprecated in newer tqdm releases in
# favour of `tqdm.notebook.tqdm` - confirm against the installed version.
import tqdm
tqdm.tqdm = tqdm.tqdm_notebook

# Third-party libraries
import comet_ml

import numpy as np
import pandas as pd
import nilearn.plotting as nip
import matplotlib.pyplot as plt
import nibabel as nib
import numpy as np
import torch
import collections
%matplotlib inline
plt.rcParams["figure.figsize"] = (12,6)
%config InlineBackend.figure_format='retina'  # adapt plots for retina displays
import git
import time

# Project utils

import aneurysm_utils
from aneurysm_utils import evaluation, training,preprocessing


In [None]:
# Pick the project root from the directory the notebook was started in.
_cwd = os.getcwd()
if "workspace" in _cwd:
    ROOT = "/workspace" # local 
elif "/group/cake" in _cwd:
    ROOT = "/group/cake" # Jupyter Lab
else:
    # Without this branch ROOT stays undefined and the notebook only fails
    # later with a NameError when the environment is created.
    raise RuntimeError(
        "Cannot determine ROOT: working directory %r is neither inside a "
        "'workspace' path nor under '/group/cake'. Set ROOT manually." % _cwd
    )


### Initialize Environment

In [None]:
# Comet API key; an empty string is the value shipped with the notebook.
comet_api_key = "" # Add comet key here to track experiments

# Create the project environment rooted at ROOT, register the key, and
# print the environment summary.
env = aneurysm_utils.Environment(
    project="our-git-project",
    root_folder=ROOT,
)
env.cached_data["comet_key"] = comet_api_key
env.print_info()

## Load Data
Download, explore, and prepare all required data for the experiment in this section.

In [None]:
# Settings that control which data is loaded and how it is split.
dataset_params = dict(
    prediction="mask",
    mri_data_selection="",
    balance_data=False,
    seed=1,
    resample_voxel_dim=(1.2, 1.2, 1.2),
)

# Switches for the image preprocessing step.
preprocessing_params = dict(
    min_max_normalize=True,
    mean_std_normalize=False,
    smooth_img=False,  # can contain a number: smoothing factor
    intensity_segmentation=False,
)


### Load Meta Data

In [None]:
from aneurysm_utils.data_collection import load_aneurysm_dataset

# Gather the loader options from the dataset settings so the call stays short.
metadata_options = {
    "mri_data_selection": dataset_params["mri_data_selection"],
    "random_state": dataset_params["seed"],
}

df = load_aneurysm_dataset(env, **metadata_options)
df.head()

### Load & Split MRI Data

In [None]:
# Load MRI images and split into train, test, and validation
from aneurysm_utils.data_collection import split_mri_images

# Keyword options for the split, taken from the dataset settings.
split_options = dict(
    prediction=dataset_params["prediction"],
    encode_labels=False,
    random_state=dataset_params["seed"],
    balance_data=dataset_params["balance_data"],
    resample_voxel_dim=dataset_params["resample_voxel_dim"],
)

# The fourth return value is not needed here.
train_data, test_data, val_data, _ = split_mri_images(env, df, **split_options)

# Each split unpacks into images, labels, and participants.
(mri_imgs_train, labels_train, train_participants) = train_data
(mri_imgs_test, labels_test, test_participants) = test_data
(mri_imgs_val, labels_val, val_participants) = val_data

In [None]:
from aneurysm_utils import preprocessing

# The result is used by the following cell as the reference shape that every
# image must match.
# NOTE(review): presumably the most frequent volume shape among the training
# images - confirm against check_mri_shapes. "commen" is a typo for "common";
# the name is kept because a later cell reads it.
most_commen_shape = preprocessing.check_mri_shapes(mri_imgs_train)

## Transform & Preprocess Data

In [None]:
def _keep_matching_shape(images, labels, shape):
    """Drop every image whose shape differs from ``shape``, plus its label.

    Args:
        images: Sequence of objects exposing a ``.shape`` attribute.
        labels: Sequence aligned index-by-index with ``images``.
        shape: Shape an image must have to be kept.

    Returns:
        A tuple ``(kept_images, kept_labels)`` of new lists, in original order.
    """
    # Collect rejected positions in a set so each membership test is O(1).
    rejected = {idx for idx, img in enumerate(images) if img.shape != shape}
    kept_images = [img for idx, img in enumerate(images) if idx not in rejected]
    kept_labels = [lbl for idx, lbl in enumerate(labels) if idx not in rejected]
    return kept_images, kept_labels


# Reference shape that all images of every split must have.
size = most_commen_shape

mri_imgs_train, labels_train = _keep_matching_shape(mri_imgs_train, labels_train, size)
mri_imgs_test, labels_test = _keep_matching_shape(mri_imgs_test, labels_test, size)
mri_imgs_val, labels_val = _keep_matching_shape(mri_imgs_val, labels_val, size)

# NOTE(review): the *_participants lists are not filtered here, so they no
# longer line up with the image lists if any image was dropped.

# Re-check the shapes of the filtered training set and show the label
# value counts of the first validation mask.
preprocessing.check_mri_shapes(mri_imgs_train)
print(np.unique(labels_val[0], return_counts=True))

In [None]:
from aneurysm_utils import preprocessing

# Edge length passed to patch_list for both images and masks.
patch_size = 64
size_of_train = len(mri_imgs_train)
size_of_test = len(mri_imgs_test)
size_of_val = len(mri_imgs_val)

# preprocess all lists as one to have a working mean_std_normalization
mri_imgs = mri_imgs_train + mri_imgs_test + mri_imgs_val
mri_imgs = preprocessing.preprocess(env, mri_imgs, preprocessing_params)

# Split the combined list back into its three parts (same order as it was
# concatenated) and cut each part into patches.
train_end = size_of_train
test_end = size_of_train + size_of_test
mri_imgs_train = preprocessing.patch_list(np.asarray(mri_imgs[:train_end]), patch_size)
mri_imgs_test = preprocessing.patch_list(np.asarray(mri_imgs[train_end:test_end]), patch_size)
mri_imgs_val = preprocessing.patch_list(np.asarray(mri_imgs[test_end:]), patch_size)

# preprocess mask
# x, y, h are not used below; kept so the mask dimensions stay available as
# notebook globals.
x, y, h = labels_train[0].shape
# patch_list lives in the preprocessing module; calling it unqualified raised
# a NameError because it is never imported on its own.
labels_train = preprocessing.patch_list(labels_train, patch_size)
labels_test = preprocessing.patch_list(labels_test, patch_size)
labels_val = preprocessing.patch_list(labels_val, patch_size)

### Optional: View image

In [None]:
# Index of the training entry to display; change it to inspect another one.
idx = 0
nip.view_img(
    # Wrap the array in a NIfTI image with an identity affine.
    nib.Nifti1Image(mri_imgs_train[idx], np.eye(4)),
    symmetric_cmap=False,
    cmap="Greys_r",
    bg_img=False,
    black_bg=True,
    threshold=1e-03, 
    draw_cross=False
)

In [None]:
# Plot slices of the first training entry. The original referenced
# `mri_train`, which is never defined in this notebook.
evaluation.plot_slices(mri_imgs_train[0])

In [None]:
# Convert every split (images and masks) into numpy arrays, one split per line.
mri_imgs_train, labels_train = np.array(mri_imgs_train), np.array(labels_train)
mri_imgs_val, labels_val = np.array(mri_imgs_val), np.array(labels_val)
mri_imgs_test, labels_test = np.array(mri_imgs_test), np.array(labels_test)

## after this cell: 3.3gb (presumably memory usage)

## Train Model
Implementation, configuration, and evaluation of the experiment.

### Train Deep Model 3D data

In [None]:
from comet_ml import Optimizer

# Data handed to the training routine, keyed by split.
artifacts = {
    "train_data": (mri_imgs_train, labels_train),
    "val_data": (mri_imgs_val, labels_val),
    "test_data": (mri_imgs_test, labels_test),
}

# Training settings specific to this experiment.
training_defaults = {
    "batch_size": 6,
    "epochs": 45,
    "es_patience": 3,  # None = deactivate early stopping
    "model_name": 'Unet3D_Oktay',
    "optimizer_momentum": 0.9,
    "optimizer": 'Adam',
    "scheduler": 'ReduceLROnPlateau',
    "criterion": "DiceCELoss",
    "sampler": None,  # 'ImbalancedDatasetSampler2',
    "shuffle_train_set": True,
    "save_models": True,
    "debug": False,
    "criterion_weights": 1,
    "learning_rate": 1e-4,
    "use_cuda": True,
    "feature_scale": 2,
}

# Full parameter set: training settings first, then the dataset and
# preprocessing settings.
params = {**training_defaults, **dataset_params, **preprocessing_params}

# Hyperparameters to search, in the Vizier-inspired format.
search_space = {
    "learning_rate": {
        "type": "float",
        "scalingType": "loguniform",
        "min": 1e-4,
        "max": 1e-3,
    },
}

config = {
    # We pick the Bayes algorithm:
    "algorithm": "bayes",
    "parameters": search_space,
    # Declare what we will be optimizing, and how:
    "spec": {"metric": "train_bal_acc", "objective": "maximize"},  # test balance accuracy
}

opt = Optimizer(config, api_key=env.cached_data["comet_key"])

## 3.3gb

In [None]:
import gc
# Run a garbage collection pass before the training loop starts.
gc.collect()

In [None]:
import gc

# Names that stay the same for every optimizer run.
comet_project = env.project + "-" + params["prediction"]
experiment_name = params["prediction"] + "-pytorch-" + params["model_name"]

# Finally, get experiments, and train your models:
for comet_exp in opt.get_experiments(project_name=comet_project):
    print(comet_exp)

    # Work on a copy so the value picked for this run never changes the
    # shared defaults in `params`.
    param_copy = params.copy()
    param_copy["learning_rate"] = comet_exp.get_parameter("learning_rate")

    exp = env.create_experiment(experiment_name, comet_exp)
    exp.run(training.train_pytorch_model, param_copy, artifacts)

    # Short pause between runs (the reason is not documented in the notebook),
    # then drop the experiment and collect garbage before the next run.
    time.sleep(3)
    del exp
    gc.collect()

