# Hyperparameter search for T0 segmentation
with sparse grid search

author = Caroline Magg <br>
date = 25 May 2020 <br>

___________________________________
history: <br>
2020-25-05 Run first hyperparameter search <br>
2020-25-05 Run hyperparameter search for CTV1, CTV2 <br>
2020-01-07 Run hyperparameter search for T0 for all structures with a more educated guess about the range of parameters (from first run) <br>
2020-27-07 Find best parameters for all structures independent of patient (use majority vote)

In [None]:
import os
import sys
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import cv2
import pydicom
from natsort import natsorted
import scipy
import time
import logging as log
import skimage.segmentation as segmentation
import itertools

In [None]:
# Configure the root logger: INFO threshold, records rendered as "LEVEL:message".
log.basicConfig(level=log.INFO, format='%(levelname)s:%(message)s')

### Add dependencies

In [None]:
# Extend the import search path with the KidsBrainProject main folder
# (two levels up) and the sibling utils folder, both as absolute paths.
for rel_path in ('../../', '../utils/'):
    sys.path.append(os.path.abspath(rel_path))

In [None]:
#from utils_explore import read_structure,read_contour,read_contour_names,read_contour_row
from PatientData import PatientData
from DicomWrapper import DicomWrapper
from Segmentation import Segmentation
from HyperparameterSearchSegmentation import HyperparameterSearchSegmentation as HyperparamSearch

In [None]:
# Root folder of the data set -- adjust this to the local setup.
# The search cells below join it with a numbered sub-folder per patient.
path_data = "../../Data/" 

# Contour list

In [None]:
# Read the semicolon-separated contour table and show it as the cell output.
contours_list = pd.read_csv("list_contours_old01.csv", sep=';')
contours_list

# HyperparameterSearch 1st Iteration

In [None]:
# Structures whose contours are read for the search (names must match exactly).
contour_of_interest = [
    "Brain",
    "Cerebell POST YL",
    "Cingulum left",
    "Cingulum right",
    "Fornix",
    "Hypothalamus",
    "TemporalLobeLt",
    "TemporalLobeRt",
    "PTV1",
    "PTV2",
    "GTV",
    "CTV",
    "Scalp",
    "Corpus callosum",
    "Thalamus left",
    "Thalamus right",
    "Thalamus ant L",
    "Thalamus ant R",
    "PapezCircle",
]
len(contour_of_interest)

In [None]:
# Alternative selection: restrict the search to the two CTV structures.
contour_of_interest = ["CTV1", "CTV2"]
len(contour_of_interest)

In [None]:
# Search grid expressed as numeric ranges, one entry per searched parameter.
# Currently not searched: 'w_edge': np.arange(0.1,1.1,0.2)
# NOTE(review): 'beta' uses np.arange with a float step, so whether the
# upper bound is included depends on floating-point rounding.
params = dict(
    kernel_size=np.arange(5, 11, 5),
    beta=np.arange(0.05, 0.2, 0.05),
    max_iteration=np.arange(2, 30, 3),
)

In [None]:
# Search grid with hand-picked values for beta and the iteration count.
# Currently not searched: 'w_edge': np.arange(0.1,1.1,0.2)
params = dict(
    kernel_size=np.arange(5, 11, 5),
    beta=[0.05, 0.1, 0.2],
    max_iteration=[2, 5, 8, 15, 20, 30],
)
params

In [None]:
# Candidate values of every searched parameter, in the dict's key order.
liste = list(params.values())
# Cartesian product over *all* parameter axes. Unpacking the list (instead of
# indexing exactly three entries) keeps the count correct when a parameter is
# added to or removed from `params`.
combinations = list(itertools.product(*liste))
print("# combinations", len(combinations))

In [None]:
# Coarse grid search, one numbered patient folder (2..9) at a time.
for idx in range(2, 10):
    print("folder", idx)
    t = time.time()

    # Resolve this patient's inputs inside its numbered data folder.
    patient_dir = os.path.join(path_data, str(idx))
    files = os.listdir(patient_dir)
    path_contour = os.path.join(patient_dir, 'RS.Jacks{0}.dcm'.format(idx))
    path_preop = os.path.join(patient_dir, 'CT')
    # Every directory entry whose name contains 'T1' or 'MRT'.
    path_postop = [os.path.join(patient_dir, fn)
                   for fn in files
                   if 'T1' in fn or 'MRT' in fn]

    # Load the patient and read only the selected contours (mode="exact").
    data = PatientData(path_preop, path_postop, path_contour)
    data.read_filtered_contour(roiname=contour_of_interest, mode="exact")

    # Run the search over `params`; the per-folder CSV name is passed along
    # (presumably the file the search writes its results to -- TODO confirm).
    segmentor = Segmentation(data, debug=True)
    search = HyperparamSearch(segmentor)
    name = "list_hyperparameter_search_folder{0}_ctv.csv".format(idx)
    result = search.eval(params, name)

    # Wall-clock seconds spent on this folder.
    elapsed = time.time() - t
    print(elapsed)

# Contour List

In [None]:
# Reload the contour table (fields separated by ';') and display it.
contours_list = pd.read_csv("list_contours_old01.csv", sep=';')
contours_list

# HyperparameterSearch 2nd Iteration (finer grid around the results of the 1st iteration)

In [None]:
def read_values_from_csv(df):
    """Build a narrow search grid around every parameter set stored in ``df``.

    Each row must hold a ``'Values'`` string of the form
    ``"(kernel_size, beta, max_iteration)"`` and a ``'Struct'`` entry.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with the columns ``'Values'`` and ``'Struct'``. Rows are
        processed in order; the index labels are irrelevant.

    Returns
    -------
    list of dict
        One dict per row with the keys ``'kernel_size'``, ``'beta'`` and
        ``'max_iteration'`` (numpy arrays of candidate values) and
        ``'struct'`` (the row's ``'Struct'`` entry).

    Raises
    ------
    ValueError, IndexError
        If a ``'Values'`` string does not contain three parsable numbers.
    """
    parameters = []
    # Iterate over the rows themselves rather than over range(len(df)) with
    # label-based .loc, which breaks on any non-default index.
    for _, row in df.iterrows():
        v = row['Values'].replace('(', '').replace(')', '').split(',')
        kernel_size, beta, max_iteration = int(v[0]), float(v[1]), int(v[2])
        grid = {
            # kernel_size - 1 .. kernel_size + 1
            'kernel_size': np.arange(kernel_size - 1, kernel_size + 2),
            # Steps of 0.05 starting at beta - 0.05, never below 0.05.
            # NOTE(review): np.arange with a float step -- whether the value
            # just below beta + 0.05 is included depends on rounding; confirm
            # the intended upper bound.
            'beta': np.arange(max(0.05, beta - 0.05), beta + 0.05, 0.05),
            # max_iteration - 2 .. max_iteration + 2, never below 1
            'max_iteration': np.arange(max(1, max_iteration - 2), max_iteration + 3),
            'struct': row['Struct'],
        }
        parameters.append(grid)

    return parameters

In [None]:
# example for finer grid search
# Load the first-iteration result of folder 1 and show the finer grids
# derived from it.
example_coarse_result = pd.read_csv("list_hyperparameter_search_folder{0}.csv".format(1))
read_values_from_csv(example_coarse_result)

In [None]:
# Use every entry of the contour table's 'RoiName' column as a structure of
# interest; the tuple on the last line displays the names and their count.
contours_of_interest = contours_list['RoiName'].values
contours_of_interest, len(contours_of_interest)

In [None]:
# Finer grid search; the unconditional `break` at the end stops the loop after
# the first folder (idx == 1).
for idx in range(1, 10):
    print("folder", idx)
    t = time.time()

    # Resolve this patient's inputs inside its numbered data folder.
    patient_dir = os.path.join(path_data, str(idx))
    files = os.listdir(patient_dir)
    path_contour = os.path.join(patient_dir, 'RS.Jacks{0}.dcm'.format(idx))
    path_preop = os.path.join(patient_dir, 'CT')
    # Every directory entry whose name contains 'T1' or 'MRT'.
    path_postop = [os.path.join(patient_dir, fn)
                   for fn in files
                   if 'T1' in fn or 'MRT' in fn]

    # Load the patient and read only the listed contours (mode="exact").
    data = PatientData(path_preop, path_postop, path_contour)
    data.read_filtered_contour(roiname=contours_of_interest, mode="exact")
    segmentor = Segmentation(data, debug=True)

    # Per-structure grids derived from this folder's first-iteration CSV.
    coarse_result = pd.read_csv("list_hyperparameter_search_folder{0}.csv".format(idx))
    params = read_values_from_csv(coarse_result)

    search = HyperparamSearch(segmentor)
    name = "list_hyperparameter_search_folder{0}_finer.csv".format(idx)
    result = search.eval(params, name)

    # Wall-clock seconds spent on this folder.
    elapsed = time.time() - t
    print(elapsed)
    break

In [None]:
# Finer grid search for the patient folders 2..9.
for idx in range(2, 10):
    print("folder", idx)
    t = time.time()

    # Resolve this patient's inputs inside its numbered data folder.
    patient_dir = os.path.join(path_data, str(idx))
    files = os.listdir(patient_dir)
    path_contour = os.path.join(patient_dir, 'RS.Jacks{0}.dcm'.format(idx))
    path_preop = os.path.join(patient_dir, 'CT')
    # Every directory entry whose name contains 'T1' or 'MRT'.
    path_postop = [os.path.join(patient_dir, fn)
                   for fn in files
                   if 'T1' in fn or 'MRT' in fn]

    # Load the patient and read only the listed contours (mode="exact").
    data = PatientData(path_preop, path_postop, path_contour)
    data.read_filtered_contour(roiname=contours_of_interest, mode="exact")
    segmentor = Segmentation(data, debug=True)

    # Per-structure grids derived from this folder's first-iteration CSV.
    coarse_result = pd.read_csv("list_hyperparameter_search_folder{0}.csv".format(idx))
    params = read_values_from_csv(coarse_result)

    search = HyperparamSearch(segmentor)
    name = "list_hyperparameter_search_folder{0}_finer.csv".format(idx)
    result = search.eval(params, name)

    # Wall-clock seconds spent on this folder.
    elapsed = time.time() - t
    print(elapsed)

# Find best parameters for all structures (independent of patient)

In [None]:
# Start again from the original contour table (';'-separated) and display it.
contours_list = pd.read_csv("list_contours_old01.csv", sep=';')
contours_list

In [None]:
def convert_string_to_params(s):
    """Parse a stringified parameter triple back into numbers.

    Parameters
    ----------
    s : str
        Text of the form ``"(a, b, c)"``; parentheses are optional.

    Returns
    -------
    tuple
        ``(int(a), float(b), int(c))``.

    Raises
    ------
    ValueError, IndexError
        If ``s`` does not contain three parsable, comma-separated numbers.
    """
    parts = s.replace('(', '').replace(')', '').split(',')
    # The third entry must come from parts[2]; reading parts[0] again would
    # silently report the first value in place of the third.
    return int(parts[0]), float(parts[1]), int(parts[2])

def get_majority_vote(val):
    """Return the most frequent entry of ``val`` and its number of occurrences.

    Parameters
    ----------
    val : iterable of hashable
        The votes, e.g. one parameter tuple per patient.

    Returns
    -------
    tuple
        ``(winner, count)``. On a tie the candidate that appears first in
        ``val`` wins.

    Raises
    ------
    ValueError
        If ``val`` is empty.
    """
    val = list(val)
    if not val:
        raise ValueError("get_majority_vote needs at least one value")
    # Distinct candidates in order of first appearance, so that ties are
    # resolved the same way on every run.
    possibilities = list(dict.fromkeys(val))
    # Count in the argument itself, not in a same-named global of the caller.
    counts = [val.count(combi) for combi in possibilities]
    print(possibilities, counts)
    return possibilities[int(np.argmax(counts))], np.max(counts)

In [None]:
# Read each folder's finer-search result once up front; the files do not
# change while voting, so re-reading all of them per contour is wasted I/O.
finer_results = {
    idx: pd.read_csv("list_hyperparameter_search_folder{0}_finer.csv".format(idx))
    for idx in range(1, 10)
}

for j, contour in enumerate(contours_list['RoiName']):
    print(contour)
    # Kept under the module-level name `values` on purpose -- other cells may
    # refer to it.
    values = []
    for idx, gt in finer_results.items():
        # 'Values' entries of the rows whose 'Struct' equals this contour.
        matches = gt.loc[gt['Struct'] == contour, 'Values']
        if matches.empty:
            print('folder {0}: not available'.format(idx))
            continue
        print("folder", idx)
        # Only the first matching row of a folder casts a vote.
        values.append(convert_string_to_params(matches.iloc[0]))
    if not values:
        # No folder contains this structure: nothing to vote on, so leave its
        # 'Values' entry unset instead of failing.
        print('no parameters available in any folder')
        continue
    best_value, best_counts = get_majority_vote(values)
    print(best_value, best_counts)
    # Store the winning parameter tuple as text in the contour table.
    contours_list.loc[j,'Values'] = str(best_value)

In [None]:
# Display the contour table, whose 'Values' column is set by the voting loop.
contours_list

In [None]:
# Write the table to "list_contours.csv" without the index column.
# NOTE(review): this is written with the default ',' separator, whereas
# "list_contours_old01.csv" is read with ';' -- confirm what readers expect.
contours_list.to_csv("list_contours.csv", index=False)