# Find threshold for deepseg_sc

This notebook runs an algorithm that finds the best combination of threshold for each contrast, by minimizing the outpout CSA associated with each threshold.

In [1]:
import os
import numpy as np
import pandas as pd
import itertools

In [106]:
folder_csv = '/Volumes/projects/sct_deepseg_threshold/20191009'

thr = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]  # thresholds to test
contrasts = ['T1', 'T2', 'T2s', 'MTon', 'T1w', 'DWI']  # various contrasts

# Get the number of subjects from the T1 scan, which has all the subjects
df = pd.read_csv(os.path.join(folder_csv, 'csa-T1_0.0.csv'))
n_sub = len(df)

# Read the csv files across contrasts and thresholds
df = {}
for (i_thr, i_contrast) in [(x, y) for x in range(len(thr)) for y in range(len(contrasts))]:
    csv_file = os.path.join(folder_csv, 'csa-{}_{}.csv'.format(contrasts[i_contrast], thr[i_thr]))
    df[i_contrast, i_thr] = pd.read_csv(csv_file)

def retrieve_subject(dataframe):
    """Retrieve subject name from dataframe"""
    return os.path.split(df[0, 0]['Filename'][i_sub])[1].split('_')[0]

# Loop across subjects and populate csa array
csa = []
for i_sub in range(n_sub):
    populate = True
    # Retrieve subject name for the first contrast
    sub = retrieve_subject(df[0, 0])
    # initialize tmp dic to populate contrast and thr info for this subject
    csatmp = {}
    # Loop across threshold and contrast
    for (i_thr, i_contrast) in [(x, y) for x in range(len(thr)) for y in range(len(contrasts))]:
        # Retrieve line corresponding to subject
        dftmp = df[i_contrast, i_thr][df[i_contrast, i_thr]['Filename'].str.contains(sub)]
        if not len(dftmp) == 1:
            print('WARNING: Missing entry for i_sub: {}, thr: {}, contrast: {}'.format(i_sub, thr[i_thr], contrasts[i_contrast]))
            populate = False
            break
        else:
            try:
                # Retrieve the first element of the temporary dataframe (knowing there is only one element)
                # Note: we force it to float, because the presence of "None" in the df converts the whole df to string.
                csatmp[i_contrast, i_thr] = float(dftmp['MEAN(area)'].iloc[0])
            except ValueError as err:
                print('ValueError: {}: i_sub: {}, thr: {}, contrast: {}'.format(err, i_sub, thr[i_thr], contrasts[i_contrast]))
                populate = False
                break
    # If everything went well, populate csa variable
    if populate:
        print("Populate for i_sub: {}".format(i_sub))
        csa.append(csatmp)

Populate for i_sub: 0
Populate for i_sub: 1
Populate for i_sub: 2
Populate for i_sub: 3
Populate for i_sub: 4
Populate for i_sub: 5
Populate for i_sub: 6
Populate for i_sub: 7
Populate for i_sub: 8
Populate for i_sub: 9
Populate for i_sub: 10
Populate for i_sub: 11
Populate for i_sub: 12
Populate for i_sub: 13
Populate for i_sub: 14
Populate for i_sub: 15
Populate for i_sub: 16
Populate for i_sub: 20
Populate for i_sub: 21
Populate for i_sub: 23
Populate for i_sub: 24
Populate for i_sub: 29
Populate for i_sub: 30
ValueError: could not convert string to float: 'None': i_sub: 31, thr: 0.0, contrast: T2s
Populate for i_sub: 32
Populate for i_sub: 33
Populate for i_sub: 34
Populate for i_sub: 35
Populate for i_sub: 36
Populate for i_sub: 37
Populate for i_sub: 38
Populate for i_sub: 39
Populate for i_sub: 40
Populate for i_sub: 41
Populate for i_sub: 42
Populate for i_sub: 43
Populate for i_sub: 44
Populate for i_sub: 45
Populate for i_sub: 46
Populate for i_sub: 47
Populate for i_sub: 48


In [104]:
len(csa[13])

KeyError: 13

In [109]:
thr_sub = []

# Update number of subjects based on those who were complete
n_sub = len(csa)

# Loop across subjects
for i_sub in range(n_sub):
    csa_all = []
    ind_all = []
    # Loop across all possible permutations of thresholds and contrasts
    for i_thr in list(itertools.permutations(range(len(thr)), len(contrasts))):
        # Compute the STD of CSA across contrasts, for this particular combinations of thresholds
        csa_sub_tmp = [csa[i_sub][x, i_thr[x]] for x in range(len(contrasts))]
        csa_all.append(np.std(csa_sub_tmp))
        ind_all.append(i_thr)
#         print(csa_all)
    # Find the minimum across all CSA_STD, for this particular subject
    thr_sub.append([thr[x] for x in ind_all[np.argmin(csa_all)]])

    thr_sub_mean = np.mean(thr_sub, axis=0)
    print("Subject: {:03}/{}. Average threshold: {}".format(i_sub, n_sub, thr_sub_mean))

# Average the threshold per contrast
# print(thr_sub)


Subject: 000/232. Average threshold: [0.8 0.2 0.9 0.4 0.1 0. ]
Subject: 001/232. Average threshold: [0.8  0.25 0.9  0.3  0.1  0.  ]
Subject: 002/232. Average threshold: [0.6 0.3 0.9 0.3 0.1 0. ]
Subject: 003/232. Average threshold: [0.65  0.275 0.9   0.3   0.1   0.   ]
Subject: 004/232. Average threshold: [0.68 0.24 0.9  0.28 0.14 0.  ]
Subject: 005/232. Average threshold: [0.7        0.23333333 0.9        0.35       0.13333333 0.        ]
Subject: 006/232. Average threshold: [0.71428571 0.28571429 0.9        0.31428571 0.11428571 0.02857143]
Subject: 007/232. Average threshold: [0.725  0.2625 0.9    0.35   0.125  0.025 ]
Subject: 008/232. Average threshold: [0.73333333 0.25555556 0.9        0.34444444 0.12222222 0.02222222]
Subject: 009/232. Average threshold: [0.74 0.3  0.9  0.33 0.12 0.02]
Subject: 010/232. Average threshold: [0.74545455 0.33636364 0.9        0.31818182 0.11818182 0.01818182]
Subject: 011/232. Average threshold: [0.75       0.325      0.9        0.31666667 0.1166666

Subject: 082/232. Average threshold: [0.69759036 0.35301205 0.89518072 0.3373494  0.10240964 0.01927711]
Subject: 083/232. Average threshold: [0.69880952 0.35714286 0.8952381  0.33809524 0.10119048 0.0202381 ]
Subject: 084/232. Average threshold: [0.7        0.35647059 0.89529412 0.33647059 0.10117647 0.02      ]
Subject: 085/232. Average threshold: [0.70116279 0.35465116 0.89534884 0.33604651 0.10116279 0.01976744]
Subject: 086/232. Average threshold: [0.69885057 0.35287356 0.8954023  0.33908046 0.10114943 0.01954023]
Subject: 087/232. Average threshold: [0.7        0.35227273 0.89545455 0.33977273 0.10113636 0.01931818]
Subject: 088/232. Average threshold: [0.7011236  0.35168539 0.89550562 0.33707865 0.10224719 0.01910112]
Subject: 089/232. Average threshold: [0.70222222 0.35555556 0.89555556 0.33444444 0.10444444 0.01888889]
Subject: 090/232. Average threshold: [0.7032967  0.35384615 0.8956044  0.33516484 0.1043956  0.01868132]
Subject: 091/232. Average threshold: [0.70434783 0.3576

Subject: 161/232. Average threshold: [0.72592593 0.35679012 0.89074074 0.33703704 0.10679012 0.01728395]
Subject: 162/232. Average threshold: [0.72392638 0.35582822 0.89079755 0.33680982 0.10674847 0.01717791]
Subject: 163/232. Average threshold: [0.72439024 0.35792683 0.89085366 0.33658537 0.10670732 0.01707317]
Subject: 164/232. Average threshold: [0.72484848 0.3569697  0.89090909 0.33636364 0.10606061 0.01757576]
Subject: 165/232. Average threshold: [0.7253012  0.3560241  0.89096386 0.33614458 0.1060241  0.01746988]
Subject: 166/232. Average threshold: [0.72634731 0.35568862 0.89041916 0.33532934 0.10598802 0.01736527]
Subject: 167/232. Average threshold: [0.72678571 0.3577381  0.89047619 0.33452381 0.10595238 0.0172619 ]
Subject: 168/232. Average threshold: [0.72721893 0.35680473 0.89053254 0.33609467 0.10591716 0.01715976]
Subject: 169/232. Average threshold: [0.72764706 0.35647059 0.89058824 0.33529412 0.10588235 0.01705882]
Subject: 170/232. Average threshold: [0.72807018 0.3584