In [1]:
import os
# The return value is discarded here: only a cell's last expression is displayed.
os.getcwd()
# Move the kernel's working directory one level up; a later cell opens
# 'alphabet.json' relative to the current working directory.
# NOTE(review): this cell is not idempotent - every re-run climbs one more
# directory level. Restart the kernel before re-running the notebook.
%cd ..

/cluster/home/klugh/MIMIC/mimic


In [2]:
from pathlib import Path
import pandas as pd
import argparse

from tensorflow.python.summary.summary_iterator import summary_iterator
import json
import os
import numpy as np
from mimic.utils.filehandling import get_config_path, expand_paths
from mimic.notebooks.utils.boilerplate import test_clfs, test_clf, test_dummy
from mimic.utils.flags import parser


# Test of the latent representations of the MMVAE
### Average precision (AP) scores on the latent representations, averaged over the batches.
### AP summarizes a precision-recall curve as the weighted mean of precisions achieved at each threshold, with the increase in recall from the previous threshold used as the weight.
### Note: For medical data, the true positive rate (recall) is probably more important.

## The labels are not evenly distributed over the datasets; this needs to be taken into account when analyzing classification scores.

In [3]:
# Read the alphabet from the current working directory and join its JSON
# entries into one string.
alphabet_path = str(Path.cwd() / 'alphabet.json')
with open(alphabet_path) as alphabet_file:
    alphabet = ''.join(json.load(alphabet_file))

# Seed an argparse namespace with the values stored in the JSON config file.
config_path = get_config_path()
with open(config_path, 'rt') as json_file:
    t_args = argparse.Namespace(**json.load(json_file))

# Parse an explicit empty argument list (never the notebook's sys.argv):
# argparse only applies a default when the namespace does not already carry
# that attribute, so values from the config file take precedence.
FLAGS = parser.parse_args([], namespace=t_args)

FLAGS = expand_paths(FLAGS)
# Resolve a leading '~' so the data directory can be joined and opened directly.
FLAGS.dir_data = os.path.expanduser(FLAGS.dir_data)
dir_data = FLAGS.dir_data
dir_dataset = os.path.join(dir_data, 'files_small_128')

In [4]:
# Label columns evaluated throughout this notebook.
labels = ['Lung Opacity', 'Pleural Effusion', 'Support Devices']

# Read the evaluation labels, keep only the columns of interest and replace
# missing entries with 0.
test_labels_path = os.path.join(dir_dataset, 'eval_labels.csv')
eval_labels_all = pd.read_csv(test_labels_path)
test_labels_df = eval_labels_all.loc[:, labels].fillna(0)


In [5]:
# Number of entries equal to 1 in each label column.
is_positive = test_labels_df == 1
counts = is_positive.sum()
print(counts)

Lung Opacity        54
Pleural Effusion    29
Support Devices     12
dtype: int64


In [6]:
# Notebook-specific overrides applied on top of the loaded configuration.
notebook_overrides = {
    'num_features': len(alphabet),
    'batch_size': 300,
    'str_experiment': 'temp',
    'device': 'cuda',
    'dir_gen_eval_fid': '',
}
for flag_name, flag_value in notebook_overrides.items():
    setattr(FLAGS, flag_name, flag_value)

# Collect the four divergence weights (uniform, m1, m2, m3) into one list.
FLAGS.alpha_modalities = [
    FLAGS.div_weight_uniform_content,
    FLAGS.div_weight_m1_content,
    FLAGS.div_weight_m2_content,
    FLAGS.div_weight_m3_content,
]

## A dummy classifier that uses a "most frequent" strategy would have a mean average precision of about 0.06

In [7]:
# Baseline: run the dummy test at image size 128 and print the mean of the
# average precisions it returns.
FLAGS.img_size = 128
average_precisions = test_dummy(FLAGS, alphabet)
mean_average_precision = np.mean(average_precisions)
print(mean_average_precision)

setting dataset
setting modalities
setting model
setting clfs
setting rec_weights
dict_keys(['real', 'random', '', 'PA', 'Lateral', 'text', 'Lateral_PA', 'PA_text', 'Lateral_text', 'Lateral_PA_text'])
0.06333333333333334


In [8]:
def _read_scalar_steps(event_dir):
    """Collect the scalar summaries logged in every event file of ``event_dir``.

    Returns a dict mapping the event step to its ``simple_value``. The value is
    read through the protobuf API as a float rather than parsed out of the
    message's text representation. If one step carries several scalar values,
    the last one read wins.
    """
    scalars = dict()
    for logfile in os.listdir(event_dir):
        for event in summary_iterator(os.path.join(event_dir, logfile)):
            for summary_value in event.summary.value:
                # Events without a scalar payload contribute nothing.
                if summary_value.HasField('simple_value'):
                    scalars[event.step] = summary_value.simple_value
    return scalars


# lr_evals layout: {experiment: {label: {lr: {step: value}}}}, where `label`
# and `lr` are the two directory levels below 'logs/Latent Representation'.
checkpoint_path = os.path.expanduser(FLAGS.dir_fid)
lr_evals = dict()
for modality_method in ['moe']:
    method_dir = os.path.join(checkpoint_path, modality_method)
    for factorization in os.listdir(method_dir):
        factorization_dir = os.path.join(method_dir, factorization)
        for experiment in os.listdir(factorization_dir):
            if not experiment.startswith('Mimic'):
                continue
            # Every 'Mimic*' experiment gets an entry, even without logs;
            # empty entries are expected to be skipped downstream.
            lr_eval = lr_evals[experiment] = dict()
            experiment_dir_ = os.path.join(factorization_dir, experiment)
            lr_eval_dir = os.path.join(experiment_dir_, 'logs', 'Latent Representation')
            if not os.path.exists(lr_eval_dir):
                continue
            for label in os.listdir(lr_eval_dir):
                label_dir = os.path.join(lr_eval_dir, label)
                lr_eval[label] = {
                    lr: _read_scalar_steps(os.path.join(label_dir, lr))
                    for lr in os.listdir(label_dir)
                }

Instructions for updating:
Use eager execution and: 
`tf.data.TFRecordDataset(path)`


In [9]:
experiments_dataframe = pd.read_csv('experiments_dataframe.csv')

# Build one table per experiment from the value logged at the last step.
# The result goes into fresh dictionaries instead of overwriting `lr_evals`,
# so this cell can be re-run without re-parsing the event files.
dfs = []
for experiment, experiment_evals in lr_evals.items():
    final_evals = dict()
    max_step = None
    for label, lr_steps in experiment_evals.items():
        for lr, step_values in lr_steps.items():
            if not step_values:
                # No scalar was logged for this combination; max() would raise.
                continue
            last_step = max(step_values)
            final_evals.setdefault(label, dict())[lr] = step_values[last_step]
            # Track the latest step seen anywhere in this experiment.
            max_step = last_step if max_step is None else max(max_step, last_step)

    if not final_evals:
        # Experiments without any logged value are left out.
        continue

    # Outer keys (labels) become columns, inner keys (lr) become the index.
    df = pd.DataFrame(final_evals).astype(float)
    df.index.name = f'Steps: {max_step}'
    df['mean_AP'] = df.mean(numeric_only=True, axis=1)
    dfs.append((df, experiment))
    

In [10]:
for df, experiment in dfs:
    # Only report experiments with exactly one metadata row, so that the
    # `.item()` calls below are well defined.
    matching_rows = experiments_dataframe.loc[experiments_dataframe['experiment_uid'] == experiment]
    if len(matching_rows) != 1:
        continue
    flags = matching_rows

    print(f'Experiment {experiment} with text encoding: {flags.text_encoding.item()}, '
          f'image size: {flags.img_size.item()}, method: {flags.method.item()} \n and trained '
          f'for {flags.total_epochs.item()} epochs with batch size: {flags.batch_size.item()} '
          f'and {flags.steps_per_training_epoch.item()} steps per training epoch')
    display(df)

Experiment Mimic_2020_11_01_21_52_41_837902 with text encoding: char, image size: 256.0, method: joint_elbo 
 and trained for 98.0 epochs with batch size: 100.0 and 200.0 steps per training epoch


Unnamed: 0_level_0,Pleural Effusion,Support Devices,Lung Opacity,mean_AP
Steps: 20500,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
PA_text,0.065556,0.02,0.16624,0.083932
Lateral_PA_text,0.177778,0.02,0.138676,0.112151
PA,0.069167,0.02,0.1025,0.063889
text,0.055,0.02,0.126721,0.06724
Lateral_text,0.127222,0.02,0.117857,0.08836
Lateral_PA,0.115694,0.02,0.1025,0.079398
Lateral,0.099583,0.02,0.117857,0.079147


Experiment Mimic_2020_11_02_09_33_45_520718 with text encoding: word, image size: 256.0, method: joint_elbo 
 and trained for 99.0 epochs with batch size: 100.0 and 200.0 steps per training epoch


Unnamed: 0_level_0,Pleural Effusion,Support Devices,Lung Opacity,mean_AP
Steps: 20500,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
PA_text,0.071759,0.025,0.18958,0.095446
Lateral_PA_text,0.101979,0.025,0.167623,0.098201
PA,0.071759,0.025,0.15572,0.08416
text,0.065,0.025,0.134706,0.074902
Lateral_text,0.132361,0.025,0.158792,0.105385
Lateral_PA,0.126256,0.025,0.153046,0.101434
Lateral,0.114028,0.025,0.131389,0.090139


Experiment Mimic_2020_10_27_14_47_11_079212 with text encoding: char, image size: 128.0, method: jsd 
 and trained for 99.0 epochs with batch size: 180.0 and 100.0 steps per training epoch


Unnamed: 0_level_0,Pleural Effusion,Support Devices,Lung Opacity,mean_AP
Steps: 10300,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
PA_text,0.102381,0.027778,0.131647,0.087269
Lateral_PA_text,0.069444,0.027778,0.129117,0.075446
PA,0.069444,0.027778,0.115625,0.070949
text,0.069444,0.027778,0.112401,0.069874
Lateral_text,0.069444,0.027778,0.13125,0.076157
Lateral_PA,0.069444,0.027778,0.102778,0.066667
Lateral,0.069444,0.027778,0.102778,0.066667


Experiment Mimic_2020_10_23_13_17_46_515651 with text encoding: char, image size: 128.0, method: joint_elbo 
 and trained for 299.0 epochs with batch size: 256.0 and -1.0 steps per training epoch


Unnamed: 0_level_0,Pleural Effusion,Support Devices,Lung Opacity,mean_AP
Steps: 71100,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
PA_text,0.054688,0.027344,0.154948,0.078993
Lateral_PA_text,0.172519,0.027344,0.158118,0.119327
PA,0.056276,0.027344,0.127363,0.070328
text,0.12221,0.027344,0.11942,0.089658
Lateral_text,0.114397,0.027344,0.182021,0.107921
Lateral_PA,0.062686,0.027344,0.126877,0.072302
Lateral,0.060985,0.027344,0.153607,0.080645


 # Evaluation of the classifiers
 All classifiers were trained for 100 epochs

In [11]:
# Label names used for the classifier evaluation below.
labels = [
    'Lung Opacity',
    'Pleural Effusion',
    'Support Devices',
]



## Evaluation of the character encoding and image size 128

In [12]:
# Run the classifier test for image size 128 with the 'char' text encoding
# and print the mean of the precision values per modality.
results = test_clfs(FLAGS, 128, 'char', alphabet)
for modality, clf_result in results.items():
    mean_precision = np.mean(clf_result['list_precision_vals'])
    print(f'mean precision for {modality} classifier: ', mean_precision)

setting dataset
setting modalities
setting model
setting clfs
setting rec_weights
dict_keys(['real', 'random', '', 'PA', 'Lateral', 'text', 'Lateral_PA', 'PA_text', 'Lateral_text', 'Lateral_PA_text'])


  _warn_prf(average, modifier, msg_start, len(result))


mean precision for PA classifier:  0.23816772227360228
mean precision for Lateral classifier:  0.20507691018527285
mean precision for text classifier:  0.5219933947379704


In [13]:
# Print the stored report of every modality classifier.
for modality, clf_result in results.items():
    print(f'report {modality}: \n', clf_result['report'])

report PA: 
                   precision    recall  f1-score   support

    Lung Opacity     0.0000    0.0000    0.0000        33
            None     0.8233    1.0000    0.9031       247
Pleural Effusion     0.0000    0.0000    0.0000        13
 Support Devices     0.0000    0.0000    0.0000         7

        accuracy                         0.8233       300
       macro avg     0.2058    0.2500    0.2258       300
    weighted avg     0.6779    0.8233    0.7436       300

report Lateral: 
                   precision    recall  f1-score   support

    Lung Opacity     0.0000    0.0000    0.0000        33
            None     0.8227    0.9960    0.9011       247
Pleural Effusion     0.0000    0.0000    0.0000        13
 Support Devices     0.0000    0.0000    0.0000         7

        accuracy                         0.8200       300
       macro avg     0.2057    0.2490    0.2253       300
    weighted avg     0.6774    0.8200    0.7419       300

report text: 
                   pr

## Evaluation of the word encoding and image size 128
### The text classifier precision is slightly better for the word encoding

In [14]:
# Run the classifier test for image size 128 with the 'word' text encoding
# and print the mean of the precision values per modality.
results = test_clfs(FLAGS, 128, 'word', alphabet)
for modality, clf_result in results.items():
    mean_precision = np.mean(clf_result['list_precision_vals'])
    print(f'mean precision for {modality} classifier: ', mean_precision)

setting dataset
setting modalities
setting model
setting clfs
setting rec_weights
dict_keys(['real', 'random', '', 'PA', 'Lateral', 'text', 'Lateral_PA', 'PA_text', 'Lateral_text', 'Lateral_PA_text'])


  _warn_prf(average, modifier, msg_start, len(result))


mean precision for PA classifier:  0.2652346862374294
mean precision for Lateral classifier:  0.2659164796671524
mean precision for text classifier:  0.6508058343524366


In [15]:
# Print the stored report of every modality classifier.
for modality, clf_result in results.items():
    print(f'report {modality}: \n', clf_result['report'])

report PA: 
                   precision    recall  f1-score   support

    Lung Opacity     0.0000    0.0000    0.0000        44
            None     0.7833    1.0000    0.8785       235
Pleural Effusion     0.0000    0.0000    0.0000        17
 Support Devices     0.0000    0.0000    0.0000         4

        accuracy                         0.7833       300
       macro avg     0.1958    0.2500    0.2196       300
    weighted avg     0.6136    0.7833    0.6882       300

report Lateral: 
                   precision    recall  f1-score   support

    Lung Opacity     0.0000    0.0000    0.0000        44
            None     0.7826    0.9957    0.8764       235
Pleural Effusion     0.0000    0.0000    0.0000        17
 Support Devices     0.0000    0.0000    0.0000         4

        accuracy                         0.7800       300
       macro avg     0.1957    0.2489    0.2191       300
    weighted avg     0.6130    0.7800    0.6865       300

report text: 
                   pr

## Evaluation of image size 256


In [7]:
import numpy as np

from mimic.dataio.MimicDataset import Mimic
from mimic.utils.experiment import MimicExperiment
# Switch the shared FLAGS to image size 256 with word encoding, then build
# the experiment and the 'eval' split of the dataset from them.
FLAGS.img_size = 256
FLAGS.text_encoding = 'word'
mimic_experiment = MimicExperiment(alphabet=alphabet, flags=FLAGS)
mimic_test = Mimic(FLAGS, mimic_experiment.labels, alphabet, split='eval')

setting dataset
setting modalities
setting model
setting clfs
setting rec_weights
dict_keys(['real', 'random', '', 'PA', 'Lateral', 'text', 'Lateral_PA', 'PA_text', 'Lateral_text', 'Lateral_PA_text'])


In [8]:
# Evaluate the three modality classifiers first, then print the mean
# precisions followed by the reports.
clf_outputs = {
    short_name: test_clf(FLAGS, mimic_experiment, mimic_test, modality)
    for short_name, modality in (('pa', 'PA'), ('lat', 'Lateral'), ('text', 'text'))
}
report_pa, list_precision_pa = clf_outputs['pa']
report_lat, list_precision_lat = clf_outputs['lat']
report_text, list_precision_text = clf_outputs['text']

for short_name, (_, precision_values) in clf_outputs.items():
    print(f'mean precision for {short_name} classifier: ', np.mean(precision_values))
for short_name, (clf_report, _) in clf_outputs.items():
    print(f'report {short_name}: ', clf_report)

  _warn_prf(average, modifier, msg_start, len(result))


mean precision for pa classifier:  0.22456472232340316
mean precision for lat classifier:  0.2522556370555589
mean precision for text classifier:  0.5960240907077753
report pa:                    precision    recall  f1-score   support

    Lung Opacity     0.0000    0.0000    0.0000        27
            None     0.8333    1.0000    0.9091       250
Pleural Effusion     0.0000    0.0000    0.0000        20
 Support Devices     0.0000    0.0000    0.0000         3

        accuracy                         0.8333       300
       macro avg     0.2083    0.2500    0.2273       300
    weighted avg     0.6944    0.8333    0.7576       300

report lat:                    precision    recall  f1-score   support

    Lung Opacity     0.0000    0.0000    0.0000        38
            None     0.8033    1.0000    0.8909       241
Pleural Effusion     0.0000    0.0000    0.0000        16
 Support Devices     0.0000    0.0000    0.0000         5

        accuracy                         0.8033   