In [1]:
!nvidia-smi

Thu Jan 19 14:47:24 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.67       Driver Version: 460.67       CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  GeForce RTX 3090    Off  | 00000000:65:00.0 Off |                  N/A |
| 71%   63C    P2   278W / 370W |  21885MiB / 24265MiB |     65%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
|   1  GeForce RTX 3090    Off  | 00000000:B3:00.0 Off |                  N/A |
|  0%   49C    P8    45W / 370W |    421MiB / 24268MiB |      0%      Defaul

In [2]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 
import tensorflow as tf
import pickle
import pandas as pd
import numpy as np
import cv2 as cv
import warnings
import skimage.transform as st
import gc
from IPython import display
from load_data import *
from models import *
from evaluation import *

# Report the TensorFlow build in use and silence Python warnings for the
# remainder of the session.
print(tf.__version__)
warnings.filterwarnings("ignore")

# Restrict TensorFlow to the first physical GPU it reports and enable memory
# growth on that device.
gpus = tf.config.list_physical_devices(device_type='GPU')
first_gpu = gpus[0]
tf.config.set_visible_devices(devices=first_gpu, device_type='GPU')
tf.config.experimental.set_memory_growth(first_gpu, True)

2.5.0


# Make balanced dataset

In [11]:
# Seed NumPy's global RNG with a fixed value so later draws from it are repeatable.
np.random.seed(2021)

# Per-record accumulators: entry k of every list describes the k-th record read.
race_array = []
age_array = []
gender_array = []
study_ids = []
subject_ids = []
img_array = []
diseases_array = []

filename = ['data/mimic_val_proposed.tfrecords']

# Finding labels, in the order their 0/1 flags are stored in each `sub_y` row.
disease_keys = (
    'Atelectasis',
    'Cardiomegaly',
    'Consolidation',
    'Edema',
    'Enlarged Cardiomediastinum',
    'Fracture',
    'Lung Lesion',
    'Lung Opacity',
    'No Finding',
    'Pleural Effusion',
    'Pleural Other',
    'Pneumonia',
    'Pneumothorax',
    'Support Devices',
)

raw_dataset = tf.data.TFRecordDataset(filename)
for raw_record in raw_dataset:
    example = tf.train.Example()
    example.ParseFromString(raw_record.numpy())
    features = example.features.feature

    # Identifier extraction is currently disabled:
#     study_id = example.features.feature['study_id'].int64_list.value[0]
#     subject_id = example.features.feature['subject_id'].int64_list.value[0]

    # Race code 4 is re-coded to 2; every other code is kept as stored.
    race = features['race'].int64_list.value[0]
    race = 2 if race == 4 else race

    # Positive age codes are shifted down by one; 0 (and below) is left untouched.
    age = features['age'].int64_list.value[0]
    age = age - 1 if age > 0 else age

    gender = features['gender'].int64_list.value[0]

    # A label counts as positive only when its stored float equals exactly 1.
    sub_y = [
        1 if features[key].float_list.value[0] == 1 else 0
        for key in disease_keys
    ]

    # Kept as a (jpeg_bytes, np.uint8) pair: the writer cell reads element [0].
    img = (features['jpg_bytes'].bytes_list.value[0], np.uint8)

    diseases_array.append(sub_y)
#     study_ids.append(study_id)
#     subject_ids.append(subject_id)
    race_array.append(race)
    age_array.append(age)
    gender_array.append(gender)
    img_array.append(img)
    

In [12]:
# One row per record, holding the three demographic codes used for balancing.
df = pd.DataFrame({"Race":race_array, "Age":age_array, "Gender":gender_array})

# Group records by every (Race, Age, Gender) combination present in the data.
g_ = df.groupby(['Race', 'Age', 'Gender'])

# Size of the rarest combination. Computed once here rather than inside the
# per-group lambda, where it was re-evaluated for every group.
min_group_size = g_.size().min()

# Draw that many rows from each group, then drop the three group-key index
# levels so that only the original row labels remain as the index.
df_ = g_.apply(lambda x: x.sample(min_group_size)).reset_index(level=[0, 1, 2], drop=True)

In [13]:
# Row labels of the records that survived the balanced sampling.
idx = df_.index.values


def _take_sampled(values):
    """Return `values` as a NumPy array restricted to the rows listed in `idx`."""
    return np.array(values)[idx]


# NOTE: each name is rebound to its own subset, so running this cell a second
# time would index the already-reduced arrays with the original row labels.
diseases_array = _take_sampled(diseases_array)
# study_ids = np.array(study_ids)[idx]
# subject_ids = np.array(subject_ids)[idx]
race_array = _take_sampled(race_array)
age_array = _take_sampled(age_array)
gender_array = _take_sampled(gender_array)
img_array = _take_sampled(img_array)

In [14]:
record_file = 'data/mimic_val_proposed_balanced.tfrecords'
Labels_diseases = ['Atelectasis', 'Cardiomegaly', 'Consolidation', 'Edema', 'Enlarged Cardiomediastinum', 'Fracture', 'Lung Lesion', 'Lung Opacity', 'No Finding', 'Pleural Effusion', 'Pleural Other', 'Pneumonia', 'Pneumothorax', 'Support Devices']

# Serialise every selected record as one tf.train.Example in the output file.
with tf.io.TFRecordWriter(record_file) as writer:
    for i, img_entry in enumerate(img_array):
        example = tf.train.Example()
        feature_map = example.features.feature

        # Element [0] of the stored image entry is the raw JPEG byte string.
        feature_map['jpg_bytes'].bytes_list.value.append(img_entry[0])

        feature_map['race'].int64_list.value.append(race_array[i])

#         example.features.feature['subject_id'].int64_list.value.append(subject_ids[i])

#         example.features.feature['study_id'].int64_list.value.append(study_ids[i])

        # One float feature per finding, keyed by its label name.
        for j, label in enumerate(Labels_diseases):
            feature_map[label].float_list.value.append(diseases_array[i][j])

        feature_map['age'].int64_list.value.append(age_array[i])

        feature_map['gender'].int64_list.value.append(gender_array[i])

        writer.write(example.SerializeToString())

# Evaluate model on stratified dataset

In [None]:
import glob
import re

aug_method_list = ['rotation', 'shear', 'scaling', 'fisheye']
dataset = 'mimic'
data_split = 'test'
task = 'disease'
archi = 'densenet'
group_name = {'race': ['white', 'black', 'asian'], 'gender': ['male', 'female'], 'age': ['0_40', '40_60', '60_80', '80+']}
group_type = {'race': [0, 1, 4], 'gender': [0, 1], 'age': [0, 1, 2, 3]}

# Separators used to tokenise a data file path ('_', '.' and '/').
# Written as a raw string so the backslashes reach `re` unchanged instead of
# being treated as (invalid) Python string escapes.
path_separators = r'_|\.|\/'

for group in ['age']:
    # `i` is the position of the subgroup; it selects the matching numeric code
    # from `group_type[group]` further down.
    for i, v in enumerate(group_name[group]):
        name = 'stratified_{v}'.format(v=v)
        model_name = 'model_densenet_mimic_ERM_{name}_proposed'.format(name=name)
        model = define_model(nodes=14, archi=archi)

        checkpoint_filepath = 'checkpoints/'+model_name

        checkpoint = tf.train.Checkpoint(model)
        manager = tf.train.CheckpointManager(checkpoint, directory=checkpoint_filepath, max_to_keep=1, checkpoint_name=model_name)

        # Restore the weights from the first checkpoint the manager knows about.
        model.load_weights(manager.checkpoints[0])

        all_preds = []

        for i2 in aug_method_list:
            filepaths = glob.glob('data/{dataset}_{data_split}_{aug_method}*'.format(dataset=dataset, data_split=data_split, aug_method=i2))

            for j in filepaths:

                # Tokens 1, 2 and 3 of the split path are passed on as the
                # dataset, the data split and the augmentation name.
                splits = re.split(path_separators, j)

                X_test, y_test = get_stratified_data(aug_method='_'+splits[3], dataset=splits[1], data_split=splits[2], group=group, group_type=group_type[group][i])

                all_preds.append(model.predict(X_test))

        # Element-wise average of the predictions over every file that was scored.
        y_preds = np.mean(all_preds, axis=0)

        filename = 'predictions/{model_name}_on_aug'.format(model_name=model_name)

        with open(filename, "wb") as fp:
            pickle.dump(y_preds, fp)
        



In [8]:
# Load the test split with an empty augmentation suffix, together with the
# demographic array returned when return_demo=True.
dataset = 'mimic'
task = 'disease'
aug_method = ''

X_test, y_test, demo = get_data(
    aug_method=aug_method,
    dataset=dataset,
    data_split='test',
    task=task,
    return_demo=True,
)


In [15]:
def compute_result(y_test, demo, result_name, group, testdata):
    """Score each subgroup's stratified model per disease and pickle the results.

    For every subgroup of `group`, the function loads that subgroup's pickled
    predictions and its per-disease thresholds, evaluates the 14 disease
    columns with `test(...)`, and finally pickles the nested result list to
    ``results/{result_name}_on_{testdata}_{group}_results``.

    Args:
        y_test: 2-D label array indexed as ``y_test[rows, disease]``.
        demo: 2-D demographic array indexed as ``demo[:, column]``. The column
            for `group` is its position among the keys 'race', 'gender', 'age'
            (0, 1, 2) -- assumes `demo` is laid out in that order; TODO confirm
            against the loader that produces it.
        result_name: Prefix of the output file name.
        group: One of 'race', 'gender' or 'age'.
        testdata: Suffix of the prediction files to read and of the output
            file (the notebook calls this with 'original' and 'aug').

    Returns:
        None. The results are written to disk as a list with one entry per
        subgroup, each entry being a list of 14 `test(...)` results.

    Raises:
        KeyError: If `group` is not one of the supported attributes.

    NOTE(review): a later cell of this notebook defines another
    `compute_result` with a different signature; once that cell has run it
    replaces this function.
    """
    group_type = {'race': [0, 1, 4], 'gender': [0, 1], 'age': [0, 1, 2, 3]}
    group_name = {'race': ['white', 'black', 'asian'], 'gender': ['male', 'female'], 'age': ['0_40', '40_60', '60_80', '80+']}

    # Looked up first so an unknown `group` still surfaces as a KeyError.
    targets = group_type[group]
    # Loop-invariant: which column of `demo` holds this attribute.
    demo_col = list(group_name).index(group)

    dfs = []
    for j, target in enumerate(targets):

        # Rows of the test set that belong to the current subgroup.
        idx = demo[:, demo_col] == target
        subgroup = group_name[group][j]

        file = 'predictions/model_densenet_mimic_ERM_stratified_{group_name}_proposed_on_{testdata}'.format(group_name=subgroup, testdata=testdata)
        best_thresh = np.loadtxt('thresh/model_densenet_mimic_ERM_stratified_{group_name}_proposed_thresh.txt'.format(group_name=subgroup))
        # The context manager closes the file; no explicit close() is needed.
        with open(file, "rb") as fp:   # Unpickling
            y_preds = pickle.load(fp)

        # Predictions are used as loaded, while the labels are masked to the
        # subgroup -- presumably the prediction file already contains only
        # that subgroup's rows; verify against the cell that produces it.
        dfs_disease = [
            test(y_preds[:, k], y_test[idx, k], best_thresh[k])
            for k in range(14)
        ]

        dfs.append(dfs_disease)

    with open("results/{i}_on_{testdata}_{group}_results".format(i=result_name, testdata=testdata, group=group), "wb") as fp:
        pickle.dump(dfs, fp)


In [13]:
result_name = 'densenet_mimic_ERM_stratified_proposed'

# Race subgroups: score the `_on_original` predictions first, then `_on_aug`.
for testdata in ('original', 'aug'):
    compute_result(y_test, demo, result_name, 'race', testdata)

In [16]:
result_name = 'densenet_mimic_ERM_stratified_proposed'

# Age subgroups: score the `_on_original` predictions first, then `_on_aug`.
for testdata in ('original', 'aug'):
    compute_result(y_test, demo, result_name, 'age', testdata)

In [17]:
result_name = 'densenet_mimic_ERM_stratified_proposed'

# Gender subgroups: score the `_on_original` predictions first, then `_on_aug`.
for testdata in ('original', 'aug'):
    compute_result(y_test, demo, result_name, 'gender', testdata)

## 3D MRI

In [30]:
def compute_result(y_test, demo, mode, group, testdata):
    """Score each subgroup's stratified 3D-CNN predictions and pickle the results.

    Args:
        y_test: 1-D label array; it is masked with ``demo == code``.
        demo: 1-D array of subgroup codes (0 or 1) aligned with `y_test`.
        mode: Model-variant suffix inserted into the prediction and result
            file names (the notebook passes '' and '_proposed').
        group: One of 'race', 'gender' or 'age'.
        testdata: Suffix of the prediction files to read and of the output file.

    Returns:
        None. A list with one `test(...)` result per subgroup is pickled to
        ``results/3D_CNN_AD_CN_stratified{mode}_on_{testdata}_{group}_results``.
    """
    group_type = {'race': [0, 1], 'gender': [0, 1], 'age': [0, 1]}
    group_name = {'race': ['white', 'others'], 'gender': ['female', 'male'], 'age': ['young', 'old']}

    dfs = []

    for code in group_type[group]:

        # Samples whose demographic code matches this subgroup.
        subgroup_mask = demo == code
        # The numeric code doubles as the index into the subgroup names.
        subgroup = group_name[group][code]

        file = 'predictions/3D_CNN_AD_CN_stratified_{group_name}{i}_on_{testdata}'.format(i=mode, group_name=subgroup, testdata=testdata)
        # NOTE(review): the threshold path does not include `mode`, so every
        # model variant reads the same threshold file -- confirm this is intended.
        best_thresh = np.loadtxt('thresh/3D_CNN_AD_CN_stratified_{group_name}_thresh.txt'.format(group_name=subgroup))
        with open(file, "rb") as fp:   # Unpickling
            y_preds = pickle.load(fp)

        # Echo which prediction file was just read.
        print(file)

        dfs.append(test(y_preds, y_test[subgroup_mask], best_thresh))

    results_path = "results/3D_CNN_AD_CN_stratified{i}_on_{testdata}_{group}_results".format(i=mode, group=group, testdata=testdata)
    with open(results_path, "wb") as fp:
        pickle.dump(dfs, fp)

In [31]:
df = pd.read_csv('data_new.csv')
data_path = '../../../mnt/usb/kuopc/ADNI_B1/MPR__GradWarp__B1_Correction_crop/'

# Keep only test-split rows whose diagnosis is not MCI. The explicit .copy()
# makes the result an independent frame, so the column assignments below do not
# write into a filtered slice of the frame that was read from disk.
keep_rows = (df['Group'] != 'MCI') & (df['Split'] == 'test')
df = df.loc[keep_rows].copy()

# Encode the remaining columns as binary codes:
#   Group: CN -> 0, AD -> 1        Sex: F -> 0, M -> 1
#   Age:   <= 75 -> 0, else 1      Race: < 1 -> 0, else 1
df['Group'] = df['Group'].replace(['CN', 'AD'], [0, 1])
df['Sex'] = df['Sex'].replace(['F', 'M'], [0, 1])
df['Age'] = np.where(df['Age'] <= 75, 0, 1)
df['Race'] = np.where(df['Race'] < 1, 0, 1)

In [None]:
mode = ''

# Gender subgroups, model variant with an empty suffix: 'original' then 'aug'.
for testdata in ('original', 'aug'):
    compute_result(df['Group'].values, df['Sex'].values, mode, 'gender', testdata)

In [None]:
mode = '_proposed'

# Gender subgroups, '_proposed' model variant: 'original' then 'aug'.
for testdata in ('original', 'aug'):
    compute_result(df['Group'].values, df['Sex'].values, mode, 'gender', testdata)

In [None]:
mode = ''

# Age subgroups, model variant with an empty suffix: 'original' then 'aug'.
for testdata in ('original', 'aug'):
    compute_result(df['Group'].values, df['Age'].values, mode, 'age', testdata)

In [None]:
mode = '_proposed'

# Age subgroups, '_proposed' model variant: 'original' then 'aug'.
for testdata in ('original', 'aug'):
    compute_result(df['Group'].values, df['Age'].values, mode, 'age', testdata)