In [1]:
!nvidia-smi

Tue Nov 22 10:32:33 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.67       Driver Version: 460.67       CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  GeForce RTX 3090    Off  | 00000000:65:00.0 Off |                  N/A |
| 65%   50C    P8    50W / 370W |  23549MiB / 24265MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
|   1  GeForce RTX 3090    Off  | 00000000:B3:00.0 Off |                  N/A |
| 63%   55C    P2   138W / 370W |   5419MiB / 24268MiB |      7%      Defaul

In [2]:
import pandas as pd
import numpy as np
import tensorflow as tf
import cv2 as cv
import warnings
import skimage.transform as st
import gc
import os
from IPython import display
from transformation import *
from utilities import *
from calculate_disparity import *

# Record the TensorFlow version this notebook was run with.
print(tf.__version__)
# Silence every Python warning for the rest of the session.
warnings.filterwarnings("ignore")

# Make only the second physical GPU (index 1) visible to TensorFlow.
# NOTE(review): gpus[1] raises IndexError on a machine with fewer than two GPUs.
gpus = tf.config.list_physical_devices(device_type='GPU')
tf.config.set_visible_devices(devices=gpus[1], device_type='GPU')

2.5.0


In [3]:
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    # Cap TensorFlow's allocation on the second GPU (gpus[1]) by exposing it as
    # a single virtual device with a fixed memory limit.
    try:
        tf.config.experimental.set_virtual_device_configuration(
            gpus[1],
            [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=4096)]) # memory_limit is given in MB
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        # Virtual devices must be set before GPUs have been initialized
        print(e)


2 Physical GPUs, 1 Logical GPUs


In [4]:
# Seed TensorFlow and NumPy so repeated runs of the notebook are comparable.
seed = 2021
# NOTE(review): PYTHONHASHSEED is read when the interpreter starts, so setting
# it here presumably only affects child processes, not this kernel — confirm.
os.environ['PYTHONHASHSEED']=str(seed)
tf.random.set_seed(seed)
np.random.seed(seed)

In [5]:
# Training callbacks passed to every model.fit call in this notebook:
#   - LearningRateScheduler driven by `scheduler`, which is not defined in this
#     notebook (presumably provided by one of the star imports — TODO confirm).
#   - EarlyStopping that watches val_loss (lower is better) with patience=4.
callback = [tf.keras.callbacks.LearningRateScheduler(scheduler),
           tf.keras.callbacks.EarlyStopping(monitor='val_loss', mode='min', patience=4)]

# Demographic Attributes Classification

In [6]:
range_list = [20, 45, 90]
filter_type = 'rotation'
is_diseases = False

# One AUC column per age bucket; the number of columns is also the number of
# output classes requested from define_model. Earlier variants of this
# experiment used ["WHITE", "AFRICAN AMERICAN", "ASIA"] or ["Male", "Female"].
age_columns = ["0-40", "40-60", "60-80", "80-"]
n_repeats = 5  # independent trainings per transformation strength

# Create the output folder up front so a missing directory cannot discard the
# results after all the training has finished.
os.makedirs('csv_result', exist_ok=True)

for i in range_list:

    print(i)
    # get_data / define_model / cal_auc / BATCH_SIZE are not defined in this
    # cell; presumably they come from the star imports — TODO confirm.
    X_train, y_train = get_data(cv_filter=filter_type, data_split='train', Range=i, diseases=is_diseases)
    X_val, y_val = get_data(cv_filter=filter_type, data_split='val', Range=i, diseases=is_diseases)
    X_test, y_test = get_data(cv_filter=filter_type, data_split='test', Range=i, diseases=is_diseases)

    # Collect one record per repeat and build the frame once at the end:
    # DataFrame.append is deprecated (removed in pandas 2.0) and copies the
    # whole frame on every call.
    auc_rows = []

    for j in range(n_repeats):

        model = define_model(len(age_columns))

        model.fit(X_train, y_train, epochs=15, callbacks=callback, batch_size=BATCH_SIZE, validation_data=(X_val, y_val))

        y_preds = model.predict(X_test)

        auc_score = cal_auc(y_test, y_preds)
        auc_rows.append({name: auc_score[k] for k, name in enumerate(age_columns)})

        # Dropping the Python reference alone does not release the global state
        # Keras keeps for each model built, so reset it between repeats.
        del y_preds, model
        tf.keras.backend.clear_session()
        gc.collect()

        display.clear_output()

    df = pd.DataFrame(auc_rows, columns=age_columns)
    df.to_csv('csv_result/{k}{l}_result_age.csv'.format(k=filter_type, l=i))

    # Free this strength's arrays before the next one is loaded.
    del X_train, y_train
    del X_val, y_val
    del X_test, y_test
    gc.collect()

In [7]:
range_list = [4, 5, 6]
filter_type = 'shear'
is_diseases = False

# One AUC column per age bucket; the number of columns is also the number of
# output classes requested from define_model. Earlier variants of this
# experiment used ["WHITE", "AFRICAN AMERICAN", "ASIA"] or ["Male", "Female"].
age_columns = ["0-40", "40-60", "60-80", "80-"]
n_repeats = 5  # independent trainings per transformation strength

# Create the output folder up front so a missing directory cannot discard the
# results after all the training has finished.
os.makedirs('csv_result', exist_ok=True)

for i in range_list:

    print(i)
    # get_data / define_model / cal_auc / BATCH_SIZE are not defined in this
    # cell; presumably they come from the star imports — TODO confirm.
    X_train, y_train = get_data(cv_filter=filter_type, data_split='train', Range=i, diseases=is_diseases)
    X_val, y_val = get_data(cv_filter=filter_type, data_split='val', Range=i, diseases=is_diseases)
    X_test, y_test = get_data(cv_filter=filter_type, data_split='test', Range=i, diseases=is_diseases)

    # Collect one record per repeat and build the frame once at the end:
    # DataFrame.append is deprecated (removed in pandas 2.0) and copies the
    # whole frame on every call.
    auc_rows = []

    for j in range(n_repeats):

        model = define_model(len(age_columns))

        model.fit(X_train, y_train, epochs=15, callbacks=callback, batch_size=BATCH_SIZE, validation_data=(X_val, y_val))

        y_preds = model.predict(X_test)

        auc_score = cal_auc(y_test, y_preds)
        auc_rows.append({name: auc_score[k] for k, name in enumerate(age_columns)})

        # Dropping the Python reference alone does not release the global state
        # Keras keeps for each model built, so reset it between repeats.
        del y_preds, model
        tf.keras.backend.clear_session()
        gc.collect()

        display.clear_output()

    df = pd.DataFrame(auc_rows, columns=age_columns)
    df.to_csv('csv_result/{k}{l}_result_age.csv'.format(k=filter_type, l=i))

    # Free this strength's arrays before the next one is loaded.
    del X_train, y_train
    del X_val, y_val
    del X_test, y_test
    gc.collect()

In [8]:
range_list = ['light', 'medium', 'heavy']
filter_type = 'scaling'
is_diseases = False

# One AUC column per age bucket; the number of columns is also the number of
# output classes requested from define_model. Earlier variants of this
# experiment used ["WHITE", "AFRICAN AMERICAN", "ASIA"] or ["Male", "Female"].
age_columns = ["0-40", "40-60", "60-80", "80-"]
n_repeats = 5  # independent trainings per transformation strength

# Create the output folder up front so a missing directory cannot discard the
# results after all the training has finished.
os.makedirs('csv_result', exist_ok=True)

for i in range_list:

    print(i)
    # get_data / define_model / cal_auc / BATCH_SIZE are not defined in this
    # cell; presumably they come from the star imports — TODO confirm.
    X_train, y_train = get_data(cv_filter=filter_type, data_split='train', Range=i, diseases=is_diseases)
    X_val, y_val = get_data(cv_filter=filter_type, data_split='val', Range=i, diseases=is_diseases)
    X_test, y_test = get_data(cv_filter=filter_type, data_split='test', Range=i, diseases=is_diseases)

    # Collect one record per repeat and build the frame once at the end:
    # DataFrame.append is deprecated (removed in pandas 2.0) and copies the
    # whole frame on every call.
    auc_rows = []

    for j in range(n_repeats):

        model = define_model(len(age_columns))

        model.fit(X_train, y_train, epochs=15, callbacks=callback, batch_size=BATCH_SIZE, validation_data=(X_val, y_val))

        y_preds = model.predict(X_test)

        auc_score = cal_auc(y_test, y_preds)
        auc_rows.append({name: auc_score[k] for k, name in enumerate(age_columns)})

        # Dropping the Python reference alone does not release the global state
        # Keras keeps for each model built, so reset it between repeats.
        del y_preds, model
        tf.keras.backend.clear_session()
        gc.collect()

        display.clear_output()

    df = pd.DataFrame(auc_rows, columns=age_columns)
    df.to_csv('csv_result/{k}{l}_result_age.csv'.format(k=filter_type, l=i))

    # Free this strength's arrays before the next one is loaded.
    del X_train, y_train
    del X_val, y_val
    del X_test, y_test
    gc.collect()

In [9]:
range_list = [2, 3, 4]
filter_type = 'fisheye'
is_diseases = False

# One AUC column per age bucket; the number of columns is also the number of
# output classes requested from define_model. Earlier variants of this
# experiment used ["WHITE", "AFRICAN AMERICAN", "ASIA"] or ["Male", "Female"].
age_columns = ["0-40", "40-60", "60-80", "80-"]
n_repeats = 5  # independent trainings per transformation strength

# Create the output folder up front so a missing directory cannot discard the
# results after all the training has finished.
os.makedirs('csv_result', exist_ok=True)

for i in range_list:

    print(i)
    # get_data / define_model / cal_auc / BATCH_SIZE are not defined in this
    # cell; presumably they come from the star imports — TODO confirm.
    X_train, y_train = get_data(cv_filter=filter_type, data_split='train', Range=i, diseases=is_diseases)
    X_val, y_val = get_data(cv_filter=filter_type, data_split='val', Range=i, diseases=is_diseases)
    X_test, y_test = get_data(cv_filter=filter_type, data_split='test', Range=i, diseases=is_diseases)

    # Collect one record per repeat and build the frame once at the end:
    # DataFrame.append is deprecated (removed in pandas 2.0) and copies the
    # whole frame on every call.
    auc_rows = []

    for j in range(n_repeats):

        model = define_model(len(age_columns))

        model.fit(X_train, y_train, epochs=15, callbacks=callback, batch_size=BATCH_SIZE, validation_data=(X_val, y_val))

        y_preds = model.predict(X_test)

        auc_score = cal_auc(y_test, y_preds)
        auc_rows.append({name: auc_score[k] for k, name in enumerate(age_columns)})

        # Dropping the Python reference alone does not release the global state
        # Keras keeps for each model built, so reset it between repeats.
        del y_preds, model
        tf.keras.backend.clear_session()
        gc.collect()

        display.clear_output()

    df = pd.DataFrame(auc_rows, columns=age_columns)
    df.to_csv('csv_result/{k}{l}_result_age.csv'.format(k=filter_type, l=i))

    # Free this strength's arrays before the next one is loaded.
    del X_train, y_train
    del X_val, y_val
    del X_test, y_test
    gc.collect()

In [10]:
range_list = ['original', 'proposed']
filter_type = ''
is_diseases = False

# One AUC column per age bucket; the number of columns is also the number of
# output classes requested from define_model. Earlier variants of this
# experiment used ["WHITE", "AFRICAN AMERICAN", "ASIA"] or ["Male", "Female"].
age_columns = ["0-40", "40-60", "60-80", "80-"]
n_repeats = 5  # independent trainings per data variant

# Create the output folder up front so a missing directory cannot discard the
# results after all the training has finished.
os.makedirs('csv_result', exist_ok=True)

for i in range_list:

    print(i)
    # get_data / define_model / cal_auc / BATCH_SIZE are not defined in this
    # cell; presumably they come from the star imports — TODO confirm.
    X_train, y_train = get_data(cv_filter=filter_type, data_split='train', Range=i, diseases=is_diseases)
    X_val, y_val = get_data(cv_filter=filter_type, data_split='val', Range=i, diseases=is_diseases)
    X_test, y_test = get_data(cv_filter=filter_type, data_split='test', Range=i, diseases=is_diseases)

    # Collect one record per repeat and build the frame once at the end:
    # DataFrame.append is deprecated (removed in pandas 2.0) and copies the
    # whole frame on every call.
    auc_rows = []

    for j in range(n_repeats):

        model = define_model(len(age_columns))

        model.fit(X_train, y_train, epochs=15, callbacks=callback, batch_size=BATCH_SIZE, validation_data=(X_val, y_val))

        y_preds = model.predict(X_test)

        auc_score = cal_auc(y_test, y_preds)
        auc_rows.append({name: auc_score[k] for k, name in enumerate(age_columns)})

        # Dropping the Python reference alone does not release the global state
        # Keras keeps for each model built, so reset it between repeats.
        del y_preds, model
        tf.keras.backend.clear_session()
        gc.collect()

        display.clear_output()

    # filter_type is empty here, so the file name is just the variant name.
    df = pd.DataFrame(auc_rows, columns=age_columns)
    df.to_csv('csv_result/{k}{l}_result_age.csv'.format(k=filter_type, l=i))

    # Free this variant's arrays before the next one is loaded.
    del X_train, y_train
    del X_val, y_val
    del X_test, y_test
    gc.collect()

# Random Augmentation

## TFRecords for random augmentation

In [10]:
from PIL import Image, ImageOps
import io
INPUT_SHAPE = (224, 224, 1)
count = 0

# Per-record outputs; the TFRecord-writing cell further down reads these lists.
race_array = []
age_array = []
gender_array = []
diseases_array = []
img_array = []

# Declared by the original cell but never filled; kept so any other code that
# references these names still finds them.
diseased_array = []
id_array = []
bmi_array = []
study_id_array = []

# The order fixes the position of each finding inside a record's label vector.
disease_names = ['Atelectasis', 'Cardiomegaly', 'Consolidation', 'Edema',
                 'Enlarged Cardiomediastinum', 'Fracture', 'Lung Lesion',
                 'Lung Opacity', 'No Finding', 'Pleural Effusion',
                 'Pleural Other', 'Pneumonia', 'Pneumothorax', 'Support Devices']

# Re-seed so the sequence of random transformations is the same on every run.
np.random.seed(2021)

filename = ['../Data/Chexpert_val.tfrecords']
raw_dataset = tf.data.TFRecordDataset(filename)

for raw_record in raw_dataset:
    example = tf.train.Example()
    example.ParseFromString(raw_record.numpy())
    features = example.features.feature

    race = features['race'].int64_list.value[0]
    age = features['age'].int64_list.value[0]
    gender = features['gender'].int64_list.value[0]

    # Binarise every finding: a stored value of exactly 1 is positive, anything
    # else is treated as negative.
    sub_y_d = [1 if features[name].float_list.value[0] == 1 else 0
               for name in disease_names]

    # np.frombuffer replaces np.fromstring, whose binary mode is deprecated.
    nparr = np.frombuffer(features['jpg_bytes'].bytes_list.value[0], np.uint8)
    img_np = cv.imdecode(nparr, cv.IMREAD_GRAYSCALE)

    # Pick one of the four transformations uniformly at random. The parameters
    # get their own names so the notebook-level `seed` is not overwritten.
    rand = np.random.randint(0, 4)
    if rand == 0:
        shear_amount = np.random.uniform(-np.pi/4, np.pi/4)
        img_np = shear_transform(shear_amount, img_np)
    elif rand == 1:
        angle = np.random.uniform(-90, 90)
        img_np = rotation_transformation(angle, img_np)
    elif rand == 2:
        img_np = fish(img_np, 0.4)
    else:
        scale_amount = np.random.uniform(0.4, 1)
        img_np = scaling_transformation(scale_amount, img_np)

    # Re-encode the transformed image as JPEG bytes.
    # NOTE(review): the *255 assumes the transformation helpers return floats in
    # [0, 1]; values outside that range would wrap in the uint8 cast — confirm.
    image = Image.fromarray(np.uint8(img_np*255))
    imgByteArr = io.BytesIO()
    image.save(imgByteArr, format='JPEG')
    imgByteArr = imgByteArr.getvalue()

    race_array.append(race)
    age_array.append(age)
    gender_array.append(gender)
    diseases_array.append(sub_y_d)

    img_array.append(imgByteArr)

    count+=1


In [11]:
record_file = 'data/Chexpert_val_transformed.tfrecords'
Labels_diseases = ['Atelectasis', 'Cardiomegaly', 'Consolidation', 'Edema', 'Enlarged Cardiomediastinum', 'Fracture', 'Lung Lesion', 'Lung Opacity', 'No Finding', 'Pleural Effusion', 'Pleural Other', 'Pneumonia', 'Pneumothorax', 'Support Devices']

# Write one tf.train.Example per augmented image: the JPEG bytes, the race, age
# and gender codes, and one float feature per finding in Labels_diseases.
with tf.io.TFRecordWriter(record_file) as writer:
    for idx, jpg_bytes in enumerate(img_array):
        example = tf.train.Example()
        feature_map = example.features.feature

        feature_map['jpg_bytes'].bytes_list.value.append(jpg_bytes)

        feature_map['race'].int64_list.value.append(race_array[idx])

        # diseases_array[idx] holds this record's labels in Labels_diseases order.
        record_labels = diseases_array[idx]
        for pos, finding in enumerate(Labels_diseases):
            feature_map[finding].float_list.value.append(record_labels[pos])

        feature_map['age'].int64_list.value.append(age_array[idx])

        feature_map['gender'].int64_list.value.append(gender_array[idx])

        writer.write(example.SerializeToString())

In [5]:
def _one_hot(position, size):
    """Return a list of `size` zeros with a single 1 at index `position`."""
    return [1 if k == position else 0 for k in range(size)]


def get_data(data_split='test', diseases=False, attribute='race'):
    """Load one transformed MIMIC split into memory as (images, one-hot labels).

    NOTE(review): cells earlier in this notebook call
    get_data(cv_filter=..., Range=...), which this signature does not accept;
    running this cell shadows whichever get_data those cells relied on.

    Parameters
    ----------
    data_split : str
        'train' or 'val' select the matching file; any other value loads the
        test file.
    diseases : bool
        Accepted for compatibility with existing calls; it is not used.
    attribute : str
        Which label to build: 'race' (3 classes, the default), 'gender'
        (2 classes) or 'age' (4 classes).

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        The images resized to 224x224 as float32, and the one-hot labels.
        Records whose race code is not 0, 1 or 4 are skipped for every
        attribute.

    Raises
    ------
    ValueError
        If `attribute` is not one of the supported names.
    """
    if attribute not in ('race', 'gender', 'age'):
        raise ValueError(
            "attribute must be 'race', 'gender' or 'age', got {!r}".format(attribute))

    # Kept from the original: loading a split also resets NumPy's global RNG.
    np.random.seed(2021)

    split_files = {
        'train': 'data/mimic_train_transformed.tfrecords',
        'val': 'data/mimic_val_transformed.tfrecords',
    }
    filename = split_files.get(data_split, 'data/mimic_test_transformed.tfrecords')

    # Race codes that are kept, mapped to their position in the one-hot vector.
    race_position = {0: 0, 1: 1, 4: 2}
    # Age codes 0 and 1 share the first bucket; codes not listed here fall into
    # the last bucket.
    age_position = {0: 0, 1: 0, 2: 1, 3: 2}

    X = []
    y = []

    raw_dataset = tf.data.TFRecordDataset(filename)
    for raw_record in raw_dataset:
        example = tf.train.Example()
        example.ParseFromString(raw_record.numpy())
        features = example.features.feature

        ethnicity = features['race'].int64_list.value[0]
        age = features['age'].int64_list.value[0]
        gender = features['gender'].int64_list.value[0]

        if ethnicity not in race_position:
            continue

        if attribute == 'race':
            label = _one_hot(race_position[ethnicity], 3)
        elif attribute == 'gender':
            label = _one_hot(0 if gender == 0 else 1, 2)
        else:
            label = _one_hot(age_position.get(age, 3), 4)

        # np.frombuffer replaces np.fromstring, whose binary mode is deprecated.
        nparr = np.frombuffer(features['jpg_bytes'].bytes_list.value[0], np.uint8)
        img_np = cv.imdecode(nparr, cv.IMREAD_GRAYSCALE)

        X.append(np.float32(st.resize(img_np, (224, 224))))
        y.append(label)

    # One collection after the loop instead of a full collection per record.
    gc.collect()

    return np.array(X), np.array(y)

In [None]:
# Load the train/val/test splits fully into memory.
# NOTE(review): diseases=True has no effect with the get_data defined in this
# notebook, which never reads that argument.
X_train, y_train = get_data(data_split='train', diseases=True)
X_val, y_val = get_data(data_split='val', diseases=True)
X_test, y_test = get_data(data_split='test', diseases=True)

In [None]:
# AUC column names and result-file suffix for each label width used in this
# notebook (3 for race, 4 for age, 2 for gender). Deriving them from the loaded
# labels keeps the model size, the columns and the file name in agreement.
label_layouts = {
    3: (["WHITE", "AFRICAN AMERICAN", "ASIA"], 'race'),
    2: (["Male", "Female"], 'gender'),
    4: (["0-40", "40-60", "60-80", "80-"], 'age'),
}
n_classes = y_train.shape[1]  # y_train is one-hot: one column per class
if n_classes not in label_layouts:
    raise ValueError("unsupported number of classes: {}".format(n_classes))
auc_columns, attribute_name = label_layouts[n_classes]
n_repeats = 5  # independent trainings

# Create the output folder up front so a missing directory cannot discard the
# results after all the training has finished.
os.makedirs('csv_result', exist_ok=True)

# Collect one record per repeat and build the frame once at the end:
# DataFrame.append is deprecated (removed in pandas 2.0) and copies the whole
# frame on every call.
auc_rows = []

for j in range(n_repeats):

    model = define_model(n_classes)

    model.fit(X_train, y_train, epochs=15, callbacks=callback, batch_size=BATCH_SIZE, validation_data=(X_val, y_val))

    y_preds = model.predict(X_test)

    auc_score = cal_auc(y_test, y_preds)
    auc_rows.append({name: auc_score[k] for k, name in enumerate(auc_columns)})

    # Dropping the Python reference alone does not release the global state
    # Keras keeps for each model built, so reset it between repeats.
    del y_preds, model
    tf.keras.backend.clear_session()
    gc.collect()

    display.clear_output()

df = pd.DataFrame(auc_rows, columns=auc_columns)
df.to_csv('csv_result/Random_augmentation_result_{}.csv'.format(attribute_name))

# The X_* / y_* arrays are left alive here; the original cell had their
# deletion commented out.
gc.collect()