# Environment Setting

In [1]:
import pandas as pd
import os
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import backend as K
import numpy as np
from PIL import Image
os.environ["SM_FRAMEWORK"] = "tf.keras"
import segmentation_models as sm

Segmentation Models: using `tf.keras` framework.


In [2]:
strategy = tf.distribute.MirroredStrategy(devices=["/gpu:0", "/gpu:1"])

gpus = tf.config.list_physical_devices('GPU')
if gpus:
  try:
    tf.config.experimental.set_visible_devices(gpus[0], 'GPU')
    tf.config.experimental.set_memory_growth(gpus[0], True)
  except RuntimeError as e:
    print(e)

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1')
Visible devices cannot be modified after being initialized


In [3]:
import os
import random

input_dir = "./data/annotation_JPG_process/"
target_dir = './data/annotation_masked_edited_320'


img_size = (320, 320)
num_classes = 9
pre_split = False

input_img_paths = sorted(
    [
        os.path.join(input_dir, fname)
        for fname in os.listdir(input_dir)
        if fname.endswith(".JPG")
    ]
)

target_img_paths = sorted(
    [
        os.path.join(target_dir, fname)
        for fname in os.listdir(target_dir)
        if fname.endswith(".npy") and not fname.startswith(".")
    ]
)

print("Number of samples:", len(input_img_paths))


val_samples = 600
seed = 42
random.Random(seed).shuffle(input_img_paths)
random.Random(seed).shuffle(target_img_paths)
train_input_img_paths = input_img_paths[:-val_samples]
train_target_img_paths = target_img_paths[:-val_samples]
val_input_img_paths = input_img_paths[-val_samples:]
val_target_img_paths = target_img_paths[-val_samples:]

matching_paths = []
for input_path in train_input_img_paths:
    input_filename = os.path.splitext(input_path)[0].split('/')[-1]  
    for target_path in train_target_img_paths:
        target_filename = os.path.splitext(target_path)[0].split('/')[-1]  
        if input_filename == target_filename:
            matching_paths.append((input_path, target_path))
            break
        
train_input_img_paths, train_target_img_paths = zip(*matching_paths)
train_input_img_paths = list(train_input_img_paths)
train_target_img_paths = list(train_target_img_paths)

Number of samples: 2622


In [4]:
import random

def convertimg(img_paths,img_size,is_training):
    x = []
    for j, path in enumerate(img_paths):

        img = np.array(Image.open(path))
        img = img[:,:,:3]
        img = Image.fromarray(img)     
        img = np.array(img.resize(img_size))
        randn = round(random.uniform(0,1)*100)
        x.append(img)
    return np.array(x)

# classification label 
def extract_label(path):
    label_df = pd.read_csv("./2022-OCT-Seg-Data/path_label_total.csv")
    file_name = path.split("/")[-1].split(".")[0]
    
    label = label_df[label_df["path"] == file_name]["label"].values[0]
    if label == "CRVO":
        label = 0
    elif label == "CSC":
        label = 1
    elif label == "DM":
        label = 2
    elif label == "ERM":
        label = 3
    elif label == "MH":
        label = 4
    elif label == "Normal":
        label = 5
    elif label == "PCV":
        label = 6
    elif label == "RAP":
        label = 7
    elif label == "wetAMD":
        label = 8

    return label


def seg_label(path, img_size=(320,320)):
    path = path.replace('tif', 'npy')
    img = np.load(path)
    y = img.astype('int').reshape(img_size + (1,))

    return y

def extract_erm_label(path):
    erm_data = pd.read_csv("./erm_data.txt")
    file_name = path.split("/")[-1].split(".")[0]
    erm_data = np.array(erm_data['fname'])
    if file_name in erm_data:
        return 1
    return 0

# Split our img paths into a training and a validation set
val_samples = 600


val_input_img_paths = input_img_paths[-val_samples:]
val_target_img_paths = target_img_paths[-val_samples:]


val_img = convertimg(val_input_img_paths, img_size, False)

n_classes = 9

tf.config.run_functions_eagerly(True)

########### clf label ############
train_cls_label = []
val_cls_label = []
train_seg_label = []
val_seg_label = []
train_erm_label = []
val_erm_label = []


for path in val_target_img_paths:
    val_seg_label.append(seg_label(path))


for path in val_input_img_paths:
    val_cls_label.append(extract_label(path))

for path in val_input_img_paths:
    val_erm_label.append(extract_erm_label(path))


In [7]:
# Model
seg_model = sm.Unet('resnet101', classes=9, activation='softmax', input_shape=(320, 320, 3), encoder_weights='imagenet', encoder_freeze=False)


last_layer_name = 'seg_output'
seg_model.layers[-1]._name = last_layer_name


seg_model = tf.keras.models.Model(inputs=seg_model.inputs, outputs=seg_model.outputs, name=seg_model.name)

clf_model = tf.keras.Model(inputs=seg_model.input, outputs=seg_model.get_layer('relu1').output)
x = tf.keras.layers.GlobalAveragePooling2D()(clf_model.output)
x = tf.keras.layers.Dropout(0.5)(x)
x = tf.keras.layers.Dense(9, activation='softmax', name = "cls_output")(x)
clf_model = tf.keras.Model(inputs=clf_model.input, outputs=x)

# ERM model
# Define a binary classification head for predicting the train_erm_label value
erm_model = tf.keras.Model(inputs=seg_model.input, outputs=seg_model.get_layer('relu1').output)
x = tf.keras.layers.GlobalAveragePooling2D()(erm_model.output)
x = tf.keras.layers.Dropout(0.5)(x)
x = tf.keras.layers.Dense(1, activation='sigmoid', name='erm_output')(x)
erm_model = tf.keras.Model(inputs=erm_model.input, outputs=x)


# Define inputs and outputs
seg_inputs = seg_model.input
clf_inputs = clf_model.input
erm_inputs = erm_model.input
seg_output = seg_model.output
cls_output = clf_model.output
erm_output = erm_model.output

model = tf.keras.Model(inputs=seg_inputs, outputs=[seg_output, cls_output, erm_output])

model.load_weights('./new_ratio_resnet101_320_5_b_4_d_0.5_ermw_0.2_r_1.0.h5')

In [8]:
def dice_coef(y_true, y_pred):
    y_true_f = K.flatten(y_true)
    y_pred_f = K.flatten(y_pred)
    intersection = K.sum(y_true_f * y_pred_f)
    return (2. * intersection + 1.) / (K.sum(y_true_f) + K.sum(y_pred_f) + 1.)

def dice_coef_multilabel(y_true, y_pred, numLabels=9):
    dice=0
    for index in range(numLabels):
        dice -= dice_coef(y_true[:,:,index,:,:], y_pred[:,:,index,:,:])
    return dice

def dice_coef_loss(y_true, y_pred):
    return 0.5*(1 - dice_coef(y_true, y_pred)) + 0.5*tf.keras.losses.CategoricalCrossentropy()(y_true, y_pred)

def tversky(y_true, y_pred, smooth=1, alpha=0.7):
    y_true_pos = K.flatten(y_true)
    y_pred_pos = K.flatten(y_pred)
    true_pos = K.sum(y_true_pos * y_pred_pos)
    false_neg = K.sum(y_true_pos * (1 - y_pred_pos))
    false_pos = K.sum((1 - y_true_pos) * y_pred_pos)
    return (true_pos + smooth) / (true_pos + alpha * false_neg + (1 - alpha) * false_pos + smooth)

def tversky_loss(y_true, y_pred):
    return 1 - tversky(y_true, y_pred)

def focal_tversky_loss(y_true, y_pred, gamma=0.75):
    tv = tversky(y_true, y_pred)
    return K.pow((1 - tv), gamma)

In [9]:
val_preds, clf_result, erm_result = model.predict(val_img)

  "Even though the `tf.config.experimental_run_functions_eagerly` "


# Visualize Segmentation Results

In [12]:
df = pd.read_csv("./2022-OCT-Seg-Data/labels.csv")
label_idx = []
labels = ['R32', 'YG23', 'BG02', 'B29', 'E15', 'YR16', 'E43', 'YR00',]
df = df[df.copic.isin(labels)]

for row in df.values:
    r, g, b = row[1], row[2], row[3]
    label_idx.append([r,g,b])


label_idx.append([0, 0, 0])

keys= []
for file in val_input_img_paths:
    key = file.split('/')[-1].replace('.npy', '')
    keys.append(key)

In [None]:
from matplotlib import pyplot as plt
import numpy as np

new_mask_path = './data/annotation_masked_edited_320/'

for idx in range(10,20):
    source = np.array(Image.open(val_input_img_paths[idx]).resize((320,320)))

    key = val_input_img_paths[idx].split('/')[-1].replace('.JPG', '')
    source_path = new_mask_path+ key + '.npy'

    mask = np.argmax(val_preds[idx], axis=-1)
    pred = np.zeros((mask.shape) + (3,))
    for i in range(pred.shape[0]):
        for j in range(pred.shape[1]):
            pred[i, j] = np.array(label_idx)[int(mask[i, j])]
    pred = Image.fromarray(pred.astype(np.uint8))
    fname = val_input_img_paths[idx].split('/')[-1].replace("JPG", "tiff")

    mask = np.load(source_path)
    target = np.zeros((320,320) + (3,))
    for i in range(target.shape[0]):
        for j in range(target.shape[1]):
            target[i, j] = np.array(label_idx)[int(mask[i, j])]
    target = Image.fromarray(target.astype(np.uint8))

    val = np.load(val_target_img_paths[idx])
    val = keras.utils.to_categorical(val.astype('float32'))
    val = keras.utils.to_categorical(mask.astype('float32'))
    dice = dice_coef(val, val_preds[idx])

    plt.figure(figsize=(20,5))
    plt.subplot(1,3,1)
    plt.title('Source')
    plt.imshow(source)
    plt.subplot(1,3,2)
    plt.title('Target')
    plt.imshow(target)

    plt.subplot(1,3,3)
    plt.title(f'Prediction: {dice}')
    plt.imshow(pred)
    plt.show()

In [15]:
from matplotlib import pyplot as plt
import numpy as np

new_mask_path = './data/annotation_masked_edited_320/'
cls_dice = []

def dice_coef_multilabel(y_true, y_pred, numLabels=9):
    dice=[]
    for index in range(numLabels):
        res = dice_coef(y_true[:,:,index], y_pred[:,:,index]).numpy()
        dice.append(res)
    return dice

for idx in range(600):
    try:
        key = val_input_img_paths[idx].split('/')[-1].replace('.JPG', '')
        source_path = new_mask_path+ key + '.npy'
        mask = np.load(source_path)

        val = keras.utils.to_categorical(mask.astype('float32'))
        dice = dice_coef_multilabel(val, val_preds[idx])
        cls_dice.append(dice)
    except Exception as e:
        print(e)

[Errno 2] No such file or directory: '/home/dsail/jiwonkim/2022-seg/data/annotation_masked_edited_320/00377957_vol_F_54_20150414_L_703_50_189612_bscan_8.npy'
[Errno 2] No such file or directory: '/home/dsail/jiwonkim/2022-seg/data/annotation_masked_edited_320/540471201805032013.npy'
[Errno 2] No such file or directory: '/home/dsail/jiwonkim/2022-seg/data/annotation_masked_edited_320/00162067_vol_M_57_20200406_L_703_50_219567_bscan_19.npy'
[Errno 2] No such file or directory: '/home/dsail/jiwonkim/2022-seg/data/annotation_masked_edited_320/00603644_vol_F_67_20200417_L_703_50_227686_bscan_18.npy'
[Errno 2] No such file or directory: '/home/dsail/jiwonkim/2022-seg/data/annotation_masked_edited_320/416023201506242018.npy'
[Errno 2] No such file or directory: '/home/dsail/jiwonkim/2022-seg/data/annotation_masked_edited_320/542284201805251018.npy'


In [None]:
from matplotlib import pyplot as plt
import numpy as np

new_mask_path = './data/annotation_masked_edited_320/'
cls_dice = []

for idx in range(600):
    try:
        key = val_input_img_paths[idx].split('/')[-1].replace('.JPG', '')
        source_path = new_mask_path+ key + '.npy'
        mask = np.load(source_path)

        target_img = np.array(Image.open(val_input_img_paths[idx]).resize((320,320)))

        print(mask.shape)
        val = keras.utils.to_categorical(mask.astype('float32'))
        print(val.shape)
        dice = dice_coef_multilabel(val, val_preds[idx])
        cls_dice.append(dice)
        print(cls_dice)

        pred_mask = np.argmax(val_preds[idx], axis=-1)
        pred = np.zeros((pred_mask.shape) + (3,))
        for i in range(pred.shape[0]):
            for j in range(pred.shape[1]):
                pred[i, j] = np.array(label_idx)[int(pred_mask[i, j])]
        pred = Image.fromarray(pred.astype(np.uint8))

        target = np.zeros((320,320) + (3,))
        for i in range(target.shape[0]):
            for j in range(target.shape[1]):
                target[i, j] = np.array(label_idx)[int(mask[i, j])]
        target = Image.fromarray(target.astype(np.uint8))


        plt.figure(figsize=(20,5))
        plt.subplot(1,3,1)
        plt.title('Source')
        plt.imshow(pred)
        plt.subplot(1,3,2)
        plt.title('Target')
        plt.imshow(target)
        plt.subplot(1,3,3)
        plt.title('Target img')
        plt.imshow(target_img)
        plt.show()

        
    
    except Exception as e:
        print(e)

    break

In [114]:
unique_label = []
for idx in range(600):
    val = np.load(val_target_img_paths[idx])
    unique_label.append(val)
np.unique(unique_label)

array([0., 1., 2., 3., 4., 5., 6., 7., 8.])

In [None]:
dice_per_class = np.mean(np.array(cls_dice), axis=0)
pd.DataFrame(dice_per_class).transpose()

In [None]:
dice_score = []
for idx in range(600):
        try:
                key = val_input_img_paths[idx].split('/')[-1].replace('.JPG', '')
                source_path = new_mask_path+ key + '.npy'
                mask = np.load(source_path)
                val = keras.utils.to_categorical(mask.astype('float32'))

                dice = dice_coef(val, val_preds[idx])
                dice_score.append(dice)
        except:
                pass

np.array(dice_score).mean()

# Classification Result

In [117]:
pred_label = []
for i in range(len(val_preds)):
    pred_label.append(np.argmax(clf_result[i]))
cls_label = val_cls_label

In [118]:

print(len(val_cls_label))
print(len(pred_label))

600
600


In [None]:
from sklearn.metrics import confusion_matrix, recall_score, precision_score, f1_score, roc_auc_score, accuracy_score
from sklearn.metrics import classification_report

gamma = 1.0

def class_report(y_pred,y_true,name):
    #multiclass classification report
    sensitivty = recall_score(y_true, y_pred, pos_label=1, average='weighted')
    specificity = recall_score(y_true, y_pred, pos_label=0, average='weighted')
    precision = precision_score(y_true, y_pred, average='weighted')
    f1 = f1_score(y_true, y_pred, average='weighted')
    macro_precision = precision_score(y_true, y_pred, average='macro')
    macro_recall = recall_score(y_true, y_pred, average='macro')
    macro_f1 = f1_score(y_true, y_pred, average='macro')

    with open(f'./{name}_cr.txt', 'w') as f:
        # print(classification_report(y_true, y_pred), file=f)
        print("accuracy: ", accuracy_score(y_true, y_pred), file=f)
        print("sensitivity: ", sensitivty, file=f)
        print("specificity: ", specificity, file=f)
        print("precision: ", precision, file=f)
        print("f1: ", f1, file=f)
        print("macro_precision: ", macro_precision, file=f)
        print("macro_recall: ", macro_recall, file=f)
        print("macro_f1: ", macro_f1, file=f)
        print("", file=f)
        print("Confusion Matrix: ",file=f)
        print(confusion_matrix(y_true, y_pred), file=f)

# print(classification_report(val_cls_label, pred_label))
print("accuracy: ", accuracy_score(val_cls_label, pred_label))
#classification  result
class_report(val_cls_label, pred_label, 'results')

#multi label classification report
classification_report(val_cls_label, pred_label)

In [None]:
from sklearn.metrics import classification_report, accuracy_score

print(classification_report(val_cls_label, pred_label))
print("accuracy: ", accuracy_score(val_cls_label, pred_label))

In [None]:
def draw_confusion_matrix(y_true, y_pred, path='', cname=['CRVO','CSC', 'DM', 'ERM', 'MH', 'Normal', 'PCV', 'RAP', 'wetAMD']):
    from sklearn.metrics import confusion_matrix
    import seaborn as sns
    import matplotlib.pyplot as plt
    import pandas as pd

    cm = confusion_matrix(y_true, y_pred)
    
    cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
    cm = np.round(cm, 2)
    df_cm = pd.DataFrame(cm, columns=cname, index=cname)
    plt.figure(figsize=(5.5, 4))
    sns.heatmap(df_cm, annot=True, cmap="Blues")
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.savefig(path)

draw_confusion_matrix(cls_label, pred_label)

In [None]:
from keras.metrics import Accuracy
accuracy = Accuracy()
rounded_erm_result = np.round(erm_result)
erm_label = val_erm_label
pred_label = accuracy(erm_label, rounded_erm_result)
print("ERM accuracy:", pred_label.numpy())