In [None]:
# Shell out to nvidia-smi to show the NVIDIA GPUs visible on this machine.
!nvidia-smi

In [None]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import skimage.transform as st
import os
import gc
import warnings
from sklearn.metrics import classification_report, average_precision_score
from PIL import Image
from Parse_TFrecords import *
from define_model import *
from load_data import *
from utilities import *
 
# Record the TensorFlow version used for this run.
print(tf.__version__)

# Silence all Python warnings for the rest of the notebook.
# NOTE(review): this also hides deprecation warnings from NumPy/TensorFlow.
warnings.filterwarnings("ignore")
# NOTE(review): TensorFlow has already been imported at this point, so this
# setting presumably does not affect messages logged during import - confirm,
# and move it above `import tensorflow` if those should be suppressed too.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

# Restrict TensorFlow to the first physical GPU. Guard against machines
# without a GPU so the cell does not fail with an IndexError on `gpus[0]`.
gpus = tf.config.list_physical_devices(device_type='GPU')
if gpus:
    tf.config.set_visible_devices(devices=gpus[0], device_type='GPU')
else:
    print('No GPU detected; TensorFlow will run on CPU.')

In [None]:
# Number of positive (label 1) and negative (label 0) examples.
# NOTE(review): presumably counted on the training split, since `total` is
# reused below as the training shuffle buffer size - confirm.
pos, neg = 27297, 119133
total = pos + neg

# Weight each class by total / (2 * class_count) so that both classes
# contribute equally to the loss despite the imbalance.
half_total = total / 2.0
weight_for_0 = (1 / neg) * half_total
weight_for_1 = (1 / pos) * half_total

# Mapping from class label to loss weight.
class_weight = {0: weight_for_0, 1: weight_for_1}

print('Weight for class 0: {:.2f}'.format(class_weight[0]))
print('Weight for class 1: {:.2f}'.format(class_weight[1]))

# Segmentation images

In [None]:
import cv2 as cv
def coordinate(image, show=False):
    """Return the bounding box of the foreground of a segmentation mask.

    The mask is first cleaned with a 5x5 morphological opening (one erosion
    followed by one dilation). The tightest axis-aligned box around the
    remaining pixels that are greater than zero is then returned.

    Args:
        image: 2-D array-like mask (anything ``np.array`` can convert, e.g.
            a single-channel PIL image). Pixels > 0 count as foreground.
            Both square and non-square masks are supported.
        show: If True, draw the opened mask with ``plt.imshow``. Off by
            default because this function is called once per record and
            plotting on every call accumulates images on the current axes.

    Returns:
        ``[top, bottom, left, right]`` as Python ints, where ``bottom`` and
        ``right`` are exclusive, so ``mask[top:bottom, left:right]`` is the
        crop. ``[0, 0, 0, 0]`` is returned when no foreground pixel survives
        the opening.
    """
    img = np.array(image, np.float32)

    # Morphological opening removes small isolated specks from the mask.
    kernel = np.ones((5, 5), np.uint8)
    erosion = cv.erode(img, kernel, iterations=1)
    dilation = cv.dilate(erosion, kernel, iterations=1)
    if show:
        plt.imshow(dilation)

    # Project the foreground onto each axis: a row/column is occupied when
    # it contains at least one positive pixel.
    foreground = dilation > 0
    rows = np.flatnonzero(foreground.any(axis=1))
    cols = np.flatnonzero(foreground.any(axis=0))
    if rows.size == 0:
        # Nothing left after the opening: signal "no box" to the caller.
        return [0, 0, 0, 0]

    # flatnonzero returns sorted indices, so first/last are min/max.
    # Convert to plain ints so the list serialises cleanly (e.g. to CSV).
    return [int(rows[0]), int(rows[-1]) + 1, int(cols[0]), int(cols[-1]) + 1]

In [None]:
# Load images and retrieve the coordinator contained lungs
subject_ids = []
study_ids = []
coordinates = []
filename = ['../AI-Venger/data/segmentation_train.tfrecords']
raw_dataset = tf.data.TFRecordDataset(filename)
for raw_record in raw_dataset:
    #loads data  from the trecord all info 
    example = tf.train.Example() # subject id and study_id (contains several chest Xrays) 
    example.ParseFromString(raw_record.numpy()) #reads the example 

    subject_id = example.features.feature['subject_id'].int64_list.value[0] # change subject_id to type int
    study_id = example.features.feature['study_id'].int64_list.value[0]
    img_bytes = np.fromstring(example.features.feature['lung_image'].bytes_list.value[0], np.uint8)
    img_np = cv.imdecode(img_bytes, cv.IMREAD_GRAYSCALE)
    img_np = Image.fromarray(img_np).convert("P")
    coordinates.append(coordinate(img_np))
    subject_ids.append(subject_id)
    study_ids.append(study_id)

In [None]:
import pandas as pd
# One row per decoded mask: subject id, study id and its bounding box.
d = {
    'subject_id': subject_ids,
    'study_id': study_ids,
    'coordinate': coordinates,
}
df = pd.DataFrame(d)
df

In [None]:
# Persist the bounding boxes as CSV (no index column).
df.to_csv('train_coordinate', index=False)

In [None]:
# Load images and retrieve the coordinator contained lungs
subject_ids = []
study_ids = []
coordinates = []
filename = ['../AI-Venger/data/segmentation_test.tfrecords']
raw_dataset = tf.data.TFRecordDataset(filename)
for raw_record in raw_dataset:
    #loads data  from the trecord all info 
    example = tf.train.Example() # subject id and study_id (contains several chest Xrays) 
    example.ParseFromString(raw_record.numpy()) #reads the example 

    subject_id = example.features.feature['subject_id'].int64_list.value[0] # change subject_id to type int
    study_id = example.features.feature['study_id'].int64_list.value[0]
    img_bytes = np.fromstring(example.features.feature['lung_image'].bytes_list.value[0], np.uint8)
    img_np = cv.imdecode(img_bytes, cv.IMREAD_GRAYSCALE)
    img_np = Image.fromarray(img_np).convert("P")
    coordinates.append(coordinate(img_np))
    subject_ids.append(subject_id)
    study_ids.append(study_id)
d = {'subject_id': subject_ids, 'study_id': study_ids, 'coordinate': coordinates}
df = pd.DataFrame(data=d)
df.to_csv(index=False, path_or_buf='test_coordinate')

In [None]:
# Load images and retrieve the coordinator contained lungs
subject_ids = []
study_ids = []
coordinates = []
filename = ['../AI-Venger/data/segmentation_val.tfrecords']
raw_dataset = tf.data.TFRecordDataset(filename)
for raw_record in raw_dataset:
    #loads data  from the trecord all info 
    example = tf.train.Example() # subject id and study_id (contains several chest Xrays) 
    example.ParseFromString(raw_record.numpy()) #reads the example 

    subject_id = example.features.feature['subject_id'].int64_list.value[0] # change subject_id to type int
    study_id = example.features.feature['study_id'].int64_list.value[0]
    img_bytes = np.fromstring(example.features.feature['lung_image'].bytes_list.value[0], np.uint8)
    img_np = cv.imdecode(img_bytes, cv.IMREAD_GRAYSCALE)
    img_np = Image.fromarray(img_np).convert("P")
    coordinates.append(coordinate(img_np))
    subject_ids.append(subject_id)
    study_ids.append(study_id)
d = {'subject_id': subject_ids, 'study_id': study_ids, 'coordinate': coordinates}
df = pd.DataFrame(data=d)
df.to_csv(index=False, path_or_buf='val_coordinate')

In [None]:
# Read the per-split bounding-box files back (test, val, train - in that
# order) and stack them into a single table with a fresh 0..n-1 index.
split_files = ['test_coordinate', 'val_coordinate', 'train_coordinate']
df0, df1, df2 = [pd.read_csv(path) for path in split_files]

df = pd.concat([df0, df1, df2], ignore_index=True)

# NOTE(review): unlike the per-split files, this one is written with the
# default index column - confirm that readers of 'seg_coordinate' expect it.
df.to_csv('seg_coordinate')

## train

In [None]:
# Where the best weights are stored and which metric decides "best".
checkpoint_filepath = 'checkpoints/AUC/checkpoint_seg_Dnet121'
monitor_ = 'val_auc'

# Save weights only, and only when the monitored metric reaches a new maximum.
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    monitor=monitor_,
    mode='max',
    save_best_only=True,
    save_weights_only=True)

# `scheduler` is not defined in this notebook; presumably it comes from one of
# the star imports at the top - verify.
lr_scheduler_callback = tf.keras.callbacks.LearningRateScheduler(scheduler)

# Stop once the monitored metric has not improved for 3 consecutive epochs.
early_stopping_callback = tf.keras.callbacks.EarlyStopping(
    monitor=monitor_, mode='max', patience=3)

callback = [lr_scheduler_callback, early_stopping_callback, model_checkpoint_callback]

In [None]:
BATCH_SIZE = 32

# NOTE(review): TFRecordDataset's `buffer_size` is a read-buffer size in
# bytes, so BATCH_SIZE*10 is only 320 bytes - confirm this is intended.
record_file_train = 'copd_train_seg_new.tfrecords'
# Shuffle the serialized records *before* parsing them. The shuffle buffer
# spans the whole dataset (`total` elements); shuffling after the map would
# keep every parsed example in memory instead of the raw records.
train_dataset = (tf.data.TFRecordDataset(
    record_file_train, buffer_size=BATCH_SIZE*10, compression_type=None, num_parallel_reads=32)
.shuffle(total)
.map(parse_TFrecord_train)
.batch(BATCH_SIZE))

record_file_val = 'copd_val_seg_new.tfrecords'
# Validation data is only evaluated, and the monitored AUC does not depend on
# example order, so no shuffle buffer is needed here.
val_dataset = (tf.data.TFRecordDataset(
    record_file_val, buffer_size=BATCH_SIZE*10, compression_type=None, num_parallel_reads=32)
.map(parse_TFrecord_train)
.batch(BATCH_SIZE))

In [None]:
# Build the 'Dnet121' model via the project helper and print its layers.
model = load_model_from_pretrain('Dnet121')
model.summary()

# NOTE(review): the loss is configured with from_logits=True while the AUC
# metric is created with its defaults - confirm whether the model's final
# layer emits logits or probabilities so that both agree.
metric = tf.keras.metrics.AUC(name='auc')
loss_fn = tf.keras.losses.BinaryCrossentropy(from_logits=True)
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)
model.compile(loss=loss_fn, optimizer=optimizer, metrics=metric)

# Train for up to 20 epochs with the class weights and callbacks defined in
# earlier cells.
# NOTE(review): `shuffle=True` presumably has no effect when the input is a
# tf.data.Dataset - confirm against the Keras fit() documentation.
model.fit(
    train_dataset,
    epochs=20,
    shuffle=True,
    validation_data=val_dataset,
    callbacks=callback,
    class_weight=class_weight,
)

gc.collect()

## test

In [None]:
# NOTE(review): despite its name, `record_file_val` holds the *test* record
# file from here on.
record_file_val = 'copd_test_seg_new.tfrecords'

# Test pipeline: read, parse, batch - no shuffling.
test_records = tf.data.TFRecordDataset(
    record_file_val, buffer_size=256, compression_type=None, num_parallel_reads=32)
test_dataset = test_records.map(parse_TFrecord_test).batch(BATCH_SIZE)

# Labels for the test split, obtained from the project helper.
# NOTE(review): the evaluation cell assumes these are in the same order as
# the records in the test file - confirm.
y_test = get_data_label(split='test', category=None, types=0)

In [None]:
# Rebuild the 'Dnet121' model and restore the weights saved by the checkpoint
# callback, then score the whole test dataset.
checkpoint_filepath = 'checkpoints/AUC/checkpoint_seg_Dnet121'

model = load_model_from_pretrain('Dnet121')
model.load_weights(filepath=checkpoint_filepath)

y_preds = model.predict(x=test_dataset)

In [None]:
# 1034
test_CI(y_preds, y_test)

# Average precision of the raw scores.
print(average_precision_score(y_test, y_preds, average=None))

# Print a classification report at three decision thresholds: the ones
# returned by get_thresh() for 'Youden' and 'G-mean', then a fixed 0.5.
# NOTE(review): a 0.5 cut-off presumes y_preds are probabilities - confirm.
for criterion in ('Youden', 'G-mean', None):
    thresh = get_thresh(y_test, y_preds, criterion) if criterion is not None else 0.5
    print('thresh:', thresh)

    print(classification_report(y_test, np.where(y_preds >= thresh, 1, 0)))

gc.collect()