In [2]:
import pydicom as dicom
import matplotlib.pyplot as plt
import os
import cv2
import pandas as pd
import csv

import tensorflow as tf
from tensorflow.keras import datasets, layers, models
from keras.callbacks import ModelCheckpoint, EarlyStopping
from sklearn.preprocessing import LabelBinarizer


import tensorflow.keras.backend as K

import tensorflow.keras as keras

import time
import seaborn as sns

from skimage import io
import numpy as np
from numpy import genfromtxt

import numpy.random as rng
from sklearn.utils import shuffle

%matplotlib inline

In [39]:
# Project layout, resolved relative to the directory the notebook is run from.
project_dir = os.getcwd()
print(project_dir)

# <project_dir>/data holds the metadata, <project_dir>/data/images the image
# files that the generators read from.
data_dir, images_dir = (
    os.path.join(project_dir, 'data'),
    os.path.join(project_dir, 'data', 'images'),
)

/Users/norbiorb/Data/git/propulsion/FinalProject/mri-classifier


In [8]:
# Load the pre-trained Keras FaceNet model stored in <project_dir>/facenet_model.
# The loader is taken from tf.keras (not the standalone `keras` package) so the
# model is built by the same Keras implementation as the tf.keras optimizer,
# callbacks and generators used in the rest of this notebook.
load_model = tf.keras.models.load_model  # name kept for any cell that relies on it

facenet_model_dir_path = os.path.join(project_dir, 'facenet_model')
facenet_model_path = os.path.join(facenet_model_dir_path, 'facenet_keras.h5')
model = load_model(facenet_model_path)

# summarize input and output shape
print(model.inputs)
print(model.outputs)

[<tf.Tensor 'input_1:0' shape=(None, 160, 160, 3) dtype=float32>]
[<tf.Tensor 'Bottleneck_BatchNorm/batchnorm/add_1:0' shape=(None, 128) dtype=float32>]


In [9]:
# Print the layer-by-layer architecture of the loaded FaceNet model.
model.summary()

Model: "inception_resnet_v1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 160, 160, 3) 0                                            
__________________________________________________________________________________________________
Conv2d_1a_3x3 (Conv2D)          (None, 79, 79, 32)   864         input_1[0][0]                    
__________________________________________________________________________________________________
Conv2d_1a_3x3_BatchNorm (BatchN (None, 79, 79, 32)   96          Conv2d_1a_3x3[0][0]              
__________________________________________________________________________________________________
Conv2d_1a_3x3_Activation (Activ (None, 79, 79, 32)   0           Conv2d_1a_3x3_BatchNorm[0][0]    
________________________________________________________________________________

Block8_5_Branch_0_Conv2d_1x1 (C (None, 3, 3, 192)    344064      Block8_4_Activation[0][0]        
__________________________________________________________________________________________________
Block8_5_Branch_1_Conv2d_0c_3x1 (None, 3, 3, 192)    110592      Block8_5_Branch_1_Conv2d_0b_1x3_A
__________________________________________________________________________________________________
Block8_5_Branch_0_Conv2d_1x1_Ba (None, 3, 3, 192)    576         Block8_5_Branch_0_Conv2d_1x1[0][0
__________________________________________________________________________________________________
Block8_5_Branch_1_Conv2d_0c_3x1 (None, 3, 3, 192)    576         Block8_5_Branch_1_Conv2d_0c_3x1[0
__________________________________________________________________________________________________
Block8_5_Branch_0_Conv2d_1x1_Ac (None, 3, 3, 192)    0           Block8_5_Branch_0_Conv2d_1x1_Batc
__________________________________________________________________________________________________
Block8_5_B

In [10]:
# Total number of scalar weights in the FaceNet model (shown as the cell output).
model.count_params()

22808144

In [16]:
# Instantiate EfficientNetB4 with ImageNet weights, keeping its original
# classification head (include_top=True).
# NOTE(review): the head kept here is the ImageNet classifier, while the labels
# used for training further down come from a column with only a few classes —
# presumably include_top=False plus a new Dense(num_classes) head is intended;
# confirm before relying on the training results.
efficient_net = tf.keras.applications.EfficientNetB4(
    include_top=True, weights='imagenet')

In [17]:
# Print the layer-by-layer architecture of the EfficientNetB4 model.
efficient_net.summary()

Model: "efficientnetb4"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_3 (InputLayer)            [(None, 380, 380, 3) 0                                            
__________________________________________________________________________________________________
rescaling_2 (Rescaling)         (None, 380, 380, 3)  0           input_3[0][0]                    
__________________________________________________________________________________________________
normalization_2 (Normalization) (None, 380, 380, 3)  7           rescaling_2[0][0]                
__________________________________________________________________________________________________
stem_conv_pad (ZeroPadding2D)   (None, 381, 381, 3)  0           normalization_2[0][0]            
_____________________________________________________________________________________

In [19]:
import cnn_helper as hp

# Directory holding the image metadata. Built with os.path.join instead of
# concatenating '/data' so the path separator is correct on every platform.
json_dir = os.path.join(project_dir, 'data')

# Load the 'mri-images' metadata as a DataFrame (one row per image) and show
# the first rows. The loading itself is done by the project helper module.
df = hp.load_json_as_df(json_dir, 'mri-images')
df.head()

loaded mri-images.json with shape (229, 6)



Unnamed: 0,patient-id,perspective,sequence,perspective-sequence,image-number,image-name
0,TCGA120616,AX,ADC,AX-ADC,1,AX_ADC_Glioma_GBM_TCGA120616_1.jpg
1,TCGA120616,AX,ADC,AX-ADC,2,AX_ADC_Glioma_GBM_TCGA120616_2.jpg
2,TCGA764934,AX,ADC,AX-ADC,1,AX_ADC_Glioma_GBM_TCGA764934_1.jpg
3,TCGA764934,AX,ADC,AX-ADC,2,AX_ADC_Glioma_GBM_TCGA764934_2.jpg
4,TCGA766662,AX,ADC,AX-ADC,1,AX_ADC_Glioma_GBM_TCGA766662_1.jpg


In [20]:
from sklearn.model_selection import train_test_split

# Column with the image file names, and the column used as the class label.
# The label column can be 'perspective', 'sequence' or 'perspective-sequence'.
x_col = 'image-name'
y_col = 'perspective'

n_unique_labels = df[y_col].unique().shape[0]
print("'{}' column contains {} unique classes".format(y_col, n_unique_labels))


def _with_label(features, labels):
    """Return a copy of `features` with `labels` attached again as column `y_col`."""
    labelled = features.copy()
    labelled[y_col] = labels.copy()
    return labelled


# NOTE(review): the split is done per image row; the metadata has several rows
# per 'patient-id', so images of one patient can presumably land in more than
# one split — confirm this is acceptable for the evaluation.

# 1) hold out 10% of all rows as the test set
X_train_full, X_test, y_train_full, y_test = train_test_split(
    df.drop(columns=[y_col]), df[y_col], test_size=0.1, random_state=42)
df_train_full = _with_label(X_train_full, y_train_full)
df_test = _with_label(X_test, y_test)

# 2) split the remainder 80/20 into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(
    X_train_full, y_train_full, test_size=0.2, random_state=42)
df_train = _with_label(X_train, y_train)
df_val = _with_label(X_val, y_val)

'perspective' column contains 3 unique classes


In [21]:
# Images requested per class: 180 for training, 20% of that (36) for each of
# the validation and test sets.
max_train_pics = 180
max_train_val_pics = int(.2 * max_train_pics)


def _resample(frame, per_class):
    """Resample `frame` through hp.sample_df, asking for `per_class` rows per
    `y_col` class with replace=True (presumably sampling with replacement —
    see cnn_helper)."""
    return hp.sample_df(frame, y_col, n_sample_per_class=per_class, replace=True)


# Call order (train, test, val) is kept in case the helper draws from a shared
# random state.
df_train = _resample(df_train, max_train_pics)
df_test = _resample(df_test, max_train_val_pics)
df_val = _resample(df_val, max_train_val_pics)

In [40]:
# Input resolution: EfficientNetB4 takes 380x380 images, Facenet takes 160x160.
IMG_SIZE = 380
#IMG_SIZE = 160
batch_size = 50

# flow_from_dataframe needs the class names; they are sorted so the
# name -> index mapping is the same for every generator built from them.
train_classes = np.sort(df_train[y_col].unique()).tolist()

# NOTE(review): the EfficientNetB4 summary shows a built-in Rescaling layer, so
# scaling the pixels to [0, 1] here presumably rescales them twice for that
# model — confirm (and keep train/val/test preprocessing identical if changed).
augmentation_options = dict(
    rescale=1./255,
    rotation_range=15,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.005,
    zoom_range=[0.9, 1.4],
    horizontal_flip=True,
    vertical_flip=False,
    brightness_range=(.8, 1.2),
    fill_mode='nearest',
)
datagen_train = tf.keras.preprocessing.image.ImageDataGenerator(**augmentation_options)

# Shuffled, augmented training batches with integer ('sparse') labels.
train_generator = datagen_train.flow_from_dataframe(
    dataframe=df_train,
    directory=images_dir,
    x_col=x_col,
    y_col=y_col,
    # save_to_dir=savepath + '/aug_images',
    classes=train_classes,
    class_mode='sparse',
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=batch_size,
    shuffle=True,
)

Found 540 validated image filenames belonging to 3 classes.


In [41]:
# Validation and test images are only rescaled — no augmentation.
datagen_val = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1./255)


def _evaluation_flow(frame):
    """Build an unshuffled batch iterator over the images listed in `frame`,
    using the same class order and image size as the training generator."""
    return datagen_val.flow_from_dataframe(
        dataframe=frame,
        directory=images_dir,
        x_col=x_col,
        y_col=y_col,
        classes=train_classes,
        class_mode='sparse',
        target_size=(IMG_SIZE, IMG_SIZE),
        batch_size=batch_size,
        shuffle=False,
    )


val_generator = _evaluation_flow(df_val)
test_generator = _evaluation_flow(df_test)

Found 108 validated image filenames belonging to 3 classes.
Found 108 validated image filenames belonging to 3 classes.


In [42]:
# Number of batches needed to see every image of a set once.
# The quotient is rounded UP: round() drops the final partial batch whenever
# the fractional part is below .5 (e.g. 108 images / batch of 50 -> 2 steps,
# which leaves 8 images unused).
steps_train = int(np.ceil(train_generator.n / batch_size))
steps_val = int(np.ceil(val_generator.n / batch_size))
steps_test = int(np.ceil(test_generator.n / batch_size))

# Get the class-numbers for all the images in the training- and test-sets
cls_train = train_generator.classes
cls_test = test_generator.classes
# displayed as the cell output: number of training labels
len(cls_train)

540

In [43]:
from sklearn.utils.class_weight import compute_class_weight

# 'balanced' weights, one entry per class index found in the training labels.
class_weight = compute_class_weight(
    class_weight='balanced',
    classes=np.unique(cls_train),
    y=cls_train,
)

# fit() takes the weights as a {class_index: weight} dictionary
class_weight_dict = dict(enumerate(class_weight))

# number of classes; last expression, so the notebook displays it
num_classes = len(class_weight)
num_classes

3

In [44]:
# Shape of a single input image: IMG_SIZE x IMG_SIZE pixels, 3 channels.
input_shape = (IMG_SIZE, IMG_SIZE, 3)
# Symbolic Keras input tensor of that shape.
img_inputs = tf.keras.Input(shape=input_shape)

In [45]:
# Adam with a small learning rate. `learning_rate` is the supported keyword and
# attribute name; the `lr` alias is deprecated.
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-5)
# display the configured learning rate
optimizer.learning_rate.numpy()

1e-05

In [46]:
from datetime import datetime

# TensorBoard log directory for the EfficientNet run.
# The former timestamped assignment was always overwritten by the line below,
# so it is kept only as an option: to log every run into its own directory use
#   logs = "data/logs/" + datetime.now().strftime("%Y%m%d-%H%M%S")
logs = "data/logs/" + "efficientnet"

# histogram_freq=1 writes weight histograms every epoch; profile_batch selects
# the range of batches (500 to 520) that is profiled.
tboard_callback = tf.keras.callbacks.TensorBoard(log_dir=logs,
                                                 histogram_freq=1,
                                                 profile_batch='500,520')

In [47]:
# The generators deliver integer class indices (class_mode='sparse'), hence the
# sparse variants of the cross-entropy loss and the accuracy metric.
loss = 'sparse_categorical_crossentropy'
lst_metrics = ['sparse_categorical_accuracy']

In [48]:
# Compile EfficientNet with the optimizer, loss and metric configured in the
# preceding cells.
efficient_net.compile(optimizer=optimizer, loss=loss, metrics=lst_metrics)

In [49]:
# Number of training epochs passed to fit().
epochs = 80

In [50]:
# Train EfficientNet on the augmented training batches, evaluate on the
# validation generator after each epoch and log to TensorBoard. Class weights
# are passed so every class contributes according to its 'balanced' weight.
history = efficient_net.fit(
    train_generator,
    validation_data=val_generator,
    epochs=epochs,
    steps_per_epoch=steps_train,
    class_weight=class_weight_dict,
    callbacks=[tboard_callback],
)

Epoch 1/80


KeyboardInterrupt: 

In [None]:
# Load the TensorBoard notebook extension, which provides the %tensorboard magic.
%load_ext tensorboard

In [None]:
%tensorboard --logdir=logs

In [None]:
# Evaluate the EfficientNet model trained above on the held-out test batches.
# (`model` is the FaceNet model, which has not been compiled at this point.)
# No `steps` argument is given so the whole test generator is consumed,
# including the final partial batch.
result = efficient_net.evaluate(test_generator)
# result[0] is the loss, result[1] the accuracy metric set at compile time
print("Test-set classification accuracy: {0:.2%}".format(result[1]))

### FaceNet training

In [51]:
!pip install tensorflow-addons



In [55]:
import tensorflow_addons as tfa
import tensorflow_datasets as tfds
# Triplet loss from TensorFlow Addons, used when compiling the FaceNet model.
# This rebinds `loss`, which previously held the cross-entropy loss name used
# for EfficientNet.
loss = tfa.losses.TripletSemiHardLoss()

TensorFlow Addons offers no support for the nightly versions of TensorFlow. Some things might work, some other might not. 
If you encounter a bug, do not file an issue on GitHub.


In [52]:
# Metrics for the FaceNet model; rebinds the list used for EfficientNet.
# NOTE(review): the FaceNet output is a 128-dimensional vector (see
# model.outputs) trained with a triplet loss — confirm that 'accuracy' is a
# meaningful metric in this setup.
lst_metrics = ['accuracy']

In [53]:
# Freeze the whole FaceNet model.
# NOTE(review): with every layer frozen there is presumably nothing left for
# the subsequent fit() call to update — confirm which layers should remain
# trainable.
model.trainable = False

In [56]:
# Compile the FaceNet model with its own optimizer instance. `optimizer` was
# already used to compile efficient_net; an optimizer keeps per-model state
# (step count, moment estimates), so a fresh instance with the same
# hyper-parameters is created from its config instead of sharing it.
facenet_optimizer = type(optimizer).from_config(optimizer.get_config())
model.compile(optimizer=facenet_optimizer, loss=loss, metrics=lst_metrics)

In [54]:
# TensorBoard log directory and callback for the FaceNet run; both names are
# rebound from the EfficientNet run.
logs = "data/logs/" + "facenet"

tboard_options = {
    "log_dir": logs,
    "histogram_freq": 1,
    "profile_batch": '500,520',
}
tboard_callback = tf.keras.callbacks.TensorBoard(**tboard_options)

In [None]:
history_facenet = model.fit(train_generator,
                    epochs=epochs,
                    steps_per_epoch=steps_train,
                    class_weight=class_weight_dict,
                    validation_data=val_generator,
                    callbacks = [tboard_callback]