In [1]:
import os
from pickle import dump

import matplotlib.pyplot as plt
from numpy import asarray

from keras.utils import set_random_seed
from keras import Input, Model
from keras.layers import Dense, Rescaling, GlobalAveragePooling2D
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from keras.metrics import BinaryAccuracy, Precision, Recall, AUC
from keras.optimizers import Adam
from classification_models.keras import Classifiers

In [2]:
# Seed Keras' random number generation before any stochastic step runs.
seed_value = 10
set_random_seed(seed_value)

In [3]:
# --- Run configuration -------------------------------------------------------
# Spatial size every image is resized to before entering the network.
img_size = (256, 256)
dim1, dim2 = img_size
batch_size = 32
# Index in this list is the integer label: 0 = normal, 1 = pneumonia.
class_names = ['normal', 'pneumonia']
# Root folder holding the train/ val/ test/ splits. The original location is
# kept as the default; set RSNA_DATASET_DIR to run the notebook elsewhere.
dataset_dir = os.environ.get(
    'RSNA_DATASET_DIR',
    '/Users/snoopy/Desktop/Other/Harvard/Final Project/RSNA/RSNA-full-jpeg')
color_mode = 'rgb'
num_channels = 3
# Per-split image counts, ordered like class_names: [normal, pneumonia].
num_samples = {'train': [12403, 3607],
               'val': [4134, 1202]}
# Inverse-frequency class weights (N_total / N_class). They are derived from
# the TRAINING split only, so that validation labels do not influence a
# training hyperparameter.
train_total = sum(num_samples['train'])
class_weights = {label: train_total / count
                 for label, count in enumerate(num_samples['train'])}

In [4]:
# Random augmentation is applied to training images only; regions exposed by
# a geometric transform are filled with the constant value 0.
augmentation_options = dict(
    rotation_range=35,
    width_shift_range=0.1,
    height_shift_range=0.2,
    brightness_range=(0.15, 1.25),
    zoom_range=0.1,
    fill_mode='constant',
    cval=0,
    horizontal_flip=True,
)
train_datagen = ImageDataGenerator(**augmentation_options)

# Validation and test images use a generator with no augmentation options.
val_datagen = ImageDataGenerator()
test_datagen = ImageDataGenerator()

In [5]:
# Options common to all three splits. batch_size is passed explicitly so the
# generators always agree with the step counts computed from `batch_size`
# (previously the library default was used implicitly).
flow_options = dict(target_size=img_size,
                    color_mode=color_mode,
                    classes=class_names,
                    class_mode='binary',
                    batch_size=batch_size)

# Training data is reshuffled; evaluation data keeps a fixed order, which does
# not affect aggregate metrics and keeps batches aligned with the file listing.
train_generator = train_datagen.flow_from_directory(directory=dataset_dir+'/train',
                                                    shuffle=True,
                                                    **flow_options)
test_generator = test_datagen.flow_from_directory(directory=dataset_dir+'/test',
                                                  shuffle=False,
                                                  **flow_options)
val_generator = val_datagen.flow_from_directory(directory=dataset_dir+'/val',
                                                shuffle=False,
                                                **flow_options)

Found 16010 images belonging to 2 classes.
Found 5338 images belonging to 2 classes.
Found 5336 images belonging to 2 classes.


In [6]:
# Look up the ResNet-18 entry; it unpacks into the model constructor and its
# companion preprocessing function.
resnet18_entry = Classifiers.get('resnet18')
ResNet18, preprocess_input = resnet18_entry

In [7]:
# Assemble the classifier: input -> rescaling -> ResNet-18 backbone (no top)
# -> global average pooling -> single sigmoid unit.
input_shape = (dim1, dim2, num_channels)
inputs = Input(shape=input_shape)

# x / 127.5 - 1 maps pixel values in 0..255 onto [-1, 1].
scaled_input = Rescaling(scale=1./127.5, offset=-1)(inputs)

resnet_18_model = ResNet18(
    input_shape=input_shape,
    input_tensor=scaled_input,
    weights='imagenet',
    include_top=False,
)

x = GlobalAveragePooling2D()(resnet_18_model.output)
outputs = Dense(units=1, activation='sigmoid')(x)
model = Model(inputs=inputs, outputs=outputs, name='resnet_18_classifier')

Metal device set to: Apple M1

systemMemory: 16.00 GB
maxCacheSize: 5.33 GB



2022-08-01 10:34:26.726628: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2022-08-01 10:34:26.726793: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [8]:
# Print the layer-by-layer table (type, output shape, parameter count,
# connections) of the assembled model as a sanity check.
model.summary()

Model: "resnet_18_classifier"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 256, 256, 3  0           []                               
                                )]                                                                
                                                                                                  
 rescaling (Rescaling)          (None, 256, 256, 3)  0           ['input_1[0][0]']                
                                                                                                  
 bn_data (BatchNormalization)   (None, 256, 256, 3)  9           ['rescaling[0][0]']              
                                                                                                  
 zero_padding2d (ZeroPadding2D)  (None, 262, 262, 3)  0          ['bn_data[0][0

In [9]:
# The order of this list matters: the test-metrics cell reads the result of
# model.evaluate by position (1 = accuracy, 2 = precision, 3 = recall).
tracked_metrics = [
    BinaryAccuracy(name='accuracy'),
    Precision(thresholds=0.5, name='precision'),
    Recall(thresholds=0.5, name='recall'),
    AUC(name='auc'),
]
model.compile(
    optimizer=Adam(learning_rate=1e-5),
    loss='binary_crossentropy',
    metrics=tracked_metrics,
)

In [10]:
# Stop when val_loss has not improved by at least 1e-4 for 10 epochs, and
# restore the best weights seen.
early_stopping_options = dict(
    monitor='val_loss',
    mode='min',
    min_delta=0.0001,
    patience=10,
    restore_best_weights=True,
    verbose=1,
)
early_stopping = EarlyStopping(**early_stopping_options)

In [11]:
# Multiply the learning rate by 0.1 once val_loss has plateaued for 5 epochs.
# The patience is shorter than EarlyStopping's 10, so a reduction is tried
# before training is stopped.
reduce_lr_options = dict(
    monitor='val_loss',
    mode='min',
    min_delta=0.0001,
    patience=5,
    factor=0.1,
    verbose=1,
)
reduce_lr = ReduceLROnPlateau(**reduce_lr_options)

In [12]:
# Checked once per epoch; a checkpoint is written only when val_loss improves.
# The epoch number is substituted into the path.
checkpoint_path = 'rsna-resnet-augmented-checkpoints/rsna-resnet-augmented-checkpoint-{epoch}'
checkpointing = ModelCheckpoint(
    filepath=checkpoint_path,
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    save_freq='epoch',
    verbose=1,
)

In [13]:
# Batches needed to see every image once, using ceiling division: a partial
# final batch counts as one step, and an exact multiple of batch_size no longer
# yields a spurious extra step (int(n / batch_size) + 1 did).
train_steps = (sum(num_samples['train']) + batch_size - 1) // batch_size
val_steps = (sum(num_samples['val']) + batch_size - 1) // batch_size

In [None]:
# Train for up to 100 epochs with class-weighted loss; the callbacks may stop
# training early, lower the learning rate and write checkpoints.
fit_callbacks = [early_stopping, reduce_lr, checkpointing]
history = model.fit(
    x=train_generator,
    validation_data=val_generator,
    epochs=100,
    steps_per_epoch=train_steps,
    validation_steps=val_steps,
    class_weight=class_weights,
    callbacks=fit_callbacks,
    shuffle=True,
    verbose=1,
)

In [None]:
# Persist the trained model together with its optimizer state.
# overwrite=False: an existing save at this path is not silently replaced.
model.save(
    filepath='rsna-resnet-augmented',
    overwrite=False,
    include_optimizer=True,
)

In [None]:
# Write mode, not append: with 'ab' each re-run stacked another pickle onto the
# same file, and a single pickle.load would then return the oldest, stale one.
# NOTE(review): this pickles the whole History object returned by fit;
# `history.history` (the plain metrics dict) is probably all that downstream
# code needs. Confirm with whatever loads this file before changing it.
with open('rsna-resnet-augmented-history', 'wb') as f:
    dump(history, f)

In [None]:
def _f1_per_epoch(precision, recall):
    """Return the element-wise F1 score 2PR / (P + R) for two metric arrays.

    Epochs where precision + recall is 0 get an F1 of 0 without triggering
    divide-by-zero warnings (the reciprocal form 2 / (1/P + 1/R) produced the
    same 0 but emitted a RuntimeWarning for each such epoch).
    """
    denominator = precision + recall
    # The numerator is already 0 wherever the denominator is 0, so any non-zero
    # stand-in yields the intended result of 0.
    denominator[denominator == 0] = 1.0
    return 2. * precision * recall / denominator


# Per-epoch precision/recall curves recorded by fit, as arrays.
train_precision = asarray(history.history['precision'])
train_recall = asarray(history.history['recall'])
val_precision = asarray(history.history['val_precision'])
val_recall = asarray(history.history['val_recall'])

train_f1 = _f1_per_epoch(train_precision, train_recall)
val_f1 = _f1_per_epoch(val_precision, val_recall)

In [None]:
# x-axis for the curves below: one 0-based index per epoch actually run.
epoch_count = len(history.history['loss'])
epochs = range(epoch_count)

In [None]:
# Report the metrics of the last epoch that ran.
# NOTE(review): EarlyStopping is configured with restore_best_weights=True, so
# the final model weights may come from an earlier epoch than the one reported
# here. Confirm which epoch should be quoted.
print("Train precision: ", train_precision[-1])
print("Train recall: ", train_recall[-1])
print("Validation precision: ", val_precision[-1])
print("Validation recall: ", val_recall[-1])  # label typo fixed (was "Vlaidation")
print("F1 score in train set: ", train_f1[-1])
print("F1 score on validation set: ", val_f1[-1])

In [None]:
# F1 per epoch: train in red, validation in blue.
f1_fig, f1_ax = plt.subplots()
f1_ax.plot(epochs, train_f1, 'r', label='Train F1 score')
f1_ax.plot(epochs, val_f1, 'b', label='Validation F1 score')
f1_ax.set_title('Train and validation F1 scores')
f1_ax.legend()
plt.show()

In [None]:
# Loss per epoch: train in red, validation in blue.
loss = history.history['loss']
val_loss = history.history['val_loss']

loss_fig, loss_ax = plt.subplots()
loss_ax.plot(epochs, loss, 'r', label='Training loss')
loss_ax.plot(epochs, val_loss, 'b', label='Validation loss')
loss_ax.set_title('Training and validation loss')
loss_ax.legend()
plt.show()

In [None]:
# Accuracy per epoch: train in red, validation in blue.
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']

acc_fig, acc_ax = plt.subplots()
acc_ax.plot(epochs, acc, 'r', label='Training accuracy')
acc_ax.plot(epochs, val_acc, 'b', label='Validation accuracy')
acc_ax.set_title('Training and validation accuracy')
acc_ax.legend()
plt.show()

In [None]:
# Score the trained model on the held-out test split. The result is indexed
# by position in the next cell.
test_history = model.evaluate(
    x=test_generator,
    verbose=1,
)

In [None]:
# Positions follow the metrics list given to model.compile; index 0 (not read
# here) is presumably the loss. TODO confirm.
test_acc = test_history[1]
test_precision = test_history[2]
test_recall = test_history[3]
# F1 = 2PR / (P + R). These are plain floats, so the reciprocal form
# 2 / (1/P + 1/R) raised ZeroDivisionError whenever precision or recall was 0;
# report 0.0 in that case instead.
precision_recall_sum = test_precision + test_recall
test_f1 = (2. * test_precision * test_recall / precision_recall_sum
           if precision_recall_sum > 0 else 0.0)

In [None]:
# Print the test-set metrics, one labelled line each.
test_report = (
    ("Test accuracy: ", test_acc),
    ("Test precision: ", test_precision),
    ("Test recall: ", test_recall),
    ("F1 score on test set: ", test_f1),
)
for label, value in test_report:
    print(label, value)