In [2]:
import os
import numpy as np
import matplotlib.pyplot as plt
from tensorflow import keras
from tensorflow.keras import layers
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.callbacks import History
from sklearn.model_selection import train_test_split
import tensorflow as tf
import brainscanfunctions as funcs

In [3]:
# Absolute locations of the test images and the training images.
# NOTE(review): these are machine-specific absolute paths; consider deriving
# them from a single configurable base directory so the notebook runs elsewhere.
test_directory = "/home/DAVIDSON/brwiedenbeck/public/brain_scans/test"
train_directory = "/home/DAVIDSON/brwiedenbeck/public/brain_scans/train"

In [4]:
# Load the test split through the project helper (brainscanfunctions, not shown here).
# Presumably returns (images, integer class labels, per-class image counts) —
# confirm against load_brain_scan.
X_test, y_test, counts_test = funcs.load_brain_scan(test_directory)

In [5]:
# Load the training split through the project helper (brainscanfunctions, not shown here).
# Presumably returns (images, integer class labels, per-class image counts) —
# confirm against load_brain_scan.
X_train, y_train, counts_train = funcs.load_brain_scan(train_directory)

In [6]:
# Hold out 10% of the training images for validation.
# random_state makes the split reproducible across kernel restarts; stratify keeps
# the class proportions equal in both parts, which matters because one class has
# only 52 training images (see counts_train).
# NOTE(review): assumes y_train holds one class label per image at this point
# (it is one-hot encoded only later) — confirm against load_brain_scan.
# NOTE: this cell overwrites X_train / y_train, so re-running it without reloading
# the data shrinks the training set again.
SEED = 42
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=0.1, random_state=SEED, stratify=y_train
)

In [6]:
counts_test

[448, 640, 12, 179]

In [7]:
counts_train

[1792, 2560, 52, 717]

In [8]:
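# Per-class totals (train + test): 2240, 3200, 64, 896 = 6400 images; the test set (1279 images) is ~20% of the whole data set, i.e. ~25% of the size of the training set (5121 images).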

In [9]:
# Image geometry: each scan is handled as a 208x176 image with a single channel.
img_rows, img_cols = 208, 176
input_shape = (img_rows, img_cols, 1)

# Give every split an explicit trailing channel axis: (n_images, rows, cols, 1).
X_train = X_train.reshape((X_train.shape[0],) + input_shape)
X_test = X_test.reshape((X_test.shape[0],) + input_shape)
X_val = X_val.reshape((X_val.shape[0],) + input_shape)

In [10]:
# Cast every split to float32 and divide by 255
# (presumably maps 0-255 pixel intensities into [0, 1] — confirm the raw pixel range).
# NOTE: re-running this cell divides the already-scaled data by 255 again.
X_train = X_train.astype('float32') / 255
X_test = X_test.astype('float32') / 255
X_val = X_val.astype('float32') / 255

1279

In [10]:
# One-hot encode the labels of every split into num_classes columns.
# NOTE: re-running this cell would encode the already one-hot labels again.
num_classes = 4
y_train, y_test, y_val = [
    keras.utils.to_categorical(labels, num_classes)
    for labels in (y_train, y_test, y_val)
]

In [11]:
len(y_test[0])

4

In [13]:
# Augmentation for the training batches: horizontal and vertical flips are enabled,
# and fill_mode is set to 'nearest'.
augmentation_options = {
    "horizontal_flip": True,
    "vertical_flip": True,
    "fill_mode": 'nearest',
}
train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(**augmentation_options)

In [14]:
# Fit the generator's data-dependent statistics on the training images.
# NOTE(review): per the Keras documentation, fit() is only required when
# featurewise_center, featurewise_std_normalization or zca_whitening is enabled;
# none of them is set on train_datagen, so this call appears to be unnecessary —
# confirm and consider removing it.
train_datagen.fit(X_train)

In [21]:
# Training hyperparameters: mini-batch size, number of target classes, training epochs.
# num_classes repeats the value already set when the labels were one-hot encoded.
batch_size, num_classes, epochs = 100, 4, 10

In [22]:
# CNN classifier: two (strided 9x9 convolution -> 3x3 max-pool) stages, then a
# 100-unit dense layer and a softmax output with one unit per class.
# Dropout is not used (it was present only as commented-out lines).
conv_options = dict(filters=256, kernel_size=(9, 9), strides=(3, 3),
                    padding="same", activation='relu')
pool_options = dict(pool_size=(3, 3), padding='same')

model = keras.Sequential([
    # Only the first layer needs the input shape.
    layers.Conv2D(input_shape=input_shape, **conv_options),
    layers.MaxPooling2D(**pool_options),
    layers.Conv2D(**conv_options),
    layers.MaxPooling2D(**pool_options),
    layers.Flatten(),
    layers.Dense(100, activation='relu'),
    layers.Dense(num_classes, activation='softmax'),
])
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_2 (Conv2D)            (None, 70, 59, 256)       20992     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 24, 20, 256)       0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 8, 7, 256)         5308672   
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 3, 3, 256)         0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 2304)              0         
_________________________________________________________________
dense_2 (Dense)              (None, 100)               230500    
_________________________________________________________________
dense_3 (Dense)              (None, 4)                

In [23]:
# Configure training: categorical cross-entropy (the labels are one-hot encoded and
# the output layer is a softmax), the Adam optimizer with default settings, and two
# accuracy metrics.
# NOTE(review): 'categorical_accuracy' and 'accuracy' presumably report the same
# value for this loss — confirm, and keep in mind that later cells read index 1 of
# evaluate() and the 'accuracy' key of the training history.
model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.Adam(),
              metrics=['categorical_accuracy', 'accuracy'])


In [24]:
# Train on augmented batches drawn from train_datagen, and score the held-out
# validation split (not augmented) after every epoch so over-fitting is visible
# in the recorded history.
# Model.fit returns the History object it records into, so no explicit History
# callback is needed; the result keeps the name `history` used by later cells.
# Class weighting is not applied here (class_weights is only computed further
# down the notebook).
history = model.fit(
    train_datagen.flow(X_train, y_train, batch_size=batch_size),
    validation_data=(X_val, y_val),
    epochs=epochs,
    verbose=1,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x14c4a9741760>

In [25]:
# Score the trained model on the training split, then on the validation split.
# Only the first two entries of each evaluate() result are printed:
# the loss and the first compiled metric.
train_score = model.evaluate(X_train, y_train, verbose=1)
for tag, value in zip(('Train loss:', 'Train accuracy:'), train_score):
    print(tag, value)

val_score = model.evaluate(X_val, y_val, verbose=1)
for tag, value in zip(('validation loss:', 'validation accuracy:'), val_score):
    print(tag, value)

Train loss: 0.7054663896560669
Train accuracy: 0.6677517294883728
validation loss: 0.740510880947113
validation accuracy: 0.6413255333900452


In [26]:
# Score the trained model on the test split and print the loss and the first
# compiled metric.
test_score = model.evaluate(X_test, y_test, verbose=1)
for tag, value in zip(('Test loss:', 'Test accuracy:'), test_score):
    print(tag, value)

Test loss: 0.9691101908683777
Test accuracy: 0.5684128403663635


In [None]:
history.params

In [None]:
# Per-epoch training accuracy and loss recorded during fit().
training_log = history.history
acc, loss = training_log['accuracy'], training_log['loss']

In [None]:
import matplotlib.pyplot as plt

# Plot the per-epoch training curves and save each figure to disk.
# The x-axis gets its own name so the `epochs` hyperparameter passed to model.fit
# is not replaced by a range object when this cell runs.
epoch_axis = range(1, len(acc) + 1)

# Validation curves are drawn only when fit() recorded them.
val_acc = history.history.get('val_accuracy')
val_loss = history.history.get('val_loss')

# Accuracy figure.
fig_acc, ax_acc = plt.subplots()
ax_acc.plot(epoch_axis, acc, 'b', label='Training acc')
if val_acc is not None:
    ax_acc.plot(epoch_axis, val_acc, 'r', label='Validation acc')
ax_acc.set_title('Training accuracy')
ax_acc.set_xlabel('Epoch')
ax_acc.set_ylabel('Accuracy')
ax_acc.legend()
fig_acc.savefig("graphs1.jpg")

# Loss figure.
fig_loss, ax_loss = plt.subplots()
ax_loss.plot(epoch_axis, loss, 'b', label='Training loss')
if val_loss is not None:
    ax_loss.plot(epoch_axis, val_loss, 'r', label='Validation loss')
ax_loss.set_title('Training loss')
ax_loss.set_xlabel('Epoch')
ax_loss.set_ylabel('Loss')
ax_loss.legend()
fig_loss.savefig("graphs2.jpg")

plt.show()

In [None]:
import numpy as np
from sklearn.utils.class_weight import compute_class_weight
from sklearn.preprocessing import MultiLabelBinarizer

In [None]:
def generate_class_weights(class_series, multi_class=True, one_hot_encoded=False):
    """
    Compute "balanced" class weights for multi-class or multi-label targets.

    Args:
        class_series: The targets, one entry per sample. Either plain labels
            (multi-class), collections of labels (multi-label), or rows of
            0/1 indicators when one_hot_encoded is True.
        multi_class: True when each sample belongs to a single class; False
            when a sample may carry several labels at once.
        one_hot_encoded: True when class_series is already in indicator form.

    Returns:
        A dict of the form {class_label: class_weight}. For indicator input the
        class_label is the column index. For plain labels it is the label
        itself: np.unique(class_series) in the multi-class case, and the
        classes found by MultiLabelBinarizer in the multi-label case.

    Examples:
        - generate_class_weights(['mango', 'lemon', 'banana', 'mango'], multi_class=True, one_hot_encoded=False)
          {'banana': 1.3333333333333333, 'lemon': 1.3333333333333333, 'mango': 0.6666666666666666}
        - generate_class_weights([[1, 0, 0], [0, 1, 0], [0, 0, 1], [1, 0, 0]], multi_class=True, one_hot_encoded=True)
          {0: 0.6666666666666666, 1: 1.3333333333333333, 2: 1.3333333333333333}
        - generate_class_weights([['mango', 'lemon'], ['mango'], ['lemon', 'banana'], ['lemon']], multi_class=False, one_hot_encoded=False)
          {'banana': 1.3333333333333333, 'lemon': 0.4444444444444444, 'mango': 0.6666666666666666}
        - generate_class_weights([[0, 1, 1], [0, 0, 1], [1, 1, 0], [0, 1, 0]], multi_class=False, one_hot_encoded=True)
          {0: 1.3333333333333333, 1: 0.4444444444444444, 2: 0.6666666666666666}

    Author: Angel Igareta (angel@igareta.com)
    """
    if not multi_class:
        # Multi-label targets are counted column by column, so they have to be
        # in indicator form before counting.
        binarizer = None
        if not one_hot_encoded:
            binarizer = MultiLabelBinarizer()
            class_series = binarizer.fit_transform(class_series)

        sample_total = len(class_series)
        class_total = len(class_series[0])

        # Number of samples in which each class is present.
        occurrences = [
            sum(1 for row in class_series if row[col] != 0)
            for col in range(class_total)
        ]

        # Balanced weight: n_samples / (n_classes * count).
        # A class that never occurs gets the neutral weight 1.
        weights = []
        for count in occurrences:
            if count > 0:
                weights.append(sample_total / (class_total * count))
            else:
                weights.append(1)

        labels = range(len(weights)) if binarizer is None else binarizer.classes_
        return dict(zip(labels, weights))

    # Multi-class: compute_class_weight needs plain labels, so indicator rows are
    # collapsed to the index of their largest entry first.
    if one_hot_encoded:
        class_series = np.argmax(class_series, axis=1)

    present_labels = np.unique(class_series)
    balanced = compute_class_weight(class_weight='balanced', classes=present_labels, y=class_series)
    return dict(zip(present_labels, balanced))

In [None]:
# Balanced class weights computed from the one-hot training labels only
# (multi_class keeps its default of True).
class_weights = generate_class_weights(y_train, one_hot_encoded=True)

In [None]:
class_weights

In [None]:
y_train.shape

In [None]:
y_test.shape

In [None]:
y_val.shape

In [None]:
# Stack the one-hot labels of the train, test and validation splits into one array.
y_s = np.concatenate((y_train,y_test, y_val))

In [None]:
y_s.shape

In [None]:
# Recompute the balanced weights over the labels of all three splits; this
# overwrites the train-only class_weights computed earlier.
# NOTE(review): y_s includes the test-set labels — confirm that is intended
# before these weights are used for training.
class_weights = generate_class_weights(y_s, one_hot_encoded=True)

In [None]:
class_weights