In [1]:
import numpy as np
import pandas as pd
import keras
import tensorflow as tf
from tensorflow.python.client import device_lib
# Ask TensorFlow which devices it can see. This is the last expression of the
# cell, so its return value is what the notebook displays as the cell output.
device_lib.list_local_devices() 

Using TensorFlow backend.


[name: "/cpu:0"
 device_type: "CPU"
 memory_limit: 268435456
 locality {
 }
 incarnation: 1200915790785680602, name: "/gpu:0"
 device_type: "GPU"
 memory_limit: 2790653952
 locality {
   bus_id: 1
 }
 incarnation: 4604696554727198697
 physical_device_desc: "device: 0, name: GeForce GTX 780, pci bus id: 0000:02:00.0"]

In [2]:
def get_shuffled_splitted_data(path, random_state=None):
    """Load a labelled image CSV and return shuffled train/test/validation arrays.

    The file is read with ``pd.read_csv`` and must contain a ``label`` column.
    All remaining columns of a row are reshaped to ``(1, 124, 124)``, so there
    have to be exactly 124 * 124 of them.

    Rows are shuffled and then cut at 70 % and 90 % of the row count. The
    resulting 70 % / 20 % / 10 % parts are returned as train / test /
    validation, in that order.

    Args:
        path: Location of the CSV file (anything ``pd.read_csv`` accepts).
        random_state: Optional seed forwarded to ``DataFrame.sample`` so the
            shuffle can be reproduced. ``None`` (the default) keeps the
            previous, unseeded shuffle.

    Returns:
        The tuple ``(X_train, y_train, X_test, y_test, X_val, y_val)``.
        Every ``X_*`` is a float32 array of shape ``(n, 1, 124, 124)`` whose
        values were divided by 255; every ``y_*`` is the result of
        ``keras.utils.to_categorical(labels, 2)``.
    """
    shuffled = pd.read_csv(path).sample(frac=1, random_state=random_state)
    n_rows = len(shuffled)
    train_end, test_end = int(.7 * n_rows), int(.9 * n_rows)

    # Slice by position rather than np.split(): np.split on a DataFrame goes
    # through DataFrame.swapaxes, which recent pandas releases deprecate.
    parts = (shuffled.iloc[:train_end],
             shuffled.iloc[train_end:test_end],
             shuffled.iloc[test_end:])

    arrays = []
    for part in parts:
        # Labels as a column vector, exactly what to_categorical received before.
        labels = part["label"].values.reshape((-1, 1))

        # `.values` replaces the removed DataFrame.as_matrix(); drop() returns
        # a new frame, so the slice of `shuffled` is never mutated.
        pixels = part.drop("label", axis=1).values

        # Reshape the features for the CNN and scale them by 1/255.
        features = pixels.reshape(pixels.shape[0], 1, 124, 124).astype('float32')
        features /= 255

        arrays.append(features)
        arrays.append(keras.utils.to_categorical(labels, 2))

    return tuple(arrays)
    
# Read the subset CSV once and unpack the six arrays used by the cells below.
csv_path = '../data/subset-1-HnxTny.txt.csv'
(X_train, y_train,
 X_test, y_test,
 X_val, y_val) = get_shuffled_splitted_data(csv_path)

In [3]:
# Keras pieces used to build and train the network. They are hoisted out of
# the device scope: importing does not create any TensorFlow ops.
import keras
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K
from keras.losses import categorical_crossentropy
from keras.optimizers import Adadelta

# The arrays are laid out as (samples, channels, height, width).
# set_image_data_format('channels_first') is the Keras 2 replacement for the
# deprecated set_image_dim_ordering('th').
K.set_image_data_format('channels_first')

batch_size = 32
num_classes = 2
epochs = 5

# The data, shuffled and split between train, test and validation sets:
print('X_train shape:', X_train.shape)
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')
print(X_val.shape[0], 'validation samples')

# Build, train and evaluate the model with its ops pinned to the first GPU.
with tf.device('/gpu:0'):

    model = Sequential()
    # Take the per-sample shape from the data instead of repeating the literal
    # (1, 124, 124) here.
    model.add(Conv2D(32, kernel_size=(3, 3), activation='relu',
                     input_shape=X_train.shape[1:]))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes, activation='softmax'))

    model.compile(loss=categorical_crossentropy,
                  optimizer=Adadelta(),
                  metrics=['accuracy'])

    # NOTE(review): the split named "test" is the one monitored during
    # training, while the split named "val" is only evaluated afterwards.
    # No callback selects on it, so nothing leaks, but the names are swapped
    # relative to the usual convention - confirm this is intended.
    model.fit(X_train, y_train,
              batch_size=batch_size,
              epochs=epochs,
              verbose=1,
              validation_data=(X_test, y_test))

    # evaluate() returns [loss, accuracy] because of metrics=['accuracy'].
    test_score = model.evaluate(X_test, y_test, verbose=0)
    print('Test loss:', test_score[0])
    print('Test accuracy:', test_score[1])

    val_score = model.evaluate(X_val, y_val, verbose=0)
    print('Val loss:', val_score[0])
    print('Val accuracy:', val_score[1])

X_train shape: (5318, 1, 124, 124)
5318 train samples
1520 test samples
760 validation samples
Train on 5318 samples, validate on 1520 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test loss: 2.76347803842e-06
Test accuracy: 1.0
Val loss: 3.52140280508e-06
Val accuracy: 1.0


In [40]:
import numpy as np
from matplotlib import pyplot as plt

from keras.preprocessing.image import img_to_array
from keras.applications.imagenet_utils import preprocess_input

from vis.utils import utils
from vis.visualization import visualize_saliency
# Index of the layer to visualize: the final softmax layer, so that the
# filter index handed to keras-vis is a class index. (The previous expression
# always evaluated to 0, i.e. the first convolution layer.)
layer_idx = len(model.layers) - 1

# Predict on the sample exactly as the network saw it during training: the
# slice keeps the batch axis, giving the (1, 1, 124, 124) input the model
# expects. The data is already scaled to [0, 1]; ImageNet's preprocess_input
# (mean subtraction for 3-channel images) does not apply to it.
x = X_train[0:1]
pred_class = np.argmax(model.predict(x))

# Here we are asking it to show attention such that prob of `pred_class` is maximized.
# The seed image is passed channels-last, (124, 124, 1). The earlier
# (124, 1, 124) layout was what ended up being fed as (1, 124, 124, 1).
# NOTE(review): assumes keras-vis moves the trailing channel axis into place
# itself - confirm against the installed keras-vis version.
seed_img = np.moveaxis(X_train[0], 0, -1)
heatmap = visualize_saliency(model, layer_idx, [pred_class], seed_img)
heatmaps = [heatmap]

plt.axis('off')
plt.imshow(utils.stitch_images(heatmaps))
plt.title('Saliency map')
plt.show()

Working on filters: [0]


ValueError: Cannot feed value of shape (1, 124, 124, 1) for Tensor 'conv2d_1_input:0', which has shape '(?, 1, 124, 124)'

In [3]:
from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot
# Turn the softmax outputs for the test split into hard 0/1 indicators by
# rounding every probability to the nearest integer.
y_test_pred = np.round(
    model.predict(X_test, batch_size=32, verbose=0)
).astype(int)

def plot_sample(ax, sample, title):
    """Draw a single sample as a grayscale image on ``ax``.

    ``sample`` is reshaped to 124 x 124 and its first row and first column
    are cropped before plotting, so 123 x 123 pixels are shown. The axis
    decorations are switched off and ``title`` is set as the axes title.
    """
    side = 124
    image = sample.reshape(side, side)
    # The first line contains values around 65000 for some reason, so it is
    # cut away (together with the first column).
    cropped = image[1:, 1:]
    ax.imshow(cropped, cmap='gray', interpolation='nearest')
    ax.axis('off')
    ax.set_title(title)

def has_tumor(one_hot_vector):
    """Return the position of the largest entry of ``one_hot_vector``.

    In this notebook index 0 is plotted as "No tumor", so for the two-class
    vectors used here the result is 0 for no tumor and 1 for tumor.
    """
    class_index = one_hot_vector.argmax()
    return class_index
    
def plot_samples(count, samples, labels, predicted, main_title):
    """Plot ``count`` randomly chosen samples in a grid that is 4 columns wide.

    Positions are drawn without replacement via ``np.random.choice``. Each
    panel title shows the drawn position (relative to the ``samples`` array
    passed in), the true class and the predicted class; ``main_title`` becomes
    the figure title. ``count`` must be a multiple of 4, otherwise the
    assertion below fails.
    """
    # Pick random datapoints
    picked = np.random.choice(np.arange(samples.shape[0]), count, replace=False)
    chosen_samples = samples[picked]
    chosen_labels = labels[picked]
    chosen_predictions = predicted[picked]

    n_cols = 4
    n_rows = count // n_cols
    assert n_rows * n_cols == count, 'Number of samples must be a multiple of 4'

    fig, axes = plt.subplots(n_rows, n_cols, figsize=(16, n_rows * 4))
    title_template = '#{}, Tumor: {}, Predicted: {}'
    panels = zip(axes.flat, picked, chosen_samples,
                 chosen_labels, chosen_predictions)
    for ax, position, sample, label, prediction in panels:
        title = title_template.format(
            position, has_tumor(label), has_tumor(prediction))
        plot_sample(ax, sample, title)
    fig.suptitle(main_title)

# Seed NumPy so the same samples are drawn on every run
np.random.seed(0)
plot_samples(4, X_test, y_test, y_test_pred, 'Testing set')

# Boolean masks over the test set: class index 0 means "no tumor"
no_tumors = y_test.argmax(axis=1) == 0
tumors = ~no_tumors
plot_samples(4, X_test[no_tumors], y_test[no_tumors],
             y_test_pred[no_tumors], 'Testing set - No tumor')
plot_samples(4, X_test[tumors], y_test[tumors],
             y_test_pred[tumors], 'Testing set - Tumor')

# Save the architecture diagram to disk, then render it inline as SVG
# (the SVG object is the cell's last expression, so it is displayed).
keras.utils.plot_model(model, show_shapes=True, to_file='model-Small-tk.png')
SVG(model_to_dot(model, show_shapes=True).create(prog='dot', format='svg'))

NameError: name 'model' is not defined