In [1]:
import os
from glob import *

import cv2
import numpy as np
import pandas as pd
from keras.layers import Conv2D
from keras.layers import Dense, Dropout, Flatten, MaxPool2D, BatchNormalization
from keras.models import Sequential
from keras.optimizers import Adagrad
from keras.preprocessing.image import ImageDataGenerator
from keras.utils.np_utils import to_categorical
from skimage.feature import hog
from skimage.io import imread
from skimage.transform import rescale
from sklearn.model_selection import train_test_split
from sklearn.utils import class_weight


Using TensorFlow backend.


In [2]:
labels = pd.read_csv('sample_labels.csv')
images = glob(os.path.join("images", "*.png"))

len(images)


5606

In [4]:
WIDTH = 128
HEIGHT = 128


def proc_images(images_to_process):
    dict_characters = {
        'No Finding': 0,
        'Consolidation': 1,
        'Infiltration': 2,
        'Pneumothorax': 3,
        'Effusion': 4,
        'Nodule Mass': 5,
        'Atelectasis': 6,
        "Other": 7
    }

    x = []
    y = []

    for img in images_to_process:
        base = os.path.basename(img)
        # Read and resize image
        full_size_image = cv2.imread(img)
        finding = labels["Finding Labels"][labels["Image Index"] == base].values[0]
        symbol = "|"
        if symbol in finding:
            continue
        else:
            val = dict_characters.get(finding, 7)
            y.append(val)
            image_np = cv2.resize(full_size_image, dsize=(WIDTH, HEIGHT), interpolation=cv2.INTER_CUBIC)
            x.append(image_np)

    return x, y


In [15]:
X, y = proc_images(images[:300])
df = pd.DataFrame()
df["images"] = X
df["labels"] = y
print(len(df), df.images[0].shape)



252 (128, 128, 3)


In [26]:
partial_model = True

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

if partial_model:
    X_train = X_train[0:100] 
    y_train = y_train[0:100]
    X_test = X_test[0:20] 
    y_test = y_test[0:20]


In [27]:
X_train = np.asarray(X_train)
X_test = np.asarray(X_test)

In [28]:
X_trainShape = X_train.shape[1] * X_train.shape[2] * X_train.shape[3]
X_testShape = X_test.shape[1] * X_test.shape[2] * X_test.shape[3]
X_trainFlat = X_train.reshape(X_train.shape[0], X_trainShape)
X_testFlat = X_test.reshape(X_test.shape[0], X_testShape)



In [30]:
Y_trainHot = to_categorical(y_train, num_classes=8)
y_testHot = to_categorical(y_test, num_classes=8)


In [31]:
input_shape = (WIDTH, HEIGHT, 3)
model = Sequential()
model.add(Conv2D(filters=32, kernel_size=(3, 3), padding='Same', activation='relu', input_shape=input_shape, strides=1))
model.add(Conv2D(filters=32, kernel_size=(3, 3), padding='Same',activation='relu'))
model.add(MaxPool2D(pool_size=(2, 2)))
model.add(BatchNormalization())
model.add(Dropout(0.25))
model.add(Conv2D(filters=128, kernel_size=(3, 3), padding='Same', activation='relu'))
model.add(Conv2D(filters=128, kernel_size=(3, 3), padding='Same', activation='relu'))
model.add(MaxPool2D(pool_size=(2, 2)))
model.add(BatchNormalization())
model.add(Dropout(0.25))
model.add(Conv2D(filters=86, kernel_size=(3, 3), padding='Same', activation='relu'))
model.add(Conv2D(filters=86, kernel_size=(3, 3), padding='Same', activation='relu'))
model.add(MaxPool2D(pool_size=(2, 2)))
model.add(BatchNormalization())
model.add(Dropout(0.25))
model.add(Flatten())
#model.add(Dense(1024, activation = "relu"))
#model.add(Dropout(0.5))
model.add(Dense(512, activation="relu"))
model.add(Dropout(0.5))
model.add(Dense(8, activation="softmax"))
# Define the optimizer
optimizer = Adagrad()
model.compile(optimizer=optimizer, loss="categorical_crossentropy", metrics=["accuracy"])


In [32]:
datagen = ImageDataGenerator()
datagen.fit(X_train)


In [33]:
epochs = 10

history = model.fit_generator(datagen.flow(X_train, Y_trainHot),
                              steps_per_epoch=len(X_train) / 32, epochs=epochs, validation_data=[X_test, y_testHot])    


Instructions for updating:
Use tf.cast instead.


Epoch 1/10










Epoch 2/10










Epoch 3/10










Epoch 4/10










Epoch 5/10










Epoch 6/10










Epoch 7/10










Epoch 8/10










Epoch 9/10










Epoch 10/10










In [34]:
model.evaluate(X_test, y_testHot) 



[8.05904769897461, 0.5]

In [35]:
if not partial_model:
    import pickle
    
    with open("cnn", mode='wb') as binary_file:
        pickle.dump(model, binary_file)


In [36]:
from sklearn.ensemble import RandomForestClassifier

clf = RandomForestClassifier(n_estimators=100)
clf.fit(X_trainFlat, y_train)
clf.score(X_testFlat, y_test)


0.45

In [38]:
from sklearn import svm

clf = svm.SVC()
clf.fit(X_trainFlat, y_train)
clf.score(X_testFlat, y_test)




0.5