In [1]:
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.utils import to_categorical
from keras.preprocessing import image
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from tqdm import tqdm
%matplotlib inline

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# Show every column when a DataFrame is rendered in the notebook
# (None removes the column-count limit that otherwise truncates wide frames).
# Use the public pd.set_option entry point rather than the pd.pandas alias.
pd.set_option('display.max_columns', None)

In [10]:
# Load the label table from the sample CSV.
# Only the first 2000 rows are read, so everything downstream works on a
# subset of the dataset.
train = pd.read_csv(
    filepath_or_buffer='CheXpert-v1.0-small/train_sample.csv',
    nrows=2000,
)

In [11]:
# Preview the first rows to sanity-check the image path column and the label columns.
train.head()

Unnamed: 0,path,no_finding,enlarged_cardiomediastinum,cardiomegaly,lung_opacity,lung_lesion,edema,consolidation,pneumonia,atelectasis,pneumothorax,pleural_effusion,pleural_other,fracture,no_of_labels
0,CheXpert-v1.0-small/train/patient00001/study1/...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,CheXpert-v1.0-small/train/patient00002/study2/...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.0
2,CheXpert-v1.0-small/train/patient00002/study1/...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.0
3,CheXpert-v1.0-small/train/patient00002/study1/...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.0
4,CheXpert-v1.0-small/train/patient00003/study1/...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [12]:
# List the column names: 'path' holds the image file location and the other
# columns hold the per-finding labels ('no_of_labels' appears to be a per-row
# count of positive labels -- confirm against the CSV).
train.columns

Index(['path', 'no_finding', 'enlarged_cardiomediastinum', 'cardiomegaly',
       'lung_opacity', 'lung_lesion', 'edema', 'consolidation', 'pneumonia',
       'atelectasis', 'pneumothorax', 'pleural_effusion', 'pleural_other',
       'fracture', 'no_of_labels'],
      dtype='object')

In [27]:
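# Troubleshooting (kept for reference): Keras' image.load_img depends on Pillow.
# If loading images fails with a PIL import error, install Pillow and restart
# the Jupyter kernel; the commented-out lines below are a fallback workaround.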

# import sys
# import PIL
# from PIL import Image
# sys.modules['Image'] = Image 

# from IPython.display import display
# from PIL import Image

In [None]:
# Load every image listed in train['path'], resize it to 400x400, and divide
# the pixel array by 255 (maps 8-bit pixel values into the [0, 1] range).
# load_img documents target_size as (height, width); the channel count comes
# from the colour mode, not from target_size, so no third element is passed.
train_image = []
for img_path in tqdm(train['path'].tolist()):
    img = image.load_img(img_path, target_size=(400, 400))
    img = image.img_to_array(img)
    img = img/255
    train_image.append(img)
# Stack the per-image arrays into a single array with a leading sample axis.
X = np.array(train_image)


  0%|                                                                                      | 0/2000 [00:00<?, ?it/s][A
  0%|▏                                                                             | 6/2000 [00:00<00:40, 49.60it/s][A
  1%|▉                                                                            | 24/2000 [00:00<00:31, 63.35it/s][A
  2%|█▋                                                                           | 43/2000 [00:00<00:24, 78.87it/s][A
  3%|██▏                                                                          | 56/2000 [00:00<00:21, 89.39it/s][A
  4%|██▊                                                                         | 73/2000 [00:00<00:18, 103.89it/s][A
  5%|███▍                                                                        | 92/2000 [00:00<00:15, 120.12it/s][A
  5%|████                                                                       | 107/2000 [00:00<00:15, 124.63it/s][A
  6%|████▌                             

 98%|█████████████████████████████████████████████████████████████████████████▋ | 1965/2000 [00:18<00:01, 31.07it/s][A
 98%|█████████████████████████████████████████████████████████████████████████▊ | 1969/2000 [00:18<00:00, 32.36it/s][A
 99%|█████████████████████████████████████████████████████████████████████████▉ | 1973/2000 [00:18<00:00, 31.97it/s][A
 99%|██████████████████████████████████████████████████████████████████████████▏| 1978/2000 [00:18<00:00, 34.56it/s][A
 99%|██████████████████████████████████████████████████████████████████████████▎| 1982/2000 [00:18<00:00, 31.20it/s][A
 99%|██████████████████████████████████████████████████████████████████████████▍| 1986/2000 [00:18<00:00, 31.90it/s][A
100%|██████████████████████████████████████████████████████████████████████████▋| 1990/2000 [00:19<00:00, 18.29it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 2000/2000 [00:19<00:00, 102.24it/s][A


In [None]:
# Sanity check: expected to be (number_of_images, 400, 400, 3), matching the
# 400x400 resize above and the input_shape the model is built with.
X.shape

In [None]:
# Display the third preprocessed image as a visual check of the loading step.
sample_image = X[2]
plt.imshow(sample_image)

In [None]:
# Build the multi-label target matrix: one column per finding.
# 'path' and 'no_of_labels' are not prediction targets, so they are dropped.
label_frame = train.drop(columns=['path', 'no_of_labels'])
y = np.array(label_frame)
y.shape

In [None]:
# Hold out 10% of the samples for validation; the fixed seed keeps the split
# repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=0,
)

In [None]:
# CNN for multi-label classification of 400x400 RGB images.
# Layout: four Conv(5x5, relu) -> MaxPool(2x2) -> Dropout(0.25) stages,
# then Dense(128) and Dense(64) with Dropout(0.5), then a sigmoid output layer.
#
# The output layer needs exactly one unit per label column in y; otherwise
# binary_crossentropy cannot compare predictions with targets and fit() fails
# with a shape mismatch. Deriving the width from y keeps the two in sync.
num_labels = y.shape[1]

model = Sequential()
model.add(Conv2D(filters=16, kernel_size=(5, 5), activation="relu", input_shape=(400,400,3)))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Conv2D(filters=32, kernel_size=(5, 5), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Conv2D(filters=64, kernel_size=(5, 5), activation="relu"))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Conv2D(filters=64, kernel_size=(5, 5), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
# Sigmoid (not softmax): each label is predicted independently of the others.
model.add(Dense(num_labels, activation='sigmoid'))

In [None]:
# Print the layer-by-layer model summary to review the architecture
# before compiling and training.

model.summary()

In [None]:
# Configure training: Adam optimizer, binary cross-entropy loss (computed over
# the sigmoid outputs), and accuracy reported as the monitoring metric.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Train for 10 epochs in mini-batches of 64, evaluating on the held-out
# split (X_test, y_test) at the end of every epoch.
model.fit(
    x=X_train,
    y=y_train,
    batch_size=64,
    epochs=10,
    validation_data=(X_test, y_test),
)