In [1]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import VGG16
from tensorflow.keras.layers import Activation, Dense, Dropout, Flatten, BatchNormalization, Conv2D, MaxPooling2D, Lambda, Input, AveragePooling2D
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing import image
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.preprocessing import image
from tensorflow.keras import backend as K
import tensorflow as tf 
import matplotlib.pyplot as plt 
import numpy as np 
import pandas as pd
import random
#import cv2
import os

In [2]:
# Check filenames 
image_names=list(os.listdir("./Resources/images"))
image_names.sort()
print(image_names)

['All', 'Atelectasis', 'Cardiomegaly', 'Consolidation', 'Covid', 'Edema', 'Enlarged_Cardiomediastinum', 'Fracture', 'Lung_Lesion', 'Lung_Opacity', 'No_Finding', 'Pleural_Other', 'Pneumonia', 'Pneumothorax', 'Support_Devices']


In [3]:
# Directory with Atelectasis
atele_dir=os.path.join("./Resources/images/Atelectasis")
df =pd.DataFrame.from_records({"file_name":os.listdir(atele_dir),"condition": "Atelectasis"})
for f in image_names[2:]:
    folder_path="./Resources/images/" + f
    temp_df= pd.DataFrame.from_records({"file_name":os.listdir(folder_path),"condition": f })
    df=df.append(temp_df)
    
df["condition"].value_counts()

No_Finding                    1898
Atelectasis                    286
Covid                          184
Pneumothorax                   164
Lung_Opacity                   156
Edema                          155
Support_Devices                154
Consolidation                  145
Enlarged_Cardiomediastinum     143
Fracture                       142
Cardiomegaly                   136
Lung_Lesion                    127
Pneumonia                      118
Pleural_Other                   99
Name: condition, dtype: int64

In [4]:
# balance classes - random Random Sampling No_Finding
df_no_finding = df[df["condition"]=="No_Finding"].sample(n=150, random_state=42)
df_balanced = df[df["condition"]!="No_Finding"].append(df_no_finding)
df_balanced["condition"].value_counts()

Atelectasis                   286
Covid                         184
Pneumothorax                  164
Lung_Opacity                  156
Edema                         155
Support_Devices               154
No_Finding                    150
Consolidation                 145
Enlarged_Cardiomediastinum    143
Fracture                      142
Cardiomegaly                  136
Lung_Lesion                   127
Pneumonia                     118
Pleural_Other                  99
Name: condition, dtype: int64

In [5]:
df_balanced["file_name"] = "./Resources/images/All/"+df_balanced["file_name"]

In [6]:
# separate into train/test 
X=df_balanced["file_name"]
y=df_balanced["condition"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [7]:
img_rows, img_cols = 28, 28
if K.image_data_format() == 'th':
    X_train = X_train.reshape(X_train.shape[0], 1, img_rows, img_cols)
    X_test = X_test.reshape(X_test.shape[0], 1, img_rows, img_cols)
    input_shape = (1, img_rows, img_cols)
else:
    X_train = X_train.reshape(X_train.shape[0], img_rows, img_cols, 1)
    X_test = X_test.reshape(X_test.shape[0], img_rows, img_cols, 1)
    input_shape = (img_rows, img_cols, 1)

129    ./Resources/images/All/patient04362-study8-vie...
130    ./Resources/images/All/patient04689-study1-vie...
90     ./Resources/images/All/patient02661-study5-vie...
107    ./Resources/images/All/patient03108-study1-vie...
127    ./Resources/images/All/patient04362-study10-vi...
                             ...                        
65     ./Resources/images/All/patient14737-study1-vie...
46     ./Resources/images/All/patient01343-study2-vie...
81     ./Resources/images/All/patient03011-study1-vie...
103    ./Resources/images/All/patient09147-study1-vie...
109    ./Resources/images/All/patient01682-study3-vie...
Name: file_name, Length: 1511, dtype: object


In [8]:
img_width, img_height=200,200
batch_size=128


# Rescale images 
datagen= ImageDataGenerator(rescale=1/255)

# Flow training images in batches of 128 using train_data 
train_generator= datagen.flow_from_directory(
                X_train,
                target_size=(img_width,img_height),
                batch_size=batch_size,
                classes= ["Covid", "Atelectasis", "Cardiomegaly", "Consolidation", "Edema", "Enlarged_Cardiomediastinum", "Fracture", 
                          "Lung_Lesion", "Lung_Opacity", "No_Finding", "Pleural_Other", "Pneumonia", "Pneumothorax", "Support_Devices"],
                class_mode='categorical', shuffle=True, seed=30)

# Flow test images in 

datagen2=ImageDataGenerator(rescale=1/255)

test_generator=datagen2.flow_from_directory(
                X_test,
                target_size=(img_width,img_height),
                batch_size=batch_size,
                classes= ["Covid", "Atelectasis", "Cardiomegaly", "Consolidation", "Edema", "Enlarged_Cardiomediastinum", "Fracture", 
                          "Lung_Lesion", "Lung_Opacity", "No_Finding", "Pleural_Other", "Pneumonia", "Pneumothorax", "Support_Devices"],
                class_mode='categorical',subset="validation", shuffle=True, seed=30)


TypeError: expected str, bytes or os.PathLike object, not Series

In [None]:
# CNN MODEL- conv-batch-maxpool-dropout

model=Sequential()
model.add(Conv2D(32, kernel_size=3, activation="relu", input_shape=(200,200,3)))
model.add(BatchNormalization())
model.add(MaxPooling2D(strides=(2,2)))
model.add(Dropout(.3))

model.add(Conv2D(32, kernel_size=3, activation="relu"))
model.add(BatchNormalization())
model.add(MaxPooling2D(strides=(2,2)))
model.add(Dropout(.5))

model.add(Conv2D(64, kernel_size=3, activation="relu"))
model.add(BatchNormalization())
model.add(MaxPooling2D(strides=(2,2)))
model.add(Dropout(.4))

model.add(Conv2D(64, kernel_size=3, activation="relu"))
model.add(BatchNormalization())
model.add(MaxPooling2D(strides=(2,2)))
model.add(Dropout(.3))

model.add(Flatten())
model.add(Dense(512,activation="relu"))
model.add(Dense(128,activation="relu"))
model.add(Dropout(.4))

model.add(Dense(14, activation="softmax"))

In [None]:
model.summary()

In [None]:
model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["categorical_accuracy"])


from keras.callbacks import History
history=History()

Model.fit_generator(
            train_generator, steps_per_epoch=900,
            epochs=100, callbacks=[history],
            validation_data=test_generator, 
            validation_steps=2)