In [1]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import VGG16
import tensorflow.keras.layers as layers
from tensorflow.keras.layers import Activation, Dense, Dropout, Flatten, BatchNormalization, Conv2D, MaxPooling2D, Lambda, Input, AveragePooling2D
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing import image
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.preprocessing import image, image_dataset_from_directory
from tensorflow.keras import backend as K
import tensorflow as tf 
import matplotlib.pyplot as plt 
import numpy as np 
import pandas as pd
import random
#import cv2
import os

In [2]:
# Mounting the drive
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# Changing directory to images
os.chdir("/content/drive/MyDrive/Data Analysis Bootcamp")

In [4]:
# Check Classes 
image_names=list(os.listdir("Resources/Images/Classes"))
image_names.sort()
print(image_names)

['Atelectasis', 'Cardiomegaly', 'Consolidation', 'Covid', 'Edema', 'Enlarged_Cardiomediastinum', 'Fracture', 'Lung_Lesion', 'Lung_Opacity', 'No_Finding', 'Pleural_Other', 'Pneumonia', 'Pneumothorax', 'Support_Devices']


In [5]:
# Directory with Atelectasis
atele_dir=os.path.join("Resources/Images/Classes/Atelectasis")
df =pd.DataFrame.from_records({"file_name":os.listdir(atele_dir),"condition": "Atelectasis"})
for f in image_names[2:]:
    folder_path="Resources/Images/Classes/" + f
    temp_df= pd.DataFrame.from_records({"file_name":os.listdir(folder_path),"condition": f })
    df=df.append(temp_df)
    
df["condition"].value_counts()

No_Finding                    1898
Covid                          184
Pneumothorax                   164
Lung_Opacity                   156
Edema                          155
Support_Devices                154
Consolidation                  145
Enlarged_Cardiomediastinum     143
Atelectasis                    143
Fracture                       142
Lung_Lesion                    127
Pneumonia                      118
Pleural_Other                   99
Name: condition, dtype: int64

In [6]:
#Exporting Label CSV
df.to_csv("Labels.csv")

In [7]:
# balance classes - random Random Sampling No_Finding
#df_no_finding = df[df["condition"]=="No_Finding"].sample(n=150, random_state=42)
#df_balanced = df[df["condition"]!="No_Finding"].append(df_no_finding)
#df_balanced["condition"].value_counts()

In [8]:
#df_balanced["file_name"] = "Resources/Images/Classes/"+df_balanced["file_name"]

In [9]:
# Settings
batch_size = 32
img_height = 180
img_width = 180

In [10]:
data_dir = "Resources/Images/Classes/"
train_ds = tf.keras.preprocessing.image_dataset_from_directory(
  data_dir,
  validation_split=0.2,
  subset="training",
  seed=123,
  image_size=(img_height, img_width),
  batch_size=batch_size)

Found 3764 files belonging to 14 classes.
Using 3012 files for training.


In [11]:
val_ds = tf.keras.preprocessing.image_dataset_from_directory(
  data_dir,
  validation_split=0.2,
  subset="validation",
  seed=123,
  image_size=(img_height, img_width),
  batch_size=batch_size)

Found 3764 files belonging to 14 classes.
Using 752 files for validation.


In [12]:
class_names = train_ds.class_names
print(class_names)

['Atelectasis', 'Cardiomegaly', 'Consolidation', 'Covid', 'Edema', 'Enlarged_Cardiomediastinum', 'Fracture', 'Lung_Lesion', 'Lung_Opacity', 'No_Finding', 'Pleural_Other', 'Pneumonia', 'Pneumothorax', 'Support_Devices']


In [13]:
import matplotlib.pyplot as plt

plt.figure(figsize=(10, 10))
for images, labels in train_ds.take(1):
    for i in range(9):
        ax = plt.subplot(3, 3, i + 1)
        plt.imshow(images[i].numpy().astype("uint8"))
        plt.title(class_names[labels[i]])
        plt.axis("off")

KeyboardInterrupt: ignored

<Figure size 720x720 with 0 Axes>

In [14]:
normalization_layer = tf.keras.layers.experimental.preprocessing.Rescaling(1./255)

In [15]:
AUTOTUNE = tf.data.experimental.AUTOTUNE

train_ds = train_ds.cache().prefetch(buffer_size=AUTOTUNE)
val_ds = val_ds.cache().prefetch(buffer_size=AUTOTUNE)

In [16]:
num_classes = 14

model = tf.keras.Sequential([
  layers.experimental.preprocessing.Rescaling(1./255),
  layers.Conv2D(32, 3, activation='relu'),
  layers.MaxPooling2D(),
  layers.Conv2D(32, 3, activation='relu'),
  layers.MaxPooling2D(),
  layers.Conv2D(32, 3, activation='relu'),
  layers.MaxPooling2D(),
  layers.Flatten(),
  layers.Dense(128, activation='relu'),
  layers.Dense(num_classes)
])

In [17]:
model.compile(
  optimizer='adam',
  loss=tf.losses.SparseCategoricalCrossentropy(from_logits=True),
  metrics=['accuracy'])

In [None]:
model.fit(
  train_ds,
  validation_data=val_ds,
  epochs=3
)

Epoch 1/3
20/95 [=====>........................] - ETA: 9:22 - loss: 2.0478 - accuracy: 0.4750

In [None]:
img_width, img_height=200,200
batch_size=128


# Rescale images 
datagen= ImageDataGenerator(rescale=1/255)

# Flow training images in batches of 128 using train_data 
train_generator= datagen.flow_from_directory(
                X_train,
                target_size=(img_width,img_height),
                batch_size=batch_size,
                classes= ["Covid", "Atelectasis", "Cardiomegaly", "Consolidation", "Edema", "Enlarged_Cardiomediastinum", "Fracture", 
                          "Lung_Lesion", "Lung_Opacity", "No_Finding", "Pleural_Other", "Pneumonia", "Pneumothorax", "Support_Devices"],
                class_mode='categorical', shuffle=True, seed=30)

# Flow test images in 

datagen2=ImageDataGenerator(rescale=1/255)

test_generator=datagen2.flow_from_directory(
                X_test,
                target_size=(img_width,img_height),
                batch_size=batch_size,
                classes= ["Covid", "Atelectasis", "Cardiomegaly", "Consolidation", "Edema", "Enlarged_Cardiomediastinum", "Fracture", 
                          "Lung_Lesion", "Lung_Opacity", "No_Finding", "Pleural_Other", "Pneumonia", "Pneumothorax", "Support_Devices"],
                class_mode='categorical',subset="validation", shuffle=True, seed=30)


In [None]:
# CNN MODEL- conv-batch-maxpool-dropout

model=Sequential()
model.add(Conv2D(32, kernel_size=3, activation="relu", input_shape=(200,200,3)))
model.add(BatchNormalization())
model.add(MaxPooling2D(strides=(2,2)))
model.add(Dropout(.3))

model.add(Conv2D(32, kernel_size=3, activation="relu"))
model.add(BatchNormalization())
model.add(MaxPooling2D(strides=(2,2)))
model.add(Dropout(.5))

model.add(Conv2D(64, kernel_size=3, activation="relu"))
model.add(BatchNormalization())
model.add(MaxPooling2D(strides=(2,2)))
model.add(Dropout(.4))

model.add(Conv2D(64, kernel_size=3, activation="relu"))
model.add(BatchNormalization())
model.add(MaxPooling2D(strides=(2,2)))
model.add(Dropout(.3))

model.add(Flatten())
model.add(Dense(512,activation="relu"))
model.add(Dense(128,activation="relu"))
model.add(Dropout(.4))

model.add(Dense(14, activation="softmax"))

In [None]:
model.summary()

In [None]:
model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["categorical_accuracy"])


from keras.callbacks import History
history=History()

Model.fit_generator(
            train_generator, steps_per_epoch=900,
            epochs=100, callbacks=[history],
            validation_data=test_generator, 
            validation_steps=2)