### Imports

In [1]:
import pandas as pd

# 1.1 Call libraries for image processing
#     Another preprocessing option is text and sequence
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# 1.2, Libraries for building sequential CNN model
#      A model is composed of sequence of layered objects
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPool2D
from tensorflow.keras.layers import Activation, Dropout, Flatten, Dense, Input
from tensorflow.keras.utils import plot_model

# 1.5 OS related
import os, time

# 1.6 For ROC plotting
import matplotlib.pyplot as plt

# 1.7
import numpy as np
# conda install scikit-learn
from sklearn import metrics


In [171]:
# B. Define constants

# 2. Our constants
# 2.1 Dimensions to which our images will be adjusted
img_width, img_height = 150, 150

# 2.2 Data folder containing all training images, maybe in folders: cats and dogs

train_data_dir = r"D:\chest_xray\chest_xray\train"
test_data_dir =  r"D:\chest_xray\chest_xray\test"

# 2.3 What is the total number of training images
#      that should be generated (not what are available)
nb_train_samples = 1341+3875   # Actual: 1000 + 1000 (more) =    2000

# 2.4 Data folder containing all validation images

validation_data_dir = r"D:\chest_xray\chest_xray\val"

# 2.5 What is the total no of validation samples that should
#     be generated?
nb_validation_samples = 16   # Actual: 8 + 8 (more) =  16

# Some hyperparameters

# 2.6 Batch size to train at one go:
batch_size = 32             # No of batches = 5216/163 = 32
                            # So per epoch we have 32 batches

# 2.7 How many epochs of training?
epochs = 5                  # For lack of time, let us make it just 5.

# 2.8 No of test samples
test_generator_samples = 390+234

# 2.9 For test data, what should be batch size
test_batch_size = 16    # This is different from training batch size

# 2.10
input_shape = (img_width, img_height, 3)


### Model Building

In [241]:
model_1 = Sequential()

In [242]:
model_1.add(Input(shape=input_shape))

In [243]:
model_1.add(Conv2D(
    filters=128,
    kernel_size= (3,3),
    strides = (1,1),
    padding='valid',
    activation= 'relu'
))

In [244]:
model_1.add(Conv2D(
    filters=64,
    kernel_size= (3,3),
    strides = (1,1),
    activation= 'relu'
))

In [245]:
model_1.add(Conv2D(
    filters=64,
    kernel_size= (3,3),
    activation= 'relu'
))

In [246]:
model_1.add(Conv2D(
    filters=64,
    kernel_size= (4,4),
    activation= 'relu'
))

In [247]:

model_1.summary()

Model: "sequential_16"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_59 (Conv2D)          (None, 148, 148, 128)     3584      
                                                                 
 conv2d_60 (Conv2D)          (None, 146, 146, 64)      73792     
                                                                 
 conv2d_61 (Conv2D)          (None, 144, 144, 64)      36928     
                                                                 
 conv2d_62 (Conv2D)          (None, 141, 141, 64)      65600     
                                                                 
Total params: 179,904
Trainable params: 179,904
Non-trainable params: 0
_________________________________________________________________


In [248]:

model_1.add(Flatten())

In [249]:
1024*2

2048

In [250]:
model_1.add(Dense(2048, activation='relu'))


ResourceExhaustedError: {{function_node __wrapped__Mul_device_/job:localhost/replica:0/task:0/device:CPU:0}} OOM when allocating tensor with shape[1272384,2048] and type float on /job:localhost/replica:0/task:0/device:CPU:0 by allocator cpu [Op:Mul]

In [208]:
model_1.add(Dense(512, activation='relu'))


In [209]:
model_1.add(Dense(64, activation='relu'))

In [210]:
model_1.add(Dense(1, activation='sigmoid'))


In [211]:

model_1.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy', 'binary_crossentropy']
)

In [212]:
model_1.summary()

Model: "sequential_13"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_47 (Conv2D)          (None, 148, 148, 128)     3584      
                                                                 
 conv2d_48 (Conv2D)          (None, 146, 146, 64)      73792     
                                                                 
 conv2d_49 (Conv2D)          (None, 144, 144, 64)      36928     
                                                                 
 conv2d_50 (Conv2D)          (None, 141, 141, 64)      65600     
                                                                 
 flatten_9 (Flatten)         (None, 1272384)           0         
                                                                 
 dense_30 (Dense)            (None, 256)               325730560 
                                                                 
 dense_31 (Dense)            (None, 128)             

### Data Generators

In [213]:

tr_dtgen = ImageDataGenerator(
                              rescale=1. / 255,      # Normalize colour intensities in 0-1 range
                              shear_range=0.2,       # Shear varies from 0-0.2
                              zoom_range=0.2,
                              horizontal_flip=True,
                              )

In [214]:
train_generator = tr_dtgen.flow_from_directory(
                                               train_data_dir,       # Data folder of cats & dogs
                                               target_size=(img_width, img_height),  # Resize images
                                               batch_size=batch_size,  # Return images in batches
                                               class_mode='binary'   # Output labels will be 1D binary labels
                                                                     # [[1],[0],[0],[1]]
                                                                     # If 'categorical' output labels will be
                                                                     # 2D OneHotEncoded: [[1,0],[0,1],[0,1],[1,0]]
                                                                     # If 'binary' use 'sigmoid' at output
                                                                     # If 'categorical' use softmax at output

                                                )

Found 5216 images belonging to 2 classes.


In [215]:
val_dtgen= ImageDataGenerator(rescale=1. / 255)


In [216]:
validation_generator = val_dtgen.flow_from_directory(
                                                     validation_data_dir,
                                                     target_size=(img_width, img_height),   # Resize images
                                                     batch_size=4,    # batch size to augment at a time
                                                     class_mode='binary'  # Return 1D array of class labels
                                                     )

Found 16 images belonging to 2 classes.


In [217]:
start = time.time()
history = model_1.fit_generator(
                              # First argument is always data generator
                              generator = train_generator,
                              # How many batches per epoch?
                              # Can be any number as generator loops indefinitely
                              steps_per_epoch=nb_train_samples // batch_size,
                              # No of epochs
                              epochs=epochs,
                              # Get validation data from validation generator
                              validation_data=validation_generator,
                              verbose = 1,
                              validation_steps=nb_validation_samples // 4
                              )

end = time.time()
(end - start)/60

  history = model_1.fit_generator(


Epoch 1/5
Epoch 2/5
Epoch 3/5

KeyboardInterrupt: 

### Model evaluation & Predictions

In [90]:
result = model_1.evaluate(
                        validation_generator,
                        verbose = 1,
                        steps = 4        # How many batches
                        )


# 7.1.1
result



[1.1958789825439453, 0.5625, 1.1958789825439453]

In [None]:

# 8.0 Make predictions

# 8.1 Using generator
#     https://www.tensorflow.org/api_docs/python/tf/keras/Model#predict
pred = model_1.predict(validation_generator, steps = 2)

# 8.1.1
pred[:5]

In [None]:
test_dtgen = ImageDataGenerator(rescale=1. / 255)

# 9.0.1 Test data

test_generator = test_dtgen.flow_from_directory(
                                                test_data_dir,
                                                # Resize images
                                                target_size=(img_width, img_height),
                                                # batch size to augment at a time
                                                batch_size=batch_size,
                                                # Return 1D array of class labels
                                                class_mode='binary'
                                                )

In [None]:

# 9.0.2 Get iterator
#       and a batch of (images, image_labels)
im = test_generator    # Get iterator
images = next(im)      # Get images
images[0].shape

In [None]:

# 9.1 Make predictions
results = model_1.predict(images[0])

# 9.2 Plot the images and check with
#     results
import matplotlib.pyplot as plt
from matplotlib.pyplot import imshow
plt.figure(figsize= (10,10))
for i in range(results.shape[0]):
    plt.subplot(4,4,i+1)
    _=imshow(images[0][i]) ;

plt.show() ;
# 9.3 Predicted labels
print(results)  