In [None]:
import os
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.image import imread

In [None]:
# Set the directory the Kaggle CLI reads its configuration from
# (presumably the kaggle.json API token was uploaded to /content - TODO confirm).
os.environ['KAGGLE_CONFIG_DIR'] = '/content'  # remove the full path
!kaggle datasets download -d ansubkhan/malaria-detection  # https://www.kaggle.com/ansubkhan/malaria-detection

# unzip and remove the zip
!unzip \*.zip && rm *.zip

In [None]:
# Dataset location on Colab (the folder the downloaded archive is expected to unpack to).
my_data_dir = '/content/Malaria Detection/cell image'

# Only list the folder when it exists, so that "Run All" keeps going on a
# machine without the Colab layout instead of stopping on FileNotFoundError.
if os.path.isdir(my_data_dir):
    print(os.listdir(my_data_dir))  # expected: 'test' and 'train'
else:
    print(repr(my_data_dir) + ' not found - not running on Colab?')

In [None]:
# Local Windows copy of the dataset. Switch to it only when it is present;
# otherwise keep whatever my_data_dir already points to (e.g. the Colab path),
# so this cell no longer breaks the notebook on other machines.
local_data_dir = 'C:\\Sandbox\\GitHub\\TF_Bootcamp\\DATA\\cell_images'
if os.path.isdir(local_data_dir):
    my_data_dir = local_data_dir
print(os.listdir(my_data_dir))  # expected: 'test' and 'train'

In [None]:
# Build the split folders with os.path.join so the paths are valid on both
# Windows and Linux/Colab (hard-coded '\\' separators only work on Windows).
test_path = os.path.join(my_data_dir, 'test')
train_path = os.path.join(my_data_dir, 'train')

# Each split is expected to contain one sub-folder per class.
print(os.listdir(test_path))
print(os.listdir(train_path))
# Name of the first file in the 'parasitized' training folder.
print(os.listdir(os.path.join(train_path, 'parasitized'))[0])

In [None]:
# One specific parasitized training image used as the running example.
# os.path.join keeps the path valid regardless of the OS path separator.
infected_cell_path = os.path.join(train_path, 'parasitized',
                                  'C100P61ThinF_IMG_20150918_144104_cell_162.png')
infected_cell = imread(infected_cell_path)
print(infected_cell.shape)
plt.imshow(infected_cell)

In [None]:
# First file listed in the 'uninfected' training folder, used as the healthy example.
# os.path.join keeps the path valid regardless of the OS path separator.
uninfected_dir = os.path.join(train_path, 'uninfected')
uninfected_cell_path = os.path.join(uninfected_dir, os.listdir(uninfected_dir)[0])
uninfected_cell = imread(uninfected_cell_path)
print(uninfected_cell.shape)
plt.imshow(uninfected_cell)

**Let's check how many images there are.**

In [None]:
# Number of training images in each class folder.
print(len(os.listdir(os.path.join(train_path, 'parasitized'))))
print(len(os.listdir(os.path.join(train_path, 'uninfected'))))

# Shapes of the two sample images loaded above.
print(uninfected_cell.shape)
print(infected_cell.shape)

In [None]:
# Issue: the image sizes are not all the same, so collect the size of every
# uninfected test image along its first two axes.
# One option: https://stackoverflow.com/questions/1507084/how-to-check-dimensions-of-all-images-in-a-directory-using-python
uninfected_test_dir = os.path.join(test_path, 'uninfected')
dim1 = []  # size along the first image axis
dim2 = []  # size along the second image axis
for image_filename in os.listdir(uninfected_test_dir):
    img = imread(os.path.join(uninfected_test_dir, image_filename))
    # Only the first two axes are needed; slicing also copes with an image
    # that has no channel axis, where a 3-way unpack would raise.
    d1, d2 = img.shape[:2]
    dim1.append(d1)
    dim2.append(d2)

In [None]:
# Joint distribution of the two image dimensions. The data must be passed as
# keywords: current seaborn releases reject positional x/y arguments.
sns.jointplot(x=dim1, y=dim2)

In [None]:
# Mean size along each of the two image axes collected above.
for dims in (dim1, dim2):
    print(np.mean(dims))

In [None]:
# Common shape every image is resized to before entering the network: 130x130 with 3 channels
# (presumably chosen close to the mean dimensions printed above - confirm against that output).
new_image_shape = (130,130,3)

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Generator that applies random augmentation and rescaling to the images it yields.
image_gen = ImageDataGenerator(rotation_range=20, # Rotate randomly by up to 20 degrees
                               width_shift_range=0.10, # Shift the pic horizontally by a max of 10% of its width
                               height_shift_range=0.10, # Shift the pic vertically by a max of 10% of its height
                               rescale=1/255, # Rescale the image by multiplying every pixel value by 1/255
                               shear_range=0.1, # Shear intensity (per the Keras docs: shear angle in degrees), i.e. slants the image
                               zoom_range=0.1, # Zoom in or out by 10% max
                               horizontal_flip=True, # Allow horizontal flipping
                               fill_mode='nearest' # Fill in missing pixels with the nearest filled value
                              )

# Untransformed sample image, for comparison with the augmented versions.
plt.imshow(infected_cell)

In [None]:
# Show a randomly transformed version of the sample image
plt.imshow(image_gen.random_transform(infected_cell))

In [None]:
# Show another randomly transformed version (each call draws new random parameters)
plt.imshow(image_gen.random_transform(infected_cell))

In [None]:
# And one more randomly transformed version of the same image
plt.imshow(image_gen.random_transform(infected_cell))

In [None]:
# Sanity check: let Keras scan the training folder; the returned iterator is not kept.
image_gen.flow_from_directory(train_path)

In [None]:
# Same sanity check for the test folder; the returned iterator is not kept.
image_gen.flow_from_directory(test_path)

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Activation, Dropout, Flatten, Dense, Conv2D, MaxPooling2D

In [None]:
#https://stats.stackexchange.com/questions/148139/rules-for-selecting-convolutional-neural-network-hyperparameters
# Small CNN for binary classification: three Conv2D + MaxPooling2D stages
# followed by a dense head with dropout and a single sigmoid output.
model = Sequential()

# Only the first layer needs input_shape; every later layer infers its input
# from the previous layer's output, so the argument is not repeated below.
model.add(Conv2D(filters=32, kernel_size=(3,3), input_shape=new_image_shape, activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(filters=64, kernel_size=(3,3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(filters=64, kernel_size=(3,3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))


model.add(Flatten())


model.add(Dense(128))
model.add(Activation('relu'))

# Dropout helps reduce overfitting by randomly turning neurons off during training. Turn off 50% of neurons.

model.add(Dropout(0.5))

# Last layer: the problem is binary, so use a single sigmoid unit
model.add(Dense(1))
model.add(Activation('sigmoid'))

model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

In [None]:
# Print the layer-by-layer overview of the model built above.
model.summary()

In [None]:
from tensorflow.keras.callbacks import EarlyStopping
# Stop training once the validation loss has not improved for 2 epochs in a row.
early_stop = EarlyStopping(monitor='val_loss',patience=2)

In [None]:
batch_size = 16

# Augmented training batches read from the class sub-folders of train_path.
# Images are resized to the network's (height, width) and get binary labels.
train_flow_options = dict(target_size=new_image_shape[:2],
                          color_mode='rgb',
                          batch_size=batch_size,
                          class_mode='binary')
train_image_gen = image_gen.flow_from_directory(train_path, **train_flow_options)

In [None]:
# Validation/test images must not be randomly augmented, otherwise the reported
# metrics and predictions change from run to run. Use a generator that only
# applies the same 1/255 rescaling as the training generator.
test_gen = ImageDataGenerator(rescale=1/255)

test_image_gen = test_gen.flow_from_directory(test_path,
                                              target_size=new_image_shape[:2],
                                              color_mode='rgb',
                                              batch_size=batch_size,
                                              class_mode='binary',
                                              shuffle=False)  # keep file order so predictions line up with .classes

In [None]:
# Mapping from class folder name to the integer label used for training.
train_image_gen.class_indices

In [None]:
epochs = 20

# Model.fit accepts Keras generators directly; fit_generator is deprecated and
# missing from recent TensorFlow/Keras releases. With the deprecated call gone,
# the blanket warnings.filterwarnings('ignore') is dropped too, so genuine
# warnings stay visible.
history = model.fit(train_image_gen,
                    epochs=epochs,
                    validation_data=test_image_gen,
                    callbacks=[early_stop])  # may stop before `epochs` is reached

In [None]:
# Optional: uncomment the lines below to save the trained model to disk.
# from tensorflow.keras.models import load_model
# model.save('malaria_detector.h5')

In [None]:
# Evaluate on the test generator with Model.evaluate (evaluate_generator is
# deprecated) and pair each value with its metric name. A bare
# `model.metrics_names` line before the call would never be displayed,
# because a cell only shows its last expression.
test_scores = model.evaluate(test_image_gen)
dict(zip(model.metrics_names, test_scores))

In [None]:
# Per-epoch training and validation loss, read from the History object that
# the training call returned.
history_df = pd.DataFrame(history.history)
loss_columns = ['loss', 'val_loss']
history_df[loss_columns].plot()

In [None]:
from tensorflow.keras.preprocessing import image

In [None]:
# https://datascience.stackexchange.com/questions/13894/how-to-get-predictions-with-predict-generator-on-streaming-test-data-in-keras
# Model.predict accepts the generator directly (predict_generator is deprecated).
# test_image_gen was created with shuffle=False, so the rows are in the same
# order as test_image_gen.classes.
pred_probabilities = model.predict(test_image_gen)

In [None]:
# Sigmoid outputs of the model, one row per test image.
pred_probabilities

In [None]:
# True integer labels of the test images, in the generator's file order.
test_image_gen.classes

In [None]:
# Threshold the sigmoid outputs at 0.5: True means the model predicts class index 1.
y_predictions = pred_probabilities > 0.5

In [None]:
# The comparison above yields a boolean NumPy array (True/False per test image).
y_predictions

In [None]:
from sklearn.metrics import classification_report,confusion_matrix

In [None]:
# Per-class precision/recall/F1 of the thresholded predictions against the true labels.
print(classification_report(test_image_gen.classes, y_predictions))

In [None]:
# Confusion matrix; by scikit-learn convention rows are true classes and columns are predicted classes.
confusion_matrix(test_image_gen.classes, y_predictions)

In [None]:
# Load the sample image resized to the network input size. target_size expects
# (height, width) only, so the channel entry of new_image_shape is dropped.
infected_image = image.load_img(infected_cell_path, target_size=new_image_shape[:2])



In [None]:
# Display the loaded (resized) image.
infected_image

In [None]:
# Type before conversion (the object returned by image.load_img).
type(infected_image)

In [None]:
# Convert the loaded image to an array. Note: this rebinds infected_image, so the cell is not idempotent.
infected_image = image.img_to_array(infected_image)

In [None]:
# Type after conversion by image.img_to_array.
type(infected_image)

In [None]:
# Add a leading batch axis, since the model predicts on batches of images.
infected_image = np.expand_dims(infected_image, axis=0)

In [None]:
# Shape now starts with a batch axis of length 1.
infected_image.shape

In [None]:
# The network was trained on pixels rescaled by 1/255 (see image_gen), but the
# array built above has not been rescaled, so apply the same factor here.
# The output is the sigmoid score for class index 1. flow_from_directory numbers
# the class folders in alphanumeric order, so presumably 0 = parasitized and
# 1 = uninfected - confirm with train_image_gen.class_indices.
model.predict(infected_image / 255)

In [None]:
# Label mapping of the training generator, needed to interpret the prediction above.
train_image_gen.class_indices

In [None]:
# Label mapping of the test generator; it should match the training mapping.
test_image_gen.class_indices