In [42]:
import numpy as np
import pandas as pd
import os

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix

import tensorflow as tf

from sklearn.metrics import r2_score

from IPython.display import display,Image
import matplotlib.pyplot as plt
import matplotlib.cm as cm

In [43]:
def create_df(directory):
    """A function to convert a directory of particular images into pandas dataframe.
    Depending on the name of directory, all the images are assigned the label.
    
    Fuction returns a Pandas Dataframe"""


    df = pd.DataFrame()
    for folder in os.listdir(directory):

        path = [(os.path.join(directory,folder,i)) for i in os.listdir(os.path.join(directory,folder))]
            
        if df.empty:
            df = pd.DataFrame({'file_path':path})
            if folder == 'Parasitized':
                df['Infected'] = 1
            else:
                df['Infected'] = 0
        else:
            temp_df =  pd.DataFrame({'file_path':path})
            if folder == 'Uninfected':
                temp_df['Infected'] = 0
            else:
                temp_df['Infected'] = 1
            
            df =  pd.concat([df,temp_df])

        
        

    return df


In [44]:
# Collect all images in dataframe

df = create_df(r'D:\python projects\malaria detection\malaria\cell_images')

df

Unnamed: 0,file_path,Infected
0,D:\python projects\malaria detection\malaria\c...,1
1,D:\python projects\malaria detection\malaria\c...,1
2,D:\python projects\malaria detection\malaria\c...,1
3,D:\python projects\malaria detection\malaria\c...,1
4,D:\python projects\malaria detection\malaria\c...,1
...,...,...
13775,D:\python projects\malaria detection\malaria\c...,0
13776,D:\python projects\malaria detection\malaria\c...,0
13777,D:\python projects\malaria detection\malaria\c...,0
13778,D:\python projects\malaria detection\malaria\c...,0


In [45]:
# Shuffle the dataframe

df = df.sample(frac=1)
df

Unnamed: 0,file_path,Infected
10969,D:\python projects\malaria detection\malaria\c...,0
8330,D:\python projects\malaria detection\malaria\c...,1
3632,D:\python projects\malaria detection\malaria\c...,0
7289,D:\python projects\malaria detection\malaria\c...,1
8149,D:\python projects\malaria detection\malaria\c...,1
...,...,...
4513,D:\python projects\malaria detection\malaria\c...,1
7883,D:\python projects\malaria detection\malaria\c...,1
8982,D:\python projects\malaria detection\malaria\c...,1
48,D:\python projects\malaria detection\malaria\c...,1


In [46]:
train_df, test_df = train_test_split(df,train_size=0.8,random_state=42)

In [47]:
train_df

Unnamed: 0,file_path,Infected
5191,D:\python projects\malaria detection\malaria\c...,1
3158,D:\python projects\malaria detection\malaria\c...,0
6931,D:\python projects\malaria detection\malaria\c...,1
10947,D:\python projects\malaria detection\malaria\c...,1
4595,D:\python projects\malaria detection\malaria\c...,1
...,...,...
1448,D:\python projects\malaria detection\malaria\c...,1
8221,D:\python projects\malaria detection\malaria\c...,0
8949,D:\python projects\malaria detection\malaria\c...,1
5281,D:\python projects\malaria detection\malaria\c...,1


In [48]:
test_df

Unnamed: 0,file_path,Infected
8314,D:\python projects\malaria detection\malaria\c...,1
11529,D:\python projects\malaria detection\malaria\c...,1
12327,D:\python projects\malaria detection\malaria\c...,1
1536,D:\python projects\malaria detection\malaria\c...,1
1667,D:\python projects\malaria detection\malaria\c...,0
...,...,...
2316,D:\python projects\malaria detection\malaria\c...,0
2099,D:\python projects\malaria detection\malaria\c...,1
7957,D:\python projects\malaria detection\malaria\c...,1
9961,D:\python projects\malaria detection\malaria\c...,1


In [49]:
train_generator = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2
)
test_generator = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255
)

img_size = 120

In [50]:
train_images = train_generator.flow_from_dataframe(
    dataframe=train_df,
    x_col='file_path',
    y_col='Infected',
    target_size=(img_size, img_size),
    color_mode='rgb',
    class_mode='raw',
    batch_size=64,
    shuffle=True,
    seed=42,
    subset='training'
)

val_images = train_generator.flow_from_dataframe(
    dataframe=train_df,
    x_col='file_path',
    y_col='Infected',
    target_size=(img_size, img_size),
    color_mode='rgb',
    class_mode='raw',
    batch_size=64,
    shuffle=True,
    seed=42,
    subset='validation'
)

test_images = test_generator.flow_from_dataframe(
    dataframe=test_df,
    x_col='file_path',
    y_col='Infected',
    target_size=(img_size, img_size),
    color_mode='rgb',
    class_mode='raw',
    batch_size=64,
    shuffle=False
)



Found 17637 validated image filenames.
Found 4409 validated image filenames.
Found 5512 validated image filenames.


In [32]:
# model architecture

inputs = tf.keras.Input(shape=(img_size, img_size, 3))
x = tf.keras.layers.Conv2D(filters=32, kernel_size=(3, 3), activation='relu')(inputs)
x = tf.keras.layers.MaxPool2D()(x)
x = tf.keras.layers.Conv2D(filters=32, kernel_size=(3, 3), activation='relu')(x)
x = tf.keras.layers.MaxPool2D()(x)
x = tf.keras.layers.Conv2D(filters=32, kernel_size=(3, 3), activation='relu')(x)
x = tf.keras.layers.MaxPool2D()(x)
x = tf.keras.layers.GlobalAveragePooling2D()(x)
x = tf.keras.layers.Dense(32, activation='relu')(x)
x = tf.keras.layers.Dense(64, activation='relu')(x)
x = tf.keras.layers.Dense(64, activation='relu')(x)
x = tf.keras.layers.Dense(64, activation='relu')(x)
outputs = tf.keras.layers.Dense(1, activation='sigmoid')(x)

In [33]:
model = tf.keras.Model(inputs=inputs, outputs=outputs)

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']

)

model.summary()

Model: "model_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 120, 120, 3)]     0         
                                                                 
 conv2d (Conv2D)             (None, 118, 118, 32)      896       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 59, 59, 32)       0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 57, 57, 32)        9248      
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 28, 28, 32)       0         
 2D)                                                             
                                                                 
 conv2d_2 (Conv2D)           (None, 26, 26, 32)        9248

In [34]:


history = model.fit(
    train_images,
    validation_data=val_images,
    epochs=15,
    callbacks=[
        tf.keras.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=5,
            restore_best_weights=True
        )
    ]
)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [113]:
predicted_disease = np.squeeze(model.predict(test_images))
true_damage = test_images.labels

array([1, 0, 0, ..., 0, 1, 0], dtype=int64)

In [116]:
accuracy_score(true_damage,predicted_disease.round())

0.9573657474600871

In [117]:
confusion_matrix(true_damage,predicted_disease.round())

array([[2678,   79],
       [ 156, 2599]], dtype=int64)