### Loading necessary libraries

In [1]:
import os
from pathlib import Path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.image import imread
import seaborn as sns
from PIL import Image
import glob

from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPool2D, Flatten, Dense, Dropout
from sklearn.metrics import classification_report

from tensorflow.keras.callbacks import EarlyStopping

import joblib

### Defining file paths

In [10]:
# Root folders for the dataset and for the persisted model artefacts
data_dir = '../data/'
model_dir = '../model/'

# Location of the serialized CNN inside the model folder
model_file = f'{model_dir}CNN.joblib'

# Names of the entries found directly under the data folder
dir_ = os.listdir(data_dir)

In [11]:
# Display the entries that os.listdir found under data_dir
dir_

['valid', 'train', 'test', 'images2predict']

### Retrieving directories list

In [15]:
# Map every entry of the data folder to the paths found directly inside it
all_dir = {}

for d in dir_:
    all_dir[d] = glob.glob(f'{data_dir}{d}/*')

In [16]:
# Display the paths collected in all_dir for each entry of the data folder
all_dir

{'valid': ['../data/valid/AFRICAN FIREFINCH',
  '../data/valid/ALBERTS TOWHEE',
  '../data/valid/AFRICAN EMERALD CUCKOO',
  '../data/valid/AMERICAN GOLDFINCH',
  '../data/valid/ALBATROSS',
  '../data/valid/AFRICAN CROWNED CRANE',
  '../data/valid/ALEXANDRINE PARAKEET',
  '../data/valid/AMERICAN BITTERN',
  '../data/valid/AFRICAN OYSTER CATCHER',
  '../data/valid/AMERICAN KESTREL',
  '../data/valid/ABBOTTS BABBLER',
  '../data/valid/AMERICAN AVOCET',
  '../data/valid/ABYSSINIAN GROUND HORNBILL',
  '../data/valid/AFRICAN PIED HORNBILL',
  '../data/valid/ALPINE CHOUGH',
  '../data/valid/ABBOTTS BOOBY',
  '../data/valid/AMERICAN FLAMINGO',
  '../data/valid/ALTAMIRA YELLOWTHROAT',
  '../data/valid/AMERICAN COOT',
  '../data/valid/AFRICAN PYGMY GOOSE'],
 'train': ['../data/train/AFRICAN EMERALD CUCKOO',
  '../data/train/AMERICAN GOLDFINCH',
  '../data/train/AFRICAN FIREFINCH',
  '../data/train/ALEXANDRINE PARAKEET',
  '../data/train/AFRICAN PYGMY GOOSE',
  '../data/train/ABBOTTS BABBLER',
  

### Retrieving data files

In [None]:
# Map every entry of the data folder to the sorted list of files stored
# anywhere below it. The recursive pattern covers both layouts: files placed
# directly in the folder and files nested inside per-class sub-folders.
# Previously the dict was created with None values and never filled, so any
# later iteration over files[...] failed.
files = {
    d: sorted(
        p for p in glob.glob(os.path.join(data_dir, d, '**', '*'), recursive=True)
        if os.path.isfile(p)
    )
    for d in dir_
}

### Investigate dataset resolution distribution

In [None]:
# Dataset split to inspect; it must be one of the folders listed in `dir_`.
# (The previous value 'Parasitized' is not a folder of this dataset and made
# the lookup below fail.)
d = 'train'

# Image files of that split. Fall back to scanning the folder when `files`
# holds no list for it, so the cell still works if `files` was left unfilled.
image_paths = files.get(d) or sorted(
    p for p in glob.glob(os.path.join(data_dir, d, '**', '*'), recursive=True)
    if os.path.isfile(p)
)

# Shape of every image, in file order
shape = []

# Decoded images, kept under the original name for any later cell that reads it
image_un = []

for file in image_paths:
    # Read image file
    image = plt.imread(file)

    # Add image to image list
    image_un.append(image)

    shape.append(image.shape)

# Fail early with a clear message instead of printing empty statistics
if not shape:
    raise ValueError(f'No image files found for {d!r} under {data_dir}')

# Regroup the shape tuples by axis. zip stops at the shortest tuple, so if
# images with a different number of dimensions are mixed, only the leading
# dimensions they all share are reported.
dims = tuple(zip(*shape))

# Per-axis sum, extremes and percentiles
shape_sum = tuple(map(sum, dims))
shape_min = tuple(map(min, dims))
shape_max = tuple(map(max, dims))
shape_per25 = tuple(np.percentile(dim, 25) for dim in dims)
shape_median = tuple(np.percentile(dim, 50) for dim in dims)
shape_per75 = tuple(np.percentile(dim, 75) for dim in dims)
shape_per90 = tuple(np.percentile(dim, 90) for dim in dims)

# Calculating shape average (floor division keeps whole-number sizes)
shape_avg = tuple(i // len(shape) for i in shape_sum)

print(f'The average dimensions for images in the {d} folder is {shape_avg}')

print(f'The maximum dimensions for images in the {d} folder is {shape_max}')

print(f'The mimimum dimensions for images in the {d} folder is {shape_min}')

print(f'The median dimensions for images in the {d} folder is {shape_median}')

print(f'The 25th percentile dimensions for images in the {d} folder is {shape_per25}')

print(f'The 75th percentile dimensions for images in the {d} folder is {shape_per75}')

print(f'The 90th percentile dimensions for images in the {d} folder is {shape_per90}')

In [None]:
# Resolution every image is resized to; passed as `target_size` to the
# flow_from_directory calls below
res = (224, 224)

### Image Augmentation and Scaling

In [None]:
# Shared generator: applies a 1/255 rescale factor to the images and
# configures a 0.2 validation split for callers that request a `subset`
img_gen = ImageDataGenerator(
    validation_split=0.2,
    rescale=1 / 255,
)

### Apply Image augmentation and scaling to dataset

In [None]:
# Read the training images from their own `train` folder. Pointing the
# generator at `data_dir` itself would make Keras treat the split folders
# (valid / train / test / images2predict) as the classes instead of the
# species sub-folders. No `subset` is passed because the dataset already
# ships a separate `valid` folder, so the whole `train` folder is used.
# class_mode='categorical' yields one-hot labels, which suits the many
# species classes; 'binary' is only meant for two classes.
train_data = img_gen.flow_from_directory(os.path.join(data_dir, 'train'),
                                         target_size=res, batch_size=64,
                                         class_mode='categorical',
                                         shuffle=True)

# Mapping from species folder name to class index
train_data.class_indices

In [None]:
# Read the validation images from the dedicated `valid` folder rather than
# carving a subset out of `data_dir`, whose direct sub-folders are the dataset
# splits and not the classes. Order is kept fixed (shuffle=False) and images
# are served one at a time. class_mode='categorical' yields one-hot labels for
# the many species classes; 'binary' is only meant for two classes.
val_data = img_gen.flow_from_directory(os.path.join(data_dir, 'valid'),
                                       target_size=res, batch_size=1,
                                       shuffle=False,
                                       class_mode='categorical')

# Mapping from species folder name to class index
val_data.class_indices

In [None]:
# Read the held-out images from the dedicated `test` folder. The previous call
# was identical to the validation generator (same directory, same
# subset='validation'), so the "test" data was just the validation data again.
# Order is kept fixed (shuffle=False) so predictions line up with
# test_data.classes. class_mode='categorical' yields one-hot labels for the
# many species classes; 'binary' is only meant for two classes.
test_data = img_gen.flow_from_directory(os.path.join(data_dir, 'test'),
                                        target_size=res, batch_size=1,
                                        shuffle=False,
                                        class_mode='categorical')

# Mapping from species folder name to class index
test_data.class_indices

### Load model if it already exists

In [4]:
# Path of the optional scaler artefact stored next to the model
scaler_file = os.path.join(model_dir, 'CNN_scaler.joblib')

# Reuse a previously saved model when one is available.
# SECURITY: joblib.load unpickles the file and can execute arbitrary code;
# only load artefacts produced by this project.
# NOTE(review): Keras models are normally persisted with model.save();
# confirm that joblib round-trips this model correctly.
if os.path.exists(model_file):
    # Load from the same path that was just checked, not a hard-coded copy
    cnn = joblib.load(model_file)
    # The scaler file may be absent; do not fail the whole cell because of it
    scaler = joblib.load(scaler_file) if os.path.exists(scaler_file) else None
    print('Model exists')

### Define model if it does not already exist

In [9]:
# Announce that no saved model was found at model_file
if not Path(model_file).exists():
    print('Model does not exists')
    

Model does not exists


### Train the model on the dataset

### Save model

In [None]:
# Make sure the target folder exists; joblib.dump does not create it
os.makedirs(model_dir, exist_ok=True)

# Persist the model at the same path the existence checks use, then the
# scaler next to it.
# NOTE(review): Keras models are normally persisted with model.save();
# confirm that joblib round-trips this model correctly.
joblib.dump(cnn, model_file)
joblib.dump(scaler, os.path.join(model_dir, 'CNN_scaler.joblib'))