# **Modelling and Evaluation Notebook**

## Objectives

* Achieve the Second Business Requirement:

    * Engineer features for modelling
    * Data classification of healthy cherry leaves and those infected with powdery mildew

## Inputs

* inputs/cherry_leaves_dataset/cherry-leaves/train
* inputs/cherry_leaves_dataset/cherry-leaves/test
* inputs/cherry_leaves_dataset/cherry-leaves/validation
* embedded image shapes 

## Outputs

* Images distribution plot in train, validation, and test set
* Image augmentation
* Class indices to map prediction outputs back to class labels
* Machine learning model creation and training
* Save model
* Learning curve plot for model performance
* Model evaluation saved as a pickle file
* Prediction on the random image file

---

# Change working directory

##### Import Packages

In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.image import imread

##### We need to change the working directory from its current folder to its parent folder

In [None]:
import os
# Capture the folder the kernel is currently running in; the bare name on the
# last line displays it as the cell output.
current_dir = os.getcwd()
current_dir

In [None]:
# Step up one level so that relative paths resolve from the parent folder.
parent_dir = os.path.dirname(current_dir)
os.chdir(parent_dir)
print("You set a new current directory")

In [None]:
# Re-read the working directory after the change and display it for confirmation.
current_dir = os.getcwd()
current_dir

## Set Input Directories

In [None]:
# Root folder of the image dataset, followed by its three split sub-folders.
my_data_dir = 'inputs/cherry_leaves_dataset/cherry-leaves'
train_path, val_path, test_path = (
    f'{my_data_dir}/{split}' for split in ('train', 'validation', 'test')
)

## Set Output Directories

In [None]:
version = 'v1'
file_path = f'outputs/{version}'
version_file_path = os.path.join(current_dir, file_path)

# Create the versioned output folder only if it is not there yet; otherwise
# just tell the user that this version has already been used.
if not os.path.exists(version_file_path):
    os.makedirs(name=file_path)
else:
    print(f"version {version} already exists. Create a new version")

## Set Labels

In [None]:
# Class labels are the sub-folder names of the training set (train_path is set
# in the "Set Input Directories" cell). os.listdir returns entries in arbitrary
# order, so the list is sorted to keep index-based lookups such as labels[1]
# reproducible between runs and machines.
labels = sorted(os.listdir(train_path))
print('Label for the images are',labels)

## Set Image Shape

In [None]:
## Import saved image shape embedding
import joblib

version = 'v1'
# The pickled image shape is expected to exist already under outputs/<version>.
shape_file = f"outputs/{version}/image_shape.pkl"
image_shape = joblib.load(filename=shape_file)
image_shape

## Confirm Amount of Images to Train, Test and Validate Data

In [None]:
# Count the images for every (split, label) pair. The records are collected in
# a plain list and converted to a DataFrame once, because DataFrame.append is
# no longer available in pandas 2.x.
freq_records = []
for folder in ['train', 'validation', 'test']:
    for label in labels:
        # my_data_dir is the dataset root set in the "Set Input Directories" cell.
        label_data = os.listdir(os.path.join(my_data_dir, folder, label))
        freq_records.append({'Set': folder,
                             'Label': label,
                             'Frequency': len(label_data)})
        print(f"* {folder} - {label}: {len(label_data)} images")

df_freq = pd.DataFrame(freq_records, columns=['Set', 'Label', 'Frequency'])

# Grouped bar chart of the counts, saved next to the other outputs.
print("\n")
sns.set_style("whitegrid")
plt.figure(figsize=(8,5))
sns.barplot(data=df_freq, x='Set', y='Frequency', hue='Label')
plt.savefig(f'{file_path}/labels_distribution.png', bbox_inches='tight', dpi=150)
plt.show()

---

## Image Data Augmentation

As seen in the bar graph above, the train set contains a limited number of images, which is not sufficient for a deep neural network learning model; therefore the dataset size needs to be increased with image augmentation.

##### Loader Parameters

In [None]:
# Settings shared by every flow_from_directory call in this notebook.
batch_size = 32               # images per batch
data_color_mode = "rgb"       # passed as color_mode
data_class_mode = "binary"    # passed as class_mode

##### Import ImageDataGenerator

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

##### Initialize ImageDataGenerator

In [None]:
# Options for the training-image generator: random geometric transforms plus
# rescaling of pixel values by 1/255.
augmentation_options = dict(
    rotation_range=20,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode='nearest',
    rescale=1./255,
)
augmented_data = ImageDataGenerator(**augmentation_options)

##### Augment Training Dataset Images

In [None]:
# Stream shuffled, augmented training batches from disk.
# train_path is the training folder set in the "Set Input Directories" cell;
# only height and width of image_shape are passed as target_size.
train_set = augmented_data.flow_from_directory(
    train_path,
    target_size=image_shape[:2],
    color_mode=data_color_mode,
    batch_size=batch_size,
    class_mode=data_class_mode,
    shuffle=True,
)

train_set.class_indices

##### Load Validation Dataset Images (Rescale Only)

In [None]:
# Validation images are only rescaled (no random transforms) and are read in a
# fixed order. val_path is the validation folder set in the
# "Set Input Directories" cell.
validation_set = ImageDataGenerator(rescale=1./255).flow_from_directory(
    val_path,
    target_size=image_shape[:2],
    color_mode=data_color_mode,
    batch_size=batch_size,
    class_mode=data_class_mode,
    shuffle=False,
)

validation_set.class_indices

##### Load Test Dataset Images (Rescale Only)

In [None]:
# Test images are only rescaled (no random transforms) and are read in a fixed
# order. test_path is the test folder set in the "Set Input Directories" cell.
test_set = ImageDataGenerator(rescale=1./255).flow_from_directory(
    test_path,
    target_size=image_shape[:2],
    color_mode=data_color_mode,
    batch_size=batch_size,
    class_mode=data_class_mode,
    shuffle=False,
)

test_set.class_indices

##### Plot Augmented Training Image

In [None]:
def plot_augmented_image(label_set, display_size=3):
    """Show the first image of successive batches drawn from an image iterator.

    Args:
        label_set: Iterator such as the one returned by
            ``flow_from_directory``. Each step must yield an
            ``(images, labels)`` pair, and the object must expose a
            ``class_indices`` mapping of class name to index.
        display_size: Number of batches to draw; one image is shown per batch.
    """
    # Invert {class name: index} once instead of searching it for every batch.
    index_to_class = {
        index: name for name, index in label_set.class_indices.items()
    }
    for _ in range(display_size):
        # The built-in next() relies only on the iterator protocol, so it also
        # works with Keras releases whose iterators have no .next() method.
        img, label = next(label_set)
        print(img.shape)
        # NOTE(review): assumes one scalar label per image (class_mode
        # 'binary') — confirm before reusing with other class modes.
        img_class = index_to_class[int(label[0])]
        plt.imshow(img[0])
        plt.axis('off')
        plt.title(img_class)
        plt.show()

##### Plot Training Image Set

In [None]:
# Preview one image from each of three batches of the training iterator.
plot_augmented_image(train_set)

##### Plot Validation Image Set

In [None]:
# Preview one image from each of three batches of the validation iterator.
plot_augmented_image(validation_set)

##### Plot Test Image Set

In [None]:
# Preview one image from each of three batches of the test iterator.
plot_augmented_image(test_set)

##### Save Class Indices

In [None]:
# Persist the {class name: index} mapping of the training iterator so that
# predictions can be translated back to labels later.
class_indices_file = f"{file_path}/class_indices.pkl"
joblib.dump(value=train_set.class_indices, filename=class_indices_file)

---

## Creating the Model

In order to meet the business requirements for the customer, a Convolutional Neural Network (CNN) model was selected, which will learn from the dominant features contained within the powdery mildew images of the cherry leaves.

##### Import ML Packages

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Activation, Dropout, Flatten, Dense, Conv2D, MaxPooling2D
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.utils import plot_model

##### Parameter Definitions

In [None]:
# Upper bound on training epochs (passed to model.fit as epochs).
EPOCHS = 15
# Folder used for the saved model and evaluation files.
OUTPUT_DIR = "outputs/v1"

##### ML Model

In [None]:
def create_model():
    """Build and compile the CNN used for the binary leaf classification.

    Three Conv2D + MaxPooling2D stages (8, 8 and 4 filters) feed a flattened
    128-unit dense layer, a 40% dropout layer and a single sigmoid output
    unit. The input shape is read from the notebook-level ``image_shape``.

    Returns:
        A compiled ``Sequential`` model (binary cross-entropy loss, Adam
        optimizer, accuracy metric).
    """
    model = Sequential()

    # Only the first layer declares the input shape; later layers take their
    # input shape from the layer before them.
    model.add(Conv2D(filters=8, kernel_size=(3, 3), input_shape=image_shape,
                     activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Conv2D(filters=8, kernel_size=(3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Conv2D(filters=4, kernel_size=(3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Flatten())
    model.add(Dense(128, activation='relu'))

    model.add(Dropout(0.4))
    # Single sigmoid unit, paired with the binary cross-entropy loss below.
    model.add(Dense(1, activation='sigmoid'))

    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

    return model

##### Model Summary

In [None]:
# Build a throwaway model instance just to print its layer summary.
create_model().summary()

##### Early Stopping

In [None]:
# Callback that watches the validation loss with a patience of 3 epochs.
early_stop = EarlyStopping(monitor='val_loss', patience=3)

##### Fit Model for Training

In [None]:
# Fresh, untrained network for this run.
model = create_model()

# Number of whole batches per epoch; the floor division drops any remainder.
steps_per_epoch = len(train_set.classes) // batch_size

model.fit(
    train_set,
    epochs=EPOCHS,
    steps_per_epoch=steps_per_epoch,
    validation_data=validation_set,
    callbacks=[early_stop],
    verbose=1,
)

##### Save Model

In [None]:
# The file name must match the one passed to load_model in the evaluation
# step (mildew_detector_model.h5), otherwise the saved model is never found.
model.save(f'{OUTPUT_DIR}/mildew_detector_model.h5')

---

## Model Performance

---

##### Model Learning Curve

In [None]:
# Per-epoch metrics recorded on the model by the fit call.
losses = pd.DataFrame(model.history.history)

sns.set_style("whitegrid")

# (columns to plot, plot title, output file name) for each learning curve.
curves = [
    (['loss', 'val_loss'], "Loss", 'model_training_losses.png'),
    (['accuracy', 'val_accuracy'], "Accuracy", 'model_training_acc.png'),
]
for position, (columns, title, filename) in enumerate(curves):
    if position:
        # Blank output between the two figures.
        print("\n")
    losses[columns].plot(style='.-')
    plt.title(title)
    plt.savefig(f'{file_path}/{filename}', bbox_inches='tight', dpi=150)
    plt.show()

##### Model Evaluation

Load saved model

In [None]:
# Import from tensorflow.keras like every other cell in this notebook, so the
# model is loaded by the same Keras implementation that was used to build it.
from tensorflow.keras.models import load_model
model = load_model(f'{OUTPUT_DIR}/mildew_detector_model.h5')

Evaluating the model on the test set

In [None]:
# Run the loaded model over the test iterator and keep the returned result
# (presumably the loss and accuracy values — verify against the printed output).
evaluation = model.evaluate(test_set)

Save the Evaluation Pickle

In [None]:
# Store the evaluation result with the same ".pkl" extension as the other
# pickled outputs (image_shape.pkl, class_indices.pkl).
joblib.dump(value=evaluation, filename=f'{OUTPUT_DIR}/evaluation.pkl')

##### Predict on New Data

In [None]:
from tensorflow.keras.preprocessing import image

pointer = 80
# NOTE(review): assumes labels[1] is the powdery mildew class — confirm against
# the labels printed in the "Set Labels" cell.
label = labels[1] # select from the powdery mildew leaf
# test_path is the test folder set in the "Set Input Directories" cell.
img = os.listdir(f'{test_path}/{label}')[pointer]
file_to_load = os.path.join(test_path, label, img)

# Only height and width are passed as target_size, as in the data loaders.
pil_image = image.load_img(file_to_load, target_size=image_shape[:2], color_mode='rgb')
print(f'Image shape: {pil_image.size}, Image mode: {pil_image.mode}')
# Display the loaded image as the cell output.
pil_image

##### Convert Images to Array for Prediction Purpose

In [None]:
# Convert the PIL image to an array, add a leading batch axis and divide the
# pixel values by 255 (the same scaling the data loaders apply).
pixel_array = image.img_to_array(pil_image)
my_image = np.expand_dims(pixel_array, axis=0) / 255
print(my_image.shape)

##### Class Probabilities Prediction

In [None]:
# Model output for the single image in the batch.
prediction_probability = model.predict(my_image)[0,0]

# Invert {class name: index} so the thresholded output can be used as the key.
target_map = {index: name for name, index in train_set.class_indices.items()}
predicted_class = target_map[prediction_probability > 0.5]

# When class 0 is predicted, report the complement of the model output.
if predicted_class == target_map[0]:
    prediction_probability = 1 - prediction_probability

print(f'Prediction probability: {prediction_probability}')
print(f'Predicted class: {predicted_class}')

---


## Push to Github

---

* git add .
* git commit -m "->message describing alterations to cells/code and purpose"
* git push