# Data Preparation

In [None]:
import os

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from matplotlib import pyplot as plt
import seaborn as sns

from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPool2D, Flatten, Dense
from tensorflow.keras.utils import plot_model
from tensorflow.keras.callbacks import EarlyStopping

from sklearn.metrics import confusion_matrix, classification_report

In [None]:
# Empty two-column frame: one row per image file, holding its file name
# and its class label. It is filled in by the labelling loop below.
df = pd.DataFrame(columns=pd.Index(["image_name", "label"]))

In [None]:
# Example of a single image path inside the dataset; evaluating the bare
# string only echoes it as the cell output and has no other effect.
"../data/HB_PollenDataset/images/P52248-14r.jpg"

In [None]:
# Directory that holds every image of the dataset; it is both listed to
# build the label frame and passed to the Keras iterators as `directory`.
IMAGE_DIR = "../data/HB_PollenDataset/images/"

In [None]:
# Derive each image's class from the first letter of its file name:
# "P..." -> "POLLEN", "N..." -> "NO POLLEN"; any other file is ignored.
#
# The listing is sorted because os.listdir() returns entries in arbitrary
# order; without a stable order the seeded train/test split further down
# would select different images on different machines.
labelled_rows = []
for image in sorted(os.listdir(IMAGE_DIR)):
    if image.startswith("P"):
        labelled_rows.append((image, "POLLEN"))
    elif image.startswith("N"):
        labelled_rows.append((image, "NO POLLEN"))

# Build the frame in one go instead of enlarging it row by row with .loc,
# which copies the data on every insertion.
df = pd.DataFrame(labelled_rows, columns=["image_name", "label"])

In [None]:
# Test images are only rescaled (pixel values multiplied by 1/255).
test_DataGenerator = ImageDataGenerator(rescale=1 / 255)

# Training images get the same rescaling; in addition 20% of the rows fed
# to this generator are reserved for the "validation" subset.
train_DataGenerator = ImageDataGenerator(validation_split=0.2, rescale=1 / 255)

In [None]:
# Hold out 20% of the labelled images as the test set; the rows are
# shuffled with a fixed seed so the split is repeatable.
train_df, test_df = train_test_split(
    df,
    random_state=1,
    shuffle=True,
    test_size=0.2,
)

In [None]:
# Options shared by all three iterators so that every split is read from
# the same directory, resized identically and labelled the same way.
flow_options = dict(
    x_col="image_name",
    y_col="label",
    directory=IMAGE_DIR,
    target_size=(128, 128),
    class_mode="binary",
)

# Training and validation batches come from the same frame; the generator's
# validation_split decides which rows land in which subset.
train_images = train_DataGenerator.flow_from_dataframe(
    dataframe=train_df,
    seed=42,
    subset="training",
    **flow_options,
)

val_images = train_DataGenerator.flow_from_dataframe(
    dataframe=train_df,
    seed=42,
    subset="validation",
    **flow_options,
)

# shuffle=False is essential here: the iterator shuffles by default, which
# makes model.predict(test_images) return predictions in a random order
# while test_images.labels stays in dataframe order. Any metric comparing
# the two (confusion matrix, classification report) would then be computed
# on misaligned pairs.
test_images = test_DataGenerator.flow_from_dataframe(
    dataframe=test_df,
    shuffle=False,
    **flow_options,
)

In [None]:
# Show the per-image shape the test iterator yields, as a sanity check
# against the model's input shape.
test_images.image_shape

# Define the Model

In [None]:
# Start from an empty linear stack; layers are appended in the next cells.
model = Sequential()

In [None]:
# First convolutional stage: 16 3x3 ReLU filters applied to 128x128 RGB
# input, followed by 2x2 max pooling.
for stage_layer in (
    Conv2D(16, (3, 3), activation="relu", input_shape=(128, 128, 3), name="Input_Layer"),
    MaxPool2D(pool_size=(2, 2), name="Pooling_1"),
):
    model.add(stage_layer)

In [None]:
# Second convolutional stage: 32 3x3 ReLU filters, then 2x2 max pooling.
for stage_layer in (
    Conv2D(32, (3, 3), activation="relu", name="Conv_Layer"),
    MaxPool2D(pool_size=(2, 2), name="Pooling_2"),
):
    model.add(stage_layer)

In [None]:
# Classifier head: flatten the feature maps, pass them through two ReLU
# dense layers (128 then 64 units) and finish with a single sigmoid unit
# for the binary decision.
model.add(Flatten())
for units, activation, layer_name in (
    (128, "relu", "Dense_1"),
    (64, "relu", "Dense_2"),
    (1, "sigmoid", "Output_Layer"),
):
    model.add(Dense(units, activation=activation, name=layer_name))

In [None]:
# Print the layer-by-layer overview of the assembled network.
model.summary()

In [None]:
# Render the architecture diagram (including tensor shapes) to
# cnn_model.png at 300 dpi.
plot_model(model, to_file='cnn_model.png', show_shapes=True, dpi=300)

# Compile the Model

In [None]:
# Configure training: Adam optimizer, binary cross-entropy to match the
# single sigmoid output, and accuracy tracked as the reporting metric.
model.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    # Keras documents `metrics` as a list (or dict); a bare string is
    # rejected by newer Keras releases, so always pass a list.
    metrics=["accuracy"],
)

# Train the Model

In [None]:
# Show which integer the training iterator assigned to each label string.
train_images.class_indices

In [None]:
# Stop training once the validation loss has not improved for 3 epochs in
# a row, and roll the model back to the best weights seen.
es_callback = EarlyStopping(restore_best_weights=True, patience=3, monitor="val_loss")

In [None]:
# Train for at most 20 epochs, validating after each one; the early-stopping
# callback may end the run sooner. The returned History object keeps the
# per-epoch metrics used for the plots below.
history = model.fit(
    x=train_images,
    validation_data=val_images,
    callbacks=[es_callback],
    epochs=20,
)

In [None]:
# List the names of the per-epoch series recorded during training.
history.history.keys()

In [None]:
# Draw one figure per recorded metric, each comparing the training curve
# with the validation curve across epochs.
for metric, axis_label in (("accuracy", "Accuracy"), ("loss", "Loss")):
    plt.plot(history.history[metric], label=f"train_{metric}")
    plt.plot(history.history[f"val_{metric}"], label=f"val_{metric}")
    plt.xlabel("Epochs")
    plt.ylabel(axis_label)
    plt.legend()
    plt.show()

In [None]:
# Persist the trained model as a single HDF5 file.
# NOTE(review): presumably the ../models directory must already exist
# before this runs — confirm, or create it beforehand.
model.save("../models/cnn_model.h5", save_format='h5')

# Evaluate the Model

In [None]:
# Compute the loss and the compiled metric(s) on the held-out test batches.
model.evaluate(test_images)

In [None]:
# Show the label -> integer mapping used by the test iterator; it should
# agree with the mapping of the training iterator.
test_images.class_indices

In [None]:
# Turn the sigmoid outputs into hard 0/1 labels: a sample is assigned
# class 1 only when its score is strictly above 0.6. The (n, 1) output is
# flattened to a 1-D integer vector.
predictions = (model.predict(test_images) > 0.6).ravel().astype(int)

In [None]:
# Cross-tabulate the true test labels (rows) against the thresholded
# predictions (columns).
cm = confusion_matrix(y_true=test_images.labels, y_pred=predictions)

In [None]:
# Tick labels for both axes, in encoded-class order (0, then 1).
# NOTE(review): assumes the iterator encoded 'NO POLLEN' as 0 and 'POLLEN'
# as 1 — verify against test_images.class_indices.
class_names = ['NO POLLEN', 'POLLEN']

plt.figure(dpi=70)
# fmt="d" prints the cell counts as plain integers; seaborn's default
# format (".2g") would show counts of 100 or more in scientific notation.
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues")
# Ticks sit at the centre of each heatmap cell.
plt.xticks(ticks=[0.5, 1.5], labels=class_names)
plt.yticks(ticks=[0.5, 1.5], labels=class_names)
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.title("Confusion Matrix")
plt.show()

In [None]:
# Per-class precision, recall and F1 for the thresholded test predictions.
report = classification_report(y_true=test_images.labels, y_pred=predictions)
print(report)

In [None]:
# Count how many test samples belong to each encoded class, to judge the
# class balance behind the metrics above.
np.unique(test_images.labels, return_counts=True)