# Simple Pollen Classifier

## File System

### Charles Dirs

In [46]:
# from google.colab import drive
# drive.mount('/drive')

# driveDir = "../drive/My Drive/Colab Notebooks/PollenImageClassification/PollenVsNotPollen/"
# notPollenDir = driveDir + "nonpollen"
# pollenDir = driveDir + "pollen"

# # Save Weights 
# weightsDir = "../drive/My Drive/Colab Notebooks/PollenImageClassification/PvNP_Weights/"
# saved_model_file = weightsDir+"StandardNetwork.h5"

# checkpointsDir = "checkpoints/"

### Robert Dirs

In [47]:
# Mount Google Drive (Colab only) so the dataset and weight files are reachable under /drive.
from google.colab import drive
drive.mount('/drive')

# Pollen / not-pollen image folders for the simple binary classifier.
driveDir = "/drive/My Drive/Colab Notebooks/PollenCounter/SimplePollenClassifier/"
notPollenDir = driveDir + "NotPollen"
pollenDir = driveDir + "Pollen"

# Weights and per-epoch checkpoints of the simple classifier.
weightsDir = driveDir + "weights/"
checkpointsDir = driveDir + "checkpoints/"

# These two paths are built from the first weightsDir / checkpointsDir values above.
saved_model_file = checkpointsDir + "model.150-0.17.hdf5"
xception_model_file = weightsDir + "XceptionBest.h5"


# Second Drive location: class-label file and Xception weights.
driveDir2 = "/drive/My Drive/PollenImageClassification/"
labelsDir = driveDir2 + "binaryClasses.txt"

# NOTE(review): weightsDir is reassigned here; from this line on it no longer
# points at driveDir + "weights/" (xception_model_file above keeps the old value).
weightsDir = driveDir2 + "PvNP_Weights/"
weightsFile = weightsDir + "Xception192x192.h5"

dDir = driveDir2
imgDir = dDir+"MicroscopeImagesandVideos/test.jpg"
# NOTE(review): driveDir3 is not defined anywhere in this notebook, so this line
# raises NameError on a fresh kernel (Restart & Run All). Presumably it should be
# the image root folder passed as imagesDir to loadDataset2 -- TODO confirm and define it.
dirs = [labelsDir, driveDir3]

Drive already mounted at /drive; to attempt to forcibly remount, call drive.mount("/drive", force_remount=True).


## Imports 

In [48]:
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import glob
import numpy as np
import os
import cv2
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow.keras import datasets, layers, models
from tensorflow.keras.optimizers import SGD

from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.keras.models import load_model


import tensorflow as tf
from tensorflow.keras import datasets, layers, models
from tensorflow.keras import Model
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, TensorBoard, EarlyStopping
from tensorflow.keras.layers import Dropout, Flatten, Dense, GlobalAveragePooling2D

from sklearn.preprocessing import LabelEncoder
from keras.utils import to_categorical

from sklearn.model_selection import StratifiedKFold

from tensorflow.keras.applications import VGG16

from keras.callbacks import ModelCheckpoint, LearningRateScheduler, TensorBoard, EarlyStopping

## Load Data

In [49]:
def loadDataset(pollenDir="Pollen", notPollenDir="NotPollen"):
    """Load the binary pollen / not-pollen image dataset from two folders.

    Every ``*.jpg`` directly inside ``notPollenDir`` is labelled 0 and every
    ``*.jpg`` directly inside ``pollenDir`` is labelled 1. Not-pollen samples
    come first in the result, followed by pollen samples.

    Args:
        pollenDir: Folder containing the pollen images (label 1).
        notPollenDir: Folder containing the not-pollen images (label 0).

    Returns:
        ``[X, y]`` where ``X`` is ``np.array`` of the images as read by
        ``cv2.imread`` and ``y`` is ``np.array`` of the matching 0/1 labels.
    """
    X = []
    y = []

    # Same ordering as before: all not-pollen (0) samples, then all pollen (1).
    for label, directory in ((0, notPollenDir), (1, pollenDir)):
        for imageFile in glob.glob(directory + "/*.jpg"):
            img = cv2.imread(imageFile)
            if img is None:
                # cv2.imread signals an unreadable/corrupt file by returning None;
                # keeping it would break resizing and training later on.
                print("Warning: could not read image, skipping: " + imageFile)
                continue
            X.append(img)
            y.append(label)

    return [np.array(X), np.array(y)]

def loadDataset2(classLabels,imagesDir):
    """Load a multi-class image dataset described by a class-label text file.

    Each non-blank line of ``classLabels`` names one class. The images of that
    class are the ``*.jpg`` files matched by ``imagesDir + <class> + "/*.jpg"``,
    so ``imagesDir`` is used as a plain string prefix (it needs its own trailing
    separator).

    Args:
        classLabels: Path of a text file with one class name per line.
        imagesDir: String prefix prepended to every class name.

    Returns:
        ``[data, labels]``: a list of images and a parallel list holding the
        class name of each image.
    """
    data = []
    labels = []
    with open(classLabels,"r") as f:
        for line in f:
            key = line.strip()
            if not key:
                # A blank line would otherwise become a class named "".
                continue
            for imageFile in glob.glob(imagesDir+key+"/*.jpg"):
                # IMREAD_COLOR forces 3-channel BGR; some images would otherwise
                # be read with an extra alpha channel and fail to broadcast later.
                img = cv2.imread(imageFile, cv2.IMREAD_COLOR)
                if img is None:
                    # cv2.imread returns None when the file cannot be decoded.
                    print("Warning: could not read image, skipping: " + imageFile)
                    continue
                data.append(img)
                labels.append(key)

    return [data,labels]

In [50]:
def addExtraImages():
    """Copy a few images per class from the USPollen set into the Pollens folder.

    For every line of the classes file, the ``*.jpg`` files whose name contains
    that class key are collected. The first image of the first class is written
    as ``pollen34.jpg``; then, starting at ``pollen35.jpg``, the first four
    images of every class are written with consecutive numbers.

    All paths are hard-coded Drive locations; the function returns nothing.
    """
    # Extra Images
    loc = "../drive/My Drive/USPollenKeras/Images Old/USPollen/"
    classes_file = "../drive/My Drive/USPollen/classes.txt"
    endLoc = "../drive/My Drive/Colab Notebooks/PollenImageClassification/Pollens/"

    # One list of matching image paths per class key.
    classes = []
    with open(classes_file,"r") as f:
        for line in f:
            key = line.strip()
            classes.append(glob.glob(loc+"*"+key+"*.jpg"))

    # Seed image: first file of the first class, if there is one.
    counter = 34
    if classes and len(classes[0]) > 0:
        img = cv2.imread(classes[0][0], cv2.IMREAD_UNCHANGED)
        cv2.imwrite(endLoc+"pollen"+repr(counter)+".jpg",img)

    counter = 35
    for classFiles in classes:
        # Only the first four images of a class are written, so only those are read.
        for val in classFiles[:4]:
            img = cv2.imread(val, cv2.IMREAD_UNCHANGED)
            cv2.imwrite(endLoc+"pollen"+repr(counter)+".jpg",img)
            counter = counter+1

## Image Preprocessing

In [51]:
def resizeDataset(X, width=341, height=512):
    """Resize every image in ``X`` with ``cv2.resize`` to ``(width, height)``.

    ``X`` must expose ``.shape[0]`` (the number of images) and integer
    indexing. The resized images are returned stacked in a new ``np.array``.
    """
    target = (width, height)
    count = X.shape[0]
    return np.array([cv2.resize(X[idx], target) for idx in range(count)])

## Model


In [52]:
class PollenCNN(models.Sequential):
    """Binary pollen / not-pollen classifier implemented as a Keras Sequential model.

    Intended flow: ``buildModel`` (or ``buildTransferLearnedModel``) ->
    ``loadData`` -> ``preprocess`` -> ``trainModel`` -> ``graphAccuracy`` /
    ``getBestEpochByMetric`` / ``make_prediction``.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.data = None        # [X, y] as returned by loadDataset
        self.data_train = None  # [trainX, trainY], set by preprocess
        self.data_test = None   # [testX, testY], set by preprocess
        self.history = None     # return value of fit, set by trainModel

    def buildModel(self, input_shape=(1364, 2048, 3)):
        """Add a small three-convolution CNN with a 2-unit logit output and compile it.

        Args:
            input_shape: Shape of one input image, passed to the first Conv2D layer.
        """
        self.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape))
        self.add(layers.MaxPooling2D((2, 2)))
        self.add(layers.Conv2D(64, (3, 3), activation='relu'))
        self.add(layers.MaxPooling2D((2, 2)))
        self.add(layers.Conv2D(64, (3, 3), activation='relu'))
        self.add(layers.Flatten())
        self.add(layers.Dense(64, activation='relu'))
        # No activation on the last layer: the loss below is told it receives logits.
        self.add(layers.Dense(2))
        self.summary()
        self.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

    def buildTransferLearnedModel(self,input_shape=(1364, 2048, 3)):
        """Add an ImageNet-pretrained VGG16 base plus a dense head and compile it.

        Args:
            input_shape: Shape of one input image, passed to VGG16.
        """
        self.add(tf.keras.applications.VGG16(weights = "imagenet", include_top=False, input_shape = input_shape))

        # NOTE(review): this slices the layers of this Sequential model, not the
        # layers inside VGG16. If VGG16 is the only layer added so far, the loop
        # marks the whole base as non-trainable -- confirm that is the intent.
        for layer in self.layers[:17]:
            layer.trainable = False

        self.add(layers.Flatten())
        self.add(layers.Dense(256, activation='relu'))
        self.add(layers.Dropout(0.5))
        self.add(layers.Dense(4096, activation="relu"))
        self.add(layers.Dense(2, activation="softmax"))
        # `learning_rate` is the supported argument name; `lr` is a deprecated alias.
        opt = SGD(learning_rate=0.001, momentum=0.9)
        # loadDataset yields integer class labels (0/1) and the output is a 2-unit
        # softmax, so the matching loss is the sparse categorical cross-entropy
        # (binary_crossentropy would compare each label against both outputs).
        self.compile(optimizer=opt,
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

    def trainModel(self, epochs=10, batch_size=32,augment_batch_size = 8, early_stopping = False, checkpoints = False):
        """Train on augmented batches of ``self.data_train`` and validate on ``self.data_test``.

        Args:
            epochs: Number of training epochs.
            batch_size: Forwarded to ``fit`` unchanged. NOTE(review): the training
                batches themselves come from the generator, whose size is
                ``augment_batch_size`` -- confirm whether this argument is still wanted.
            augment_batch_size: Batch size of the augmenting generator.
            early_stopping: When True, add an ``EarlyStopping`` callback with defaults.
            checkpoints: When True, save a model file into ``checkpointsDir`` every epoch.

        The result of ``fit`` is stored in ``self.history``.
        """
        #Data Augmentation
        aug = ImageDataGenerator(
            rotation_range=20,
            zoom_range=0.15,
            width_shift_range=0.2,
            height_shift_range=0.2,
            shear_range=0.15,
            horizontal_flip=True,
            fill_mode="nearest")

        train_generator = aug.flow(self.data_train[0],self.data_train[1],batch_size = augment_batch_size)

        callbacks_arr = []
        if early_stopping:
            callbacks_arr.append(EarlyStopping())
        if checkpoints:
            callbacks_arr.append(ModelCheckpoint(checkpointsDir + "model.{epoch:02d}-{val_loss:.2f}.hdf5", save_freq="epoch"))

        # Train this instance; the previous code called a notebook-level global
        # named `model`, which fails or trains the wrong object when the instance
        # is bound to any other name.
        self.history = self.fit(train_generator, epochs=epochs, batch_size=batch_size,
                    validation_data=(self.data_test[0], self.data_test[1]),
                    callbacks = callbacks_arr)

    def loadData(self, trainDirs=["Pollen", "NotPollen"], trainSubset=None):
        """Load images into ``self.data`` via ``loadDataset``.

        Args:
            trainDirs: ``[pollenDir, notPollenDir]``.
            trainSubset: Currently unused.
        """
        self.data = loadDataset(pollenDir=trainDirs[0], notPollenDir=trainDirs[1])

    def preprocess(self, resizeWidth = 341, resizeHeight = 512, testRatio = 0.1):
        """Resize, scale to [0, 1] and split ``self.data`` into train and test sets.

        Args:
            resizeWidth: Target width passed to ``resizeDataset``.
            resizeHeight: Target height passed to ``resizeDataset``.
            testRatio: Fraction of samples placed in the test split.
        """
        # resize data
        self.data[0] = resizeDataset(self.data[0], width=resizeWidth, height=resizeHeight)

        # normalize data between 0 and 1
        self.data[0] = np.array(self.data[0], dtype="float") / 255.0

        #Train test split (randomize training/testing, test size set to ratio to training)
        (trainX, testX, trainY, testY) = train_test_split(self.data[0], self.data[1],test_size=testRatio, random_state=42)

        self.data_train = [trainX,trainY]
        self.data_test = [testX,testY]

    def graphAccuracy(self):
        """Plot training and validation accuracy per epoch from ``self.history``."""
        history = self.history
        plt.plot(history.history['accuracy'], label='accuracy')
        plt.plot(history.history['val_accuracy'], label = 'val_accuracy')
        plt.xlabel('Epoch')
        plt.ylabel('Accuracy')
        plt.ylim([0.5, 1])
        plt.legend(loc='lower right')

    def getBestEpochByMetric(self,metric = 'val_accuracy', max_or_min = 'max'):
        """Return the 0-based index of the epoch with the best value of ``metric``.

        Args:
            metric: Key into ``self.history.history``.
            max_or_min: ``'max'`` for the largest value, ``'min'`` for the smallest.
                Any other value returns 0.

        Returns:
            Index of the first epoch holding the best value (0 if there are no epochs).
        """
        if max_or_min == 'min':
            values = self.history.history[metric]
            # min() keeps the earliest index on ties, like a strict `<` scan.
            return min(range(len(values)), key=values.__getitem__, default=0)

        if max_or_min == 'max':
            values = self.history.history[metric]
            # max() keeps the earliest index on ties, like a strict `>` scan.
            return max(range(len(values)), key=values.__getitem__, default=0)

        return 0

    def make_prediction(self,image,class_dictionary):
        """Classify one image and return its label.

        Args:
            image: A single image array; it is divided by 255 here, so pass
                unscaled pixel values.
            class_dictionary: Indexable mapping from class index to label.

        Returns:
            ``class_dictionary[i]`` where ``i`` is the arg-max of the model output.
        """
        #Rescale image
        img = image/255.

        #Convert to a 4D tensor
        image = np.expand_dims(img, axis=0)

        # make predictions on the preloaded model
        class_predicted = self.predict(image)
        inID = np.argmax(class_predicted[0])
        label = class_dictionary[inID]
        return label


In [53]:
class PollenTypeCNN(models.Model):
    """Multi-class pollen classifier; the trainable network is held in ``self.newMod``.

    Intended flow: ``buildModel`` or ``buildTransferLearnedModel`` -> ``loadData``
    -> ``preprocess`` -> ``trainModel`` or ``kMeansTrainModel`` ->
    ``graphAccuracy`` / ``make_prediction``.
    """

    def __init__(self,**kwargs):
        super(PollenTypeCNN,self).__init__(**kwargs)
        self.X = None #Images
        self.labels = None #Labels
        self.OneHotLabels = None #Integer Mapped Labels
        self.data_train = None
        self.data_test = None
        self.history = None
        self.newMod = None

    def buildModel(self, input_shape=(1364, 2048, 3)):
        """Build and compile a small CNN with a 2-unit softmax output into ``self.newMod``.

        Args:
            input_shape: Shape of one input image.
        """
        # models.Model has no add(); build a Sequential network and keep it in
        # self.newMod, the attribute the other methods of this class train and
        # predict with.
        self.newMod = models.Sequential([
            layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
            layers.MaxPooling2D((2, 2)),
            layers.Conv2D(64, (3, 3), activation='relu'),
            layers.MaxPooling2D((2, 2)),
            layers.Conv2D(64, (3, 3), activation='relu'),
            layers.Flatten(),
            layers.Dense(64, activation='relu'),
            layers.Dense(2,activation='softmax'),
        ])
        self.newMod.summary()
        # The output is already a softmax and preprocess() yields one-hot labels,
        # so use the plain categorical cross-entropy (not a sparse, from-logits loss).
        self.newMod.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

    def buildTransferLearnedModel(self,classes = 42,input_shape=(1364, 2048, 3)):
        """Build and compile an ImageNet-pretrained Xception with a new dense head.

        All Xception layers except the last eight are marked non-trainable. The
        resulting functional model is stored in ``self.newMod``.

        Args:
            classes: Number of output classes.
            input_shape: Shape of one input image.
        """
        model = tf.keras.applications.Xception(weights = "imagenet", include_top=False, input_shape = input_shape)

        for layer in model.layers[:-8]:
            layer.trainable = False

        x = model.output
        x = GlobalAveragePooling2D()(x)
        x = Flatten()(x)
        x = Dense(4096, activation="relu")(x)
        x = Dense(256, activation="relu")(x)
        x = Dropout(0.4)(x)
        predictions = Dense(classes, activation="softmax")(x)

        #Functional not Sequential for transfer learned CNN
        self.newMod = Model(model.input,predictions)
        self.newMod.summary()
        self.newMod.compile(loss = "categorical_crossentropy", optimizer = 'adam', metrics=["accuracy"])

    def loadData(self, trainDirs=["",""]):
        """Load images and string labels via ``loadDataset2``.

        Args:
            trainDirs: ``[classLabelsFile, imagesDir]``.
        """
        data = loadDataset2(classLabels=trainDirs[0], imagesDir=trainDirs[1])
        self.X = np.array(data[0])
        self.labels = np.array(data[1])

    def preprocess(self, resizeWidth = 341, resizeHeight = 512, testRatio = 0.1):
        """Resize and scale the images, encode the labels and split train/test.

        ``self.OneHotLabels`` receives the integer-encoded labels; the one-hot
        form is only stored inside ``self.data_train`` / ``self.data_test``.

        Args:
            resizeWidth: Target width passed to ``resizeDataset``.
            resizeHeight: Target height passed to ``resizeDataset``.
            testRatio: Fraction of samples placed in the test split.
        """
        self.X = resizeDataset(self.X, width=resizeWidth, height=resizeHeight)
        self.X = np.array(self.X, dtype="float") / 255.0

        le = LabelEncoder()

        self.OneHotLabels = le.fit_transform(self.labels)
        y = to_categorical(self.OneHotLabels)
        #Train test split
        (trainX, testX, trainY, testY) = train_test_split(self.X, y,test_size=testRatio, random_state=42)
        self.data_train = [trainX,trainY]
        self.data_test = [testX,testY]

    def trainModel(self, epochs=10, batch_size=32,augment_batch_size = 8):
        """Train ``self.newMod`` on augmented train batches, validating on augmented test batches.

        Args:
            epochs: Number of training epochs.
            batch_size: Forwarded to ``fit`` unchanged. NOTE(review): both inputs
                are generators that batch with ``augment_batch_size`` -- confirm
                whether this argument is still wanted.
            augment_batch_size: Batch size of both generators.

        The result of ``fit`` is stored in ``self.history``.
        """
        #Data Augmentation
        aug = ImageDataGenerator(
            rotation_range=20,
            zoom_range=0.15,
            width_shift_range=0.2,
            height_shift_range=0.2,
            shear_range=0.15,
            horizontal_flip=True,
            fill_mode="nearest")

        train_generator = aug.flow(self.data_train[0],self.data_train[1],batch_size = augment_batch_size)
        # NOTE(review): the validation images go through the same random
        # augmentation as the training images -- confirm this is intended.
        test_generator = aug.flow(self.data_test[0],self.data_test[1],batch_size = augment_batch_size)

        # Train the network built by this instance; the previous code called a
        # notebook-level global named `model`.
        self.history = self.newMod.fit(train_generator, epochs=epochs, batch_size=batch_size,
                    validation_data= test_generator)

    def kMeansTrainModel(self, classes = 42,epochs=10, batch_size=32,augment_batch_size = 8,kfolds = 10,weightsName = "VGG16"):
        """Train ``self.newMod`` over stratified k-fold splits of ``self.X``.

        Despite the name, this uses ``StratifiedKFold`` cross-validation splits.
        The same ``self.newMod`` instance is fitted on every fold in turn, and
        ``self.history`` ends up holding the history of the last fold only.

        Args:
            classes: Number of classes used when one-hot encoding the labels.
            epochs: Epochs per fold.
            batch_size: Forwarded to ``fit`` unchanged.
            augment_batch_size: Batch size of the training generator.
            kfolds: Number of folds.
            weightsName: Base name of the ``.h5`` file that receives the best weights.
        """
        kfold = StratifiedKFold(n_splits=kfolds, shuffle=True, random_state=7)

        #Callbacks
        checkpoint = ModelCheckpoint(weightsName+".h5", monitor='val_accuracy', verbose=1, save_best_only=True, save_weights_only=True, mode='auto',save_freq='epoch')
        early = EarlyStopping(monitor='val_accuracy', patience=50, verbose=1, mode='auto')

        callbacks = [checkpoint,early]

        for train, test in kfold.split(self.X,self.OneHotLabels):
            #Data Augmentation
            aug = ImageDataGenerator(
                rotation_range=20,
                zoom_range=0.15,
                width_shift_range=0.2,
                height_shift_range=0.2,
                shear_range=0.15,
                horizontal_flip=True,
                fill_mode="nearest")

            #y vec needs to be 1d vector of all labels initally
            #change to categorical values after the split
            #https://stackoverflow.com/questions/48508036/sklearn-stratifiedkfold-valueerror-supported-target-types-are-binary-mul
            cat_trainY = to_categorical(self.OneHotLabels[train], classes)
            cat_testY = to_categorical(self.OneHotLabels[test], classes)

            train_generator = aug.flow(self.X[train],cat_trainY,batch_size = augment_batch_size)
            test_generator = aug.flow(self.X[test],cat_testY,batch_size = 1)

            self.history = self.newMod.fit(train_generator, epochs=epochs, batch_size=batch_size,
                    validation_data= test_generator,callbacks = callbacks)

    def graphAccuracy(self):
        """Plot training and validation accuracy per epoch from ``self.history``."""
        history = self.history
        plt.plot(history.history['accuracy'], label='accuracy')
        plt.plot(history.history['val_accuracy'], label = 'val_accuracy')
        plt.xlabel('Epoch')
        plt.ylabel('Accuracy')
        plt.ylim([0.5, 1])
        plt.legend(loc='lower right')

    def make_prediction(self,image,class_dictionary):
        """Classify one image with ``self.newMod`` and return its label.

        Args:
            image: A single image array; it is divided by 255 here, so pass
                unscaled pixel values.
            class_dictionary: Indexable mapping from class index to label.

        Returns:
            ``class_dictionary[i]`` where ``i`` is the arg-max of the model output.
        """
        #Rescale image
        img = image/255.

        #Convert to a 4D tensor
        image = np.expand_dims(img, axis=0)

        # make predictions on the preloaded model
        class_predicted = self.newMod.predict(image)
        # Index the single batch row explicitly, as PollenCNN.make_prediction does.
        inID = np.argmax(class_predicted[0])
        label = class_dictionary[inID]
        return label


## Load Model

In [54]:
def getCheckpoint(index):
    """Return the path of the ``index``-th checkpoint file in ``checkpointsDir``.

    Checkpoints are the files named ``model.<epoch>-<val_loss>.hdf5`` (the
    pattern used when checkpoints are written during training). They are ordered
    by their numeric epoch, so index 0 is the earliest saved epoch.

    Args:
        index: Position in the epoch-ordered checkpoint list.

    Returns:
        Path of the selected checkpoint file.

    Raises:
        IndexError: If fewer than ``index + 1`` checkpoints exist.
    """
    def epochOf(path):
        # "model.150-0.17.hdf5" -> 150; None when the name does not fit the pattern.
        try:
            return int(os.path.basename(path).split(".")[1].split("-")[0])
        except (IndexError, ValueError):
            return None

    # Globbing the bare directory path (no wildcard) only ever matched the
    # directory itself; list the checkpoint files inside it instead.
    candidates = glob.glob(checkpointsDir + "model.*.hdf5")
    checkpoints = [path for path in candidates if epochOf(path) is not None]
    # Numeric sort: a plain string sort would put "model.100" before "model.11".
    checkpoints.sort(key=epochOf)
    return checkpoints[index]

def loadCustomModel(file):
    """Load a saved Keras model from ``file``, registering ``PollenCNN`` as a custom object."""
    customClasses = {"PollenCNN" : PollenCNN}
    restored = load_model(file, custom_objects = customClasses)
    return restored

## Main

In [55]:
# dirs = [pollenDir, notPollenDir]

# model = PollenCNN()
# model.buildModel(input_shape=(40, 40, 3))
# #model.buildTransferLearnedModel(input_shape=(128, 128, 3))
# model.loadData(trainDirs=dirs)
# model.preprocess(resizeWidth = 40, resizeHeight = 40,testRatio=0.2)
# model.trainModel(epochs=150, batch_size=16,augment_batch_size = 8, early_stopping = False, checkpoints = True)

# model.graphAccuracy()

In [60]:
# Wrapper object; the actual network is created below and stored in loaded_model.newMod.
loaded_model = PollenTypeCNN()

# Build the Xception-based network for 2 classes on 192x192 3-channel inputs.
loaded_model.buildTransferLearnedModel(classes = 2,input_shape=(192, 192, 3))

# Replace the freshly built weights with the ones stored in weightsFile.
loaded_model.newMod.load_weights(weightsFile)


Model: "functional_13"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_8 (InputLayer)            [(None, 192, 192, 3) 0                                            
__________________________________________________________________________________________________
block1_conv1 (Conv2D)           (None, 95, 95, 32)   864         input_8[0][0]                    
__________________________________________________________________________________________________
block1_conv1_bn (BatchNormaliza (None, 95, 95, 32)   128         block1_conv1[0][0]               
__________________________________________________________________________________________________
block1_conv1_act (Activation)   (None, 95, 95, 32)   0           block1_conv1_bn[0][0]            
______________________________________________________________________________________

In [61]:
# best_model_file = getCheckpoint(model.getBestEpochByMetric())
# best_model_file

In [62]:
# loaded_model = loadCustomModel(saved_model_file)
# loaded_model = load_model(xception_model_file);

In [63]:
# data = loadDataset(pollenDir, notPollenDir)
# data[0] = resizeDataset(data[0], width=192, height=192)
              
# print(data[0].shape)

# # loaded_model.make_prediction(data[0][0], ["no", "yes"])

(301, 40, 40, 3)


'no'

In [64]:
# Load the pollen / not-pollen images and resize them to the 192x192 model input size.
data = loadDataset(pollenDir, notPollenDir)
data[0] = resizeDataset(data[0], width=192, height=192)

# Classify the first image; make_prediction divides by 255 itself, so the
# image is passed unscaled. Class index 0 maps to "no", index 1 to "yes".
loaded_model.make_prediction(data[0][0], ["no", "yes"])

'no'