In [1]:
import numpy as np
#import pandas as pd
#import matplotlib.pyplot as plt
import tensorflow as tf
import keras
import glob
import cv2

import os

#TRAINING SET:

In [3]:
def getImagesfromXie(no_of_classes, root_dir="/content/sample_data/xie", image_size=(64, 64)):
    """Load the images of the first ``no_of_classes`` class folders under ``root_dir``.

    Every sub-directory of ``root_dir`` is treated as one class and its name is
    used as the class label. Each ``*.jpg`` file inside it is read with OpenCV,
    resized to ``image_size`` and converted from OpenCV's BGR channel order to RGB.

    Parameters
    ----------
    no_of_classes : int
        Number of class folders to load. Folders are taken in sorted order.
    root_dir : str, optional
        Directory that contains one sub-directory per class.
    image_size : tuple of int, optional
        Target ``(width, height)`` passed to ``cv2.resize``.

    Returns
    -------
    insect_images : numpy.ndarray
        Stacked images, one entry per successfully loaded file.
    label_ids : numpy.ndarray
        Integer class id for each image; ids index the sorted unique folder names.
    """
    insect_images = []
    labels = []

    # glob returns entries in arbitrary order, so sort to pick the same classes
    # (and the same image order) on every run. Stray files in the root directory
    # are ignored so they do not count as classes.
    class_dirs = sorted(
        path for path in glob.glob(os.path.join(root_dir, "*")) if os.path.isdir(path)
    )

    for i, insect_dir_path in enumerate(class_dirs):
        if no_of_classes == i:
            break
        insect_label = os.path.basename(os.path.normpath(insect_dir_path))
        for image_path in sorted(glob.glob(os.path.join(insect_dir_path, "*.jpg"))):
            image = cv2.imread(image_path, cv2.IMREAD_COLOR)
            if image is None:
                # cv2.imread signals an unreadable/corrupt file by returning None.
                print("Skipping unreadable image : " + image_path)
                continue

            image = cv2.resize(image, image_size)
            # imread yields BGR; swap the channels to get RGB.
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

            insect_images.append(image)
            labels.append(insect_label)

    insect_images = np.array(insect_images)
    labels = np.array(labels)

    # The inverse index maps every label to its position among the sorted
    # unique labels, i.e. a stable integer id per class name.
    _, label_ids = np.unique(labels, return_inverse=True)

    print ("Total Images : " +  str(insect_images.shape[0]))
    return insect_images, label_ids

In [4]:
# Load four nested subsets of the dataset (first 5, 10, 16 and 24 class folders).
# Each call returns the image array together with the matching integer label ids.
insects_5_classes, labels_5_classes = getImagesfromXie(5)
insects_10_classes, labels_10_classes = getImagesfromXie(10)
insects_16_classes, labels_16_classes = getImagesfromXie(16)
insects_24_classes, labels_24_classes = getImagesfromXie(24)

Total Images : 1403
Total Images : 2809
Total Images : 4504
Total Images : 6892


In [5]:
#SETTING UP THE NEURAL NETWORK
def modelCNN(X_train, X_test, Y_train, Y_test, Y, numClasses):
    """Build, train and score a small CNN on a single train/test split.

    Parameters
    ----------
    X_train, X_test : numpy.ndarray
        Image batches; the per-sample shape of ``X_train`` defines the network input.
    Y_train, Y_test : numpy.ndarray
        One-hot encoded targets for the train and test images.
    Y : array-like
        Integer class ids of the test images, compared against the arg-max of
        the network's predictions.
    numClasses : int
        Size of the softmax output layer.

    Returns
    -------
    float
        Accuracy of the trained model on ``X_test``.
    """
    from keras.models import Sequential
    from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
    from sklearn.metrics import accuracy_score

    # This function is called once per fold; release the state of models built
    # by earlier calls so they do not pile up in memory.
    keras.backend.clear_session()

    model_cnn = Sequential()
    # First convolutional layer; the input shape is taken from the data
    # instead of being hard-coded.
    model_cnn.add(Conv2D(32, kernel_size=(3, 3),
                         activation='relu',
                         input_shape=tuple(X_train.shape[1:])))

    # Second layer
    model_cnn.add(Conv2D(64, (3, 3), activation='relu'))
    model_cnn.add(MaxPooling2D(pool_size=(2, 2)))

    # Third layer
    model_cnn.add(Conv2D(64, (3, 3), activation='relu'))
    model_cnn.add(MaxPooling2D(pool_size=(2, 2)))

    # Fourth layer: two stacked convolutions before pooling
    model_cnn.add(Conv2D(128, (3, 3), activation='relu'))
    model_cnn.add(Conv2D(128, (3, 3), activation='relu'))
    model_cnn.add(MaxPooling2D(pool_size=(2, 2)))

    # Classifier head
    model_cnn.add(Dropout(0.25))
    model_cnn.add(Flatten())
    model_cnn.add(Dense(128, activation='relu'))
    model_cnn.add(Dropout(0.5))
    model_cnn.add(Dense(numClasses, activation='softmax'))

    # `learning_rate` replaces the deprecated `lr` keyword.
    opt = keras.optimizers.Adam(learning_rate=0.001)
    model_cnn.compile(loss=keras.losses.categorical_crossentropy,
                      optimizer=opt,
                      metrics=['accuracy'])

    model_cnn.fit(X_train, Y_train,
                  batch_size=64,
                  epochs=50,
                  verbose=1,
                  validation_data=(X_test, Y_test))

    # NOTE(review): batch_size=1 is kept from the original; a larger batch
    # would presumably only speed prediction up - confirm before changing.
    predict = model_cnn.predict(X_test, batch_size=1)
    y = np.argmax(predict, axis=1)

    accuracy = accuracy_score(Y, y)
    print('Accuracy Score :', accuracy)
    return accuracy

In [6]:
from sklearn.model_selection import KFold

def getKFoldCV(Images, Labels, numClasses, n_splits=10, random_state=1245):
    """Run shuffled K-fold cross-validation of ``modelCNN`` and collect the accuracies.

    Parameters
    ----------
    Images : numpy.ndarray
        Image array indexed along its first axis; pixel values are divided by 255.
    Labels : numpy.ndarray
        Integer class id per image, same length as ``Images``.
    numClasses : int
        Number of classes used for one-hot encoding and the model output size.
    n_splits : int, optional
        Number of folds.
    random_state : int, optional
        Seed for the shuffled fold assignment.

    Returns
    -------
    list
        One accuracy value per fold, in fold order.

    Raises
    ------
    ValueError
        If ``Images`` and ``Labels`` differ in length.
    """
    if len(Images) != len(Labels):
        raise ValueError(
            "Images and Labels must have the same length, got {} and {}".format(
                len(Images), len(Labels)))

    kf = KFold(n_splits=n_splits, shuffle=True, random_state=random_state)

    # Accuracy of the model trained on each fold, appended in fold order.
    accuracy_model = []

    for train_index, test_index in kf.split(Images):
        # Split and normalize color values to between 0 and 1.
        X_train = Images[train_index] / 255
        X_test = Images[test_index] / 255

        # Keep the integer test labels for scoring; the network itself is
        # trained and validated on one-hot targets.
        Y = Labels[test_index]
        Y_train = keras.utils.to_categorical(Labels[train_index], numClasses)
        Y_test = keras.utils.to_categorical(Y, numClasses)

        print('Original Sizes:', X_train.shape, X_test.shape, Y_train.shape, Y_test.shape)
        accuracy_model.append(modelCNN(X_train, X_test, Y_train, Y_test, Y, numClasses))

    return accuracy_model

# XIE 10-Fold 5 Classes

In [7]:
import time

# Time the complete 10-fold cross-validation over the 5-class subset.
start_time = time.time()
accurary_5Classes = getKFoldCV(Images=insects_5_classes, Labels=labels_5_classes, numClasses=5)
end_time = time.time()

# Per-fold accuracies first, then the elapsed wall-clock seconds.
print(accurary_5Classes)
print("Total time taken {}".format(end_time - start_time))

Original Sizes: (1262, 64, 64, 3) (141, 64, 64, 3) (1262, 5) (141, 5)
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Accuracy Score : 0.9432624113475178
Original Sizes: (1262, 64, 64, 3) (141, 64, 64, 3) (1262, 5) (141, 5)
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Ep

# XIE 10-Fold 10 Classes

In [8]:
import time

# Time the complete 10-fold cross-validation over the 10-class subset.
start_time = time.time()
accurary_10Classes = getKFoldCV(Images=insects_10_classes, Labels=labels_10_classes, numClasses=10)
end_time = time.time()

# Per-fold accuracies first, then the elapsed wall-clock seconds.
print(accurary_10Classes)
print("Total time taken {}".format(end_time - start_time))

Original Sizes: (2528, 64, 64, 3) (281, 64, 64, 3) (2528, 10) (281, 10)
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Accuracy Score : 0.9252669039145908
Original Sizes: (2528, 64, 64, 3) (281, 64, 64, 3) (2528, 10) (281, 10)
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/5

# XIE 10-Fold 16 Classes

In [None]:
import time

# Time the complete 10-fold cross-validation over the 16-class subset.
start_time = time.time()
accurary_16Classes = getKFoldCV(Images=insects_16_classes, Labels=labels_16_classes, numClasses=16)
end_time = time.time()

# Per-fold accuracies first, then the elapsed wall-clock seconds.
print(accurary_16Classes)
print("Total time taken {}".format(end_time - start_time))

Original Sizes: (4053, 64, 64, 3) (451, 64, 64, 3) (4053, 16) (451, 16)
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50

# XIE 10-Fold 24 Classes

In [None]:
import time

# Time the complete 10-fold cross-validation over the 24-class subset.
start_time = time.time()
accurary_24Classes = getKFoldCV(Images=insects_24_classes, Labels=labels_24_classes, numClasses=24)
end_time = time.time()

# Per-fold accuracies first, then the elapsed wall-clock seconds.
print(accurary_24Classes)
print("Total time taken {}".format(end_time - start_time))