# Convolutional neural network for bumble bee species ID from images

### Load dependencies

In [None]:
import tensorflow as tf # I'm using tf v1.13.1
import numpy as np
import os
import cv2
import matplotlib.pyplot as plt
import keras
import random

from sklearn import svm, datasets
from sklearn.utils import class_weight
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.utils.multiclass import unique_labels

from keras.models import Sequential, Model
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
from keras.layers import Dense, Activation, Dropout, Flatten, Conv2D, MaxPooling2D, BatchNormalization, ZeroPadding2D, AveragePooling2D
from keras import regularizers
from keras import backend as K
from keras.callbacks import TensorBoard, EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
import time

### Set up data directory and names of species included in the classification model


In [None]:
# Root folder that holds one sub-folder of cropped images per species.
DATADIR = "C:/Bombus_images_cropped"

# Species (class) names. The position of a name in this list is the integer
# class label used throughout the notebook, so the order must not change.
CATEGORIES = [
    "Bombus_affinis",
    "Bombus_appositus",
    "Bombus_auricomus",
    "Bombus_bifarius",
    "Bombus_bimaculatus",
    "Bombus_borealis",
    "Bombus_caliginosus",
    "Bombus_centralis",
    "Bombus_citrinus",
    "Bombus_crotchii",
    "Bombus_cryptarum",
    "Bombus_fernaldae_flavidus",
    "Bombus_fervidus",
    "Bombus_flavifrons",
    "Bombus_fraternus",
    "Bombus_frigidus",
    "Bombus_griseocollis",
    "Bombus_huntii",
    "Bombus_impatiens",
    "Bombus_insularis",
    "Bombus_melanopygus",
    "Bombus_mixtus",
    "Bombus_morrisoni",
    "Bombus_nevadensis",
    "Bombus_occidentalis",
    "Bombus_pensylvanicus_sonorus",
    "Bombus_perplexus",
    "Bombus_rufocinctus",
    "Bombus_sandersoni",
    "Bombus_sitkensis",
    "Bombus_sylvicola",
    "Bombus_ternarius",
    "Bombus_terricola",
    "Bombus_vagans",
    "Bombus_vandykei",
    "Bombus_vosnesenskii",
]

### Set number of classes (species) and image size (number of pixels)

In [None]:
# Side length, in pixels, of the square images fed to the network.
IMG_SIZE = 299
# One output class per species listed in CATEGORIES.
num_classes = len(CATEGORIES)

### Import the data

In [None]:
# training_data[i] is [RGB image array, class index]; img_filenames[i] is the
# path the i-th image was read from (the two lists stay index-aligned).
training_data = []
img_filenames = []


def create_training_data():
    """Load all species images into ``training_data`` and ``img_filenames``.

    For every category in CATEGORIES, each file in DATADIR/<category> is read
    with OpenCV, resized to IMG_SIZE x IMG_SIZE and converted from BGR to RGB.
    The image is appended to ``training_data`` as ``[array, class_index]``
    (class_index is the category's position in CATEGORIES) and its path is
    appended to ``img_filenames``.

    Files that OpenCV cannot read or process are skipped (loading stays
    best-effort), but the skips are counted and reported instead of being
    silently swallowed.

    Raises:
        OSError: if a category folder is missing or cannot be listed.
    """
    skipped = []
    for class_num, category in enumerate(CATEGORIES):
        path = os.path.join(DATADIR, category)
        for img in os.listdir(path):
            img_path = os.path.join(path, img)
            # cv2.imread signals an unreadable/non-image file by returning
            # None rather than raising, so check it explicitly.
            img_array = cv2.imread(img_path)
            if img_array is None:
                skipped.append(img_path)
                continue
            try:
                new_array = cv2.resize(img_array, (IMG_SIZE, IMG_SIZE))
                new_array = cv2.cvtColor(new_array, cv2.COLOR_BGR2RGB)
            except cv2.error:
                skipped.append(img_path)
                continue
            training_data.append([new_array, class_num])
            img_filenames.append(img_path)

    if skipped:
        print('Skipped', len(skipped), 'unreadable file(s); first few:', skipped[:5])


create_training_data()

### Set up and reshape the image and label arrays

In [None]:
# Separate the [image, label] pairs collected in training_data.
# X becomes a single array with one IMG_SIZE x IMG_SIZE x 3 image per row;
# y stays a plain Python list of integer class labels.
X = np.array([features for features, _ in training_data]).reshape(-1, IMG_SIZE, IMG_SIZE, 3)
y = [label for _, label in training_data]

### Split data into training and testing

In [None]:
# Split images, labels and file names in ONE call so the three stay aligned by
# construction (previously the file names were split in a separate call and
# only matched because both calls used the same random_state and length).
# 80 % of the samples go to training, 20 % to testing.
(X_train, X_test,
 y_train, y_test,
 img_file_train, img_file_test) = train_test_split(
    X, y, img_filenames, test_size=0.2, random_state=42)

# Convert pixel values to float32 and scale them from 0-255 down to 0-1.
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
X_train /= 255
X_test /= 255

### Data are imbalanced. Set up class weights to help account for this

In [None]:
# 'balanced' weights are inversely proportional to class frequency in y_train.
# Keyword arguments are used because newer scikit-learn releases no longer
# accept these parameters positionally.
present_classes = np.unique(y_train)
weights = class_weight.compute_class_weight(class_weight='balanced',
                                            classes=present_classes,
                                            y=y_train)
# Key each weight by its actual class label. enumerate() would mislabel the
# weights if any class index were missing from y_train.
weights_dict = {int(label): float(weight)
                for label, weight in zip(present_classes, weights)}

### Set batch size and data augmentation parameters

In [None]:
# Number of images per training batch.
batch_size = 16

# Random augmentations applied to training images on the fly.
augmentation_settings = {
    'rotation_range': 100,
    'width_shift_range': 0.1,
    'height_shift_range': 0.1,
    'shear_range': 0.3,
    'zoom_range': 0.1,
    'horizontal_flip': True,
    'fill_mode': 'nearest',
}
train_datagen = ImageDataGenerator(**augmentation_settings)

### Set up the model

In [None]:
import keras_applications
from keras.applications.vgg16 import VGG16
from keras.applications import ResNet50
from keras.applications.inception_v3 import InceptionV3
from keras.applications import InceptionResNetV2
from keras.applications import Xception
from keras.layers import Dense, GlobalAveragePooling2D, GlobalMaxPooling2D
from keras.applications import MobileNetV2

# InceptionV3 performed best of the architectures imported above.
# include_top=False drops its original classifier; pooling='avg' is requested
# so the base output can feed the Dense layers directly.
base_model = InceptionV3(input_shape=(IMG_SIZE, IMG_SIZE, 3),
                         include_top=False,
                         pooling='avg')

# Classification head: two identical (Dense 1000 + light Dropout) stages
# followed by a softmax layer with one unit per species.
x = base_model.output
for _ in range(2):
    x = Dense(1000, activation='relu')(x)
    x = Dropout(0.06)(x)
predictions = Dense(num_classes, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=predictions)

# Print the layer-by-layer summary of the assembled model.
model.summary()

### Set up early stopping, save best model, adjust learning rate

In [None]:
# All three callbacks watch the validation loss (lower is better).

# Stop training after 10 epochs without improvement.
earlyStopping = EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=10,
    verbose=0,
)

# Keep only the best-scoring model on disk.
mcp_save = ModelCheckpoint(
    '.mdl_wts.hdf5',
    monitor='val_loss',
    mode='min',
    save_best_only=True,
)

# Multiply the learning rate by 0.1 after 5 epochs without an improvement of
# at least 1e-4.
reduce_lr_loss = ReduceLROnPlateau(
    monitor='val_loss',
    mode='min',
    factor=0.1,
    patience=5,
    min_delta=1e-4,
    verbose=1,
)

### Compile model

In [None]:
def top3_acc(y_true, y_pred):
    """Top-3 accuracy for integer (sparse) labels.

    Defined as a plain function so that no ``functools`` import is needed
    (the notebook never imported it, so the previous ``functools.partial``
    version raised NameError) and so the metric carries the name 'top3_acc'.
    It is not enabled by default; add ``top3_acc`` to the ``metrics`` list
    below to track it.
    """
    return keras.metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=3)


# Labels are integer class indices, hence the sparse cross-entropy loss.
model.compile(loss="sparse_categorical_crossentropy",
              optimizer="sgd",
              metrics=['accuracy'])

### Run the model

In [None]:
# Maximum number of passes over the training data.
num_epocs = 50

# Augmented training batches, and the number of full batches per epoch.
train_flow = train_datagen.flow(X_train, y_train, batch_size=batch_size)
batches_per_epoch = X_train.shape[0] // batch_size

# NOTE(review): the test split doubles as validation data here, so the saved
# checkpoint and learning-rate schedule are tuned on the same images that are
# later used for evaluation.
# NOTE(review): earlyStopping is defined in an earlier cell but is not in the
# callbacks list below - confirm whether that is intentional.
history = model.fit_generator(
    train_flow,
    steps_per_epoch=batches_per_epoch,
    epochs=num_epocs,
    validation_data=(X_test, y_test),
    verbose=1,
    class_weight=weights_dict,
    callbacks=[mcp_save, reduce_lr_loss],
    use_multiprocessing=False,
    workers=4,
)

### Save the model

In [None]:
# Write the trained model to disk under a placeholder file name.
saved_model_path = 'model_x_date.h5'
model.save(saved_model_path)

### Plot loss and accuracy

In [None]:
# Plot training vs. validation curves, one figure per metric.
# Each entry: (history key, figure title, y-axis label, legend position).
curve_specs = [
    ('acc', 'Model accuracy', 'Accuracy', 'lower right'),
    ('loss', 'Model loss', 'Loss', 'upper right'),
]

for history_key, figure_title, y_label, legend_loc in curve_specs:
    plt.plot(history.history[history_key])            # training curve
    plt.plot(history.history['val_' + history_key])   # validation curve
    plt.title(figure_title)
    plt.ylabel(y_label)
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Test'], loc=legend_loc)
    plt.show()

### Confusion matrix

In [None]:
# Class probabilities for every test image, one row per image.
y_pred = model.predict(X_test)
# The predicted label is the index of the most probable class in each row.
y_pred_labels = y_pred.argmax(axis=1)

# Confusion matrix of true labels against predicted labels.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred_labels)
print(cm)

### Classification report

In [None]:
from sklearn.metrics import classification_report, accuracy_score

# Overall accuracy on the test split, using the labels predicted in the
# confusion-matrix cell.
overall_accuracy = accuracy_score(y_test, y_pred_labels)
print('Accuracy Score:', overall_accuracy)

# Per-class metrics; classes are listed by integer label.
per_class_report = classification_report(y_test, y_pred_labels)
print(per_class_report)

### Look at a random test image and see prediction results

In [None]:
import random

# Number of random test images to inspect.
num_images = 1

# Draw indices from the real size of the test set. randrange() covers
# 0 .. len(X_test) - 1, so index 0 is reachable and no index can fall outside
# the array (the previous hard-coded randint(1, 15431) guaranteed neither).
rand_images = [random.randrange(len(X_test)) for _ in range(num_images)]

for image_idx in rand_images:
    # X_test[[i]] keeps the leading batch dimension that predict() expects.
    image_probs = model.predict(X_test[[image_idx]])
    predicted_label = np.argmax(image_probs)
    plt.imshow(X_test[image_idx])
    print('Test image:', image_idx)
    print('True species:', CATEGORIES[y_test[image_idx]])
    print('Predicted species:', CATEGORIES[predicted_label])
    # Render each image separately so every sample is shown, not just the last.
    plt.show()

### Make table of misclassified images

In [None]:
# Column lists for the misclassification table; the first entry of each list
# is its header.
test_image = ['Test image']
true_species = ['True species']
pred_species = ['Predicted species']
probability = ['Probability']
img_file_name = ['File name']

# Predict the whole test set in one batched call instead of once (or twice)
# per image.
test_probs = model.predict(X_test)
test_pred_labels = np.argmax(test_probs, axis=1)

for image_idx, (true_label, pred_label) in enumerate(zip(y_test, test_pred_labels)):
    if true_label == pred_label:
        continue  # correctly classified, so not part of the table
    test_image.append(image_idx)
    true_species.append(CATEGORIES[true_label])
    pred_species.append(CATEGORIES[pred_label])
    # Probability the model assigned to its (wrong) top prediction.
    probability.append(test_probs[image_idx, pred_label])
    img_file_name.append(img_file_test[image_idx])

# One tuple per row: (index, true species, predicted species, probability, file).
true_pred_species = list(zip(test_image, true_species, pred_species, probability, img_file_name))

### Save misclassification table

In [None]:
# Write the misclassification table as comma-separated text, one row per line.
# NOTE(review): the output file name has no extension.
np.savetxt('true_pred_species',
           true_pred_species,
           delimiter=',',
           fmt='%s')

### Display image and results from a particular test image

In [None]:
# Index (within the test split) of the image to inspect.
image_num = 221

chosen_image = X_test[image_num]
# predict() expects a batch, so add a leading axis of length 1.
y_pred = model.predict(np.expand_dims(chosen_image, axis=0))
y_pred_labels = np.argmax(y_pred)

plt.imshow(chosen_image)
true_name = CATEGORIES[y_test[image_num]]
predicted_name = CATEGORIES[y_pred_labels]
print('Test image:', image_num)
print('True species:', true_name)
print('Predicted species:', predicted_name)