In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import os
from sklearn.model_selection import train_test_split
import tensorflow
import skimage
import skimage.io
import skimage.transform
from keras import optimizers
from keras.models import Sequential
from keras.layers import Dense, Conv2D, Flatten, MaxPool2D
from keras.layers import Dropout, BatchNormalization,LeakyReLU, Dropout
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ModelCheckpoint, Callback, EarlyStopping, ReduceLROnPlateau
from keras.utils import to_categorical
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Load the saved model
from keras.models import model_from_json

In [2]:
np.random.seed(11)
tensorflow.random.set_seed(11)

In [3]:
img_folder='C:\\Users\\satvi\\OneDrive\\Pictures\\Desktop\\Test1\\static1\\images\\'
img_width=128 
#check path 
img_height=128
img_channels=3
bees=pd.read_csv('C:\\Users\\satvi\\OneDrive\\Pictures\\Desktop\\Test1\\bee_data_mod.csv', 
                index_col=False,  
                parse_dates={'datetime':[1,2]},
                dtype={'subspecies':'category', 'health':'category','caste':'category'})
def read_img(file):
    img = skimage.io.imread(img_folder + file)
    img = skimage.transform.resize(img, (img_width, img_height), mode='reflect')
    return img[:,:,:img_channels]

bees.dropna(inplace=True)
img_exists = bees['file'].apply(lambda f: os.path.exists(img_folder + f))
bees = bees[img_exists]

  bees=pd.read_csv('C:\\Users\\satvi\\OneDrive\\Pictures\\Desktop\\Test1\\bee_data_mod.csv',


In [4]:
bees.head()

Unnamed: 0,datetime,file,location,zip code,subspecies,health,pollen_carrying,caste
0,2018-08-25 14:00:00,001_043.png,"Kalakadu, Tamil Nadu",627007,Dammer bee,healthy,False,worker
1,2018-08-25 14:00:00,001_044.png,"Kalakadu, Tamil Nadu",627007,Dammer bee,healthy,False,worker
2,2018-08-25 14:00:00,001_046.png,"Kalakadu, Tamil Nadu",627007,Dammer bee,healthy,False,worker
3,2018-08-25 14:00:00,001_047.png,"Kalakadu, Tamil Nadu",627007,Dammer bee,healthy,False,worker
4,2018-08-25 14:00:00,001_050.png,"Kalakadu, Tamil Nadu",627007,Dammer bee,healthy,False,worker


In [5]:
bees.tail()

Unnamed: 0,datetime,file,location,zip code,subspecies,health,pollen_carrying,caste
1789,2018-08-17 17:50:00,030_800.png,"Alchuna, Uttrakhand",263136,Italian honey bee,unhealthy,False,worker
1790,2018-08-17 17:50:00,030_814.png,"Alchuna, Uttrakhand",263136,Italian honey bee,unhealthy,False,worker
1791,2018-08-17 17:50:00,030_828.png,"Alchuna, Uttrakhand",263136,Italian honey bee,unhealthy,False,worker
1792,2018-08-17 17:50:00,030_829.png,"Alchuna, Uttrakhand",263136,Italian honey bee,unhealthy,False,worker
1793,2018-08-17 17:50:00,030_815.png,"Alchuna, Uttrakhand",263136,Italian honey bee,unhealthy,False,worker


In [6]:
bees.isna()

Unnamed: 0,datetime,file,location,zip code,subspecies,health,pollen_carrying,caste
0,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...
1789,False,False,False,False,False,False,False,False
1790,False,False,False,False,False,False,False,False
1791,False,False,False,False,False,False,False,False
1792,False,False,False,False,False,False,False,False


In [8]:
train_bees, test_bees = train_test_split(bees, random_state=65)
train_bees, val_bees = train_test_split(train_bees, test_size=0.1, random_state = 67)
ncat_bal = int(len(train_bees)/train_bees['subspecies'].cat.categories.size)
train_bees_bal = train_bees.groupby('subspecies', as_index=False).apply(lambda g:  g.sample(ncat_bal, replace=True)).reset_index(drop=True)
train_bees = train_bees_bal

In [9]:
train_X = np.stack(train_bees['file'].apply(read_img))
train_y  = pd.get_dummies(train_bees['subspecies'], drop_first=False)

val_X = np.stack(val_bees['file'].apply(read_img))
val_y = pd.get_dummies(val_bees['subspecies'], drop_first=False)

test_X = np.stack(test_bees['file'].apply(read_img))
test_y = pd.get_dummies(test_bees['subspecies'], drop_first=False)

# Data augmentation - a little bit rotate, zoom and shift input images.
generator = ImageDataGenerator(
        featurewise_center=False,  # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=False,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,  # divide each input by its std
        zca_whitening=False,  # apply ZCA whitening
        rotation_range=180,  # randomly rotate images in the range (degrees, 0 to 180)
        zoom_range = 0.1, # Randomly zoom image 
        width_shift_range=0.2,  # randomly shift images horizontally (fraction of total width)
        height_shift_range=0.2,  # randomly shift images vertically (fraction of total height)
        horizontal_flip=True,  # randomly flip images
        vertical_flip=True)
generator.fit(train_X)

In [10]:
earlystopper = EarlyStopping(monitor='val_accuracy', patience=25, verbose=1)
checkpointer = ModelCheckpoint('bee_subp.h5'
                                ,monitor='val_accuracy'
                                ,verbose=1
                                ,save_best_only=True
                                ,save_weights_only=True)
model=Sequential()
model.add(Conv2D(32, kernel_size=3, input_shape=(img_width, img_height,3), activation='relu', padding='same'))
model.add(MaxPool2D(2))
model.add(Dropout(0.15))
model.add(Conv2D(32, kernel_size=3, activation='relu', padding='same'))
model.add(Dropout(0.15))
model.add(Flatten())
model.add(Dense(train_y.columns.size, activation='softmax'))
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

steps = np.round(train_X.shape[0] / 256, 0)
training = model.fit_generator(generator.flow(train_X,train_y, batch_size=256)
                        ,epochs=200
                        ,validation_data=(val_X, val_y)
                        ,steps_per_epoch=steps
                        ,callbacks=[earlystopper, checkpointer])

  training = model.fit_generator(generator.flow(train_X,train_y, batch_size=256)


Epoch 1/200
Epoch 1: val_accuracy improved from -inf to 0.66418, saving model to bee_subp.h5
Epoch 2/200
Epoch 2: val_accuracy did not improve from 0.66418
Epoch 3/200
Epoch 3: val_accuracy did not improve from 0.66418
Epoch 4/200
Epoch 4: val_accuracy improved from 0.66418 to 0.69403, saving model to bee_subp.h5
Epoch 5/200
Epoch 5: val_accuracy did not improve from 0.69403
Epoch 6/200
Epoch 6: val_accuracy did not improve from 0.69403
Epoch 7/200
Epoch 7: val_accuracy improved from 0.69403 to 0.71642, saving model to bee_subp.h5
Epoch 8/200
Epoch 8: val_accuracy improved from 0.71642 to 0.81343, saving model to bee_subp.h5
Epoch 9/200
Epoch 9: val_accuracy improved from 0.81343 to 0.82836, saving model to bee_subp.h5
Epoch 10/200
Epoch 10: val_accuracy improved from 0.82836 to 0.89552, saving model to bee_subp.h5
Epoch 11/200
Epoch 11: val_accuracy improved from 0.89552 to 0.93284, saving model to bee_subp.h5
Epoch 12/200
Epoch 12: val_accuracy improved from 0.93284 to 0.97761, savin

In [11]:
model_json = model.to_json()
with open("Subspecies_placeholder.json", "w") as json_file:
    json_file.write(model_json)

# Save the best model weights
model.save_weights("bee_subp.h5")

# Save Subspecies Prediction Placeholder
# with open("subspecies_placeholder.txt", "w") as subspecies_file:
#     subspecies_file.write("Subspecies Prediction Placeholder")
# # Save model weights
# model.save_weights('best.h5')
test_res = model.evaluate(test_X, test_y.values, verbose=0)
print('Loss function: %s, accuracy:' % test_res[0], test_res[1])

Loss function: 0.03911426663398743, accuracy: 0.9932885766029358
