In [None]:
import os, os.path
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential, load_model
from keras import layers
from keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense, Activation,GlobalMaxPooling2D
from keras import applications
from keras.preprocessing.image import ImageDataGenerator
from keras import optimizers
from keras.applications import VGG16
from keras.models import Model
from keras.layers import merge, Input
from keras.callbacks import ModelCheckpoint, EarlyStopping

In [None]:
from pathlib import Path
# --- Notebook configuration ---------------------------------------------------
train_dir = "kaggle_bee_vs_wasp/"  # dataset root folder; labels.csv is read from here
bs = 32  # batch size
resize_size = 224  # for training, every image is resized to a square with this side length
training_subsample = 1  # fraction of rows to keep; lower it during development rather than using the full dataset

# Mapping from class name to an integer id (not referenced by the other visible cells).
insect_class = {'bee': 0,'wasp': 1, 'insect': 2,'other': 3}

# --- Label table --------------------------------------------------------------
# Read labels.csv, index it by image id, draw the requested fraction of rows
# (sample() also shuffles them) and keep only the image path and label columns.
bees_vs_wasps_dataset_path = Path(train_dir)
df_labels = (
    pd.read_csv(bees_vs_wasps_dataset_path / 'labels.csv')
    .set_index('id')
    .sample(frac=training_subsample, axis=0)
    .loc[:, ['path', 'label']]
)

In [None]:
# Uncomment one of the lines below to read df_labels from a CSV file instead
#df_labels = pd.read_csv("Dataset_full.csv") #full dataset
#df_labels = pd.read_csv("Dataset_10precent.csv") #0.1 fraction for development

In [None]:
# Split the labelled rows 80% / 10% / 10% into train / test / validation:
# first hold out 20% of the rows, then cut that hold-out in half.
train_df, test_val_df = train_test_split(df_labels, test_size=0.2)
test_df, validation_df = train_test_split(test_val_df, test_size=0.5)

# Replace the inherited index of every subset with a fresh 0..n-1 range.
test_df, train_df, validation_df = (
    subset.reset_index(drop=True)
    for subset in (test_df, train_df, validation_df)
)

In [None]:
# Offline augmentation: for every label folder, push each source image through a
# random transformation once and let Keras write the result to disk.
Labels = ["bee1","bee2","wasp1","wasp2","other_insect","other_noinsect"]

# The augmentation settings are the same for every label, so the generator is
# built once instead of once per loop iteration.
train_datagen_aug = ImageDataGenerator(
    rotation_range=40,
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
    width_shift_range=0.15,
    height_shift_range=0.15
)

for label in Labels:
    # NOTE(review): flow_from_directory looks for images inside sub-folders of
    # `directory` -- confirm each label folder is laid out that way.
    augumented_generator = train_datagen_aug.flow_from_directory(
        directory = "kaggle_bee_vs_wasp2/"+label,
        target_size=(resize_size, resize_size),
        batch_size=bs,
        save_to_dir="kaggle_bee_vs_wasp/"+label,
        save_prefix=label,
        save_format="png"
    )
    # Keras iterators restart from the beginning instead of raising
    # StopIteration, so a plain `for batch in generator` never ends. Drawing
    # exactly len(generator) batches makes one full pass over the folder; the
    # augmented files are written as a side effect of producing each batch.
    for _ in range(len(augumented_generator)):
        next(augumented_generator)


In [None]:
#Create custom vgg models

N_CLASSES = 4  # size of the softmax output; matches the four entries of insect_class

# All variants read from the same input tensor. Its size follows resize_size so
# the models always agree with the target_size handed to the image generators.
image_input = Input(shape=(resize_size, resize_size, 3))


def _vgg16_conv_base():
    """Build a new ImageNet-initialised VGG16 convolutional stack on image_input.

    include_top=False leaves out VGG16's own flatten/fc1/fc2/predictions
    layers. None of the models below use them (each one branches off at a
    pooling layer), so building them only cost memory and a larger download.
    Every call creates its own set of layers, so freezing layers in one
    variant does not affect the others.
    """
    return VGG16(input_tensor=image_input, include_top=False, weights='imagenet')


def _add_dense_head(base_model, feature_layer, model_name):
    """Attach the classification head to `base_model` and return the new Model.

    Parameters
    ----------
    base_model : VGG16 model whose layer `feature_layer` supplies the features.
    feature_layer : name of the layer whose output is flattened and classified.
    model_name : name given to the resulting Model.

    Returns
    -------
    Model mapping image_input to an N_CLASSES-way softmax through two
    1024-unit ReLU layers.
    """
    features = base_model.get_layer(feature_layer).output
    hidden = Flatten(name='flatten')(features)
    hidden = Dense(1024, activation='relu', name='fc1')(hidden)
    hidden = Dense(1024, activation='relu', name='fc2')(hidden)
    probabilities = Dense(N_CLASSES, activation='softmax', name='output')(hidden)
    return Model(image_input, probabilities, name=model_name)


# Model with last convolutional layer to train.
# The last six layers (block5_conv3, block5_pool, flatten, fc1, fc2, output)
# stay trainable; everything before them is frozen.
model1 = _vgg16_conv_base()
model_last_layer = _add_dense_head(model1, 'block5_pool', "model_last_layer")
for layer in model_last_layer.layers[:-6]:
    layer.trainable = False

# Model with 2 last convolutional layers to train (block5_conv2 is unfrozen as well).
model2 = _vgg16_conv_base()
model_last_2_layers = _add_dense_head(model2, 'block5_pool', "model_last_2_layers")
for layer in model_last_2_layers.layers[:-7]:
    layer.trainable = False

# Full train model: nothing is frozen.
model3 = _vgg16_conv_base()
model_full = _add_dense_head(model3, 'block5_pool', "model_full")

# Full train model - model simplified: the head is attached after block4, so
# the fifth convolutional block is not part of this model.
model4 = _vgg16_conv_base()
model_simplified = _add_dense_head(model4, 'block4_pool', "model_simplified")

vgg_variants = (model_last_layer, model_last_2_layers, model_full, model_simplified)

# Labels arrive as integer class ids (class_mode='sparse' in the generators),
# hence the sparse variant of the cross-entropy loss.
for vgg_variant in vgg_variants:
    vgg_variant.compile(loss='sparse_categorical_crossentropy',optimizer='adadelta',metrics=['accuracy'])

for vgg_variant in vgg_variants:
    vgg_variant.summary()




In [None]:
# Build the train / validation / test batch generators from the split dataframes.

# Arguments common to all three generators: image paths come from the 'path'
# column (resolved against train_dir), labels from the 'label' column and are
# emitted as integer class ids, and every image is resized to a square.
flow_options = dict(
    directory=train_dir,
    x_col='path',
    y_col='label',
    class_mode='sparse',
    target_size=(resize_size, resize_size),
)

# No augmentation here: pixel values are only scaled by 1/255.
train_datagen = ImageDataGenerator(rescale=1./255)
train_generator = train_datagen.flow_from_dataframe(train_df, batch_size=bs, **flow_options)

validation_datagen = ImageDataGenerator(rescale=1./255)
validation_generator = validation_datagen.flow_from_dataframe(validation_df, batch_size=bs, **flow_options)

# The test generator yields one image per batch.
test_datagen = ImageDataGenerator(rescale=1./255)
test_generator = test_datagen.flow_from_dataframe(test_df, batch_size=1, **flow_options)

In [None]:
#Training models
n_training_samples = len(train_df)
n_validation_samples = len(validation_df)


def _fit_and_save(model, model_path):
    """Train `model` on the shared generators, save it and return its History.

    Parameters
    ----------
    model : compiled Keras model to train.
    model_path : file the trained model is written to.

    Returns
    -------
    The History object returned by model.fit().
    """
    history = model.fit(
        train_generator,
        epochs=25,
        validation_data=validation_generator,
        # Whole batches only: a trailing partial batch is not counted.
        validation_steps=n_validation_samples//bs,
        steps_per_epoch=n_training_samples//bs)
    # Saving through the same `model` reference that was just trained ensures
    # each file holds the model it is named after.
    model.save(model_path)
    return history


history1 = _fit_and_save(model_last_layer, 'last_layer_trained_model.h5')
history2 = _fit_and_save(model_last_2_layers, 'last_2_layers_trained_model.h5')
history3 = _fit_and_save(model_full, 'full_trained_model.h5')
history4 = _fit_and_save(model_simplified, 'model_simplified.h5')


In [None]:
#evaluate the models
filenames = test_generator.filenames
n_test_samples = len(filenames)

# test_generator was created with batch_size=1, so one step per file makes
# exactly one pass over the test set. Each result is what evaluate() returns
# for a model compiled with metrics=['accuracy'].
scores1, scores2, scores3, scores4 = [
    trained_model.evaluate(test_generator, steps=n_test_samples, verbose=1)
    for trained_model in (model_last_layer, model_last_2_layers, model_full, model_simplified)
]

In [None]:
# Re-derives the list of test files and the test-set size from the generator.
# NOTE(review): these are the same two statements that open the evaluation cell.
filenames = test_generator.filenames
n_test_samples = len(filenames)


In [None]:
# NOTE(review): placeholder output, apparently left over from debugging;
# nothing in the visible cells depends on it.
print("123")

In [None]:
#save training info for further evaluation
def _save_training_info(out_dir, history, scores):
    """Write one model's training curves and test scores as CSV files in `out_dir`.

    Parameters
    ----------
    out_dir : folder that receives the files; created if it does not exist.
    history : History object returned by model.fit().
    scores : result of model.evaluate() on the test generator.

    Files written: val_loss.csv, val_accuracy.csv, loss.csv, accuracy.csv
    (one value per epoch) and accuracy_on_test.csv (every entry of `scores`,
    i.e. the test loss followed by the test accuracy).
    """
    # np.savetxt does not create missing folders; without this a missing
    # directory would raise only after all the training has finished.
    os.makedirs(out_dir, exist_ok=True)
    for metric in ("val_loss", "val_accuracy", "loss", "accuracy"):
        np.savetxt(out_dir + "/" + metric + ".csv",
                   history.history[metric],
                   delimiter =", ",
                   fmt ='% s')
    np.savetxt(out_dir + "/accuracy_on_test.csv",
               scores,
               delimiter =", ",
               fmt ='% s')


_save_training_info("model3_last_layer", history1, scores1)
_save_training_info("model4_last_2layers", history2, scores2)
_save_training_info("model5_full", history3, scores3)
# NOTE(review): the folder name says "full" but it receives the results of
# model_simplified; the name is kept so existing readers of these files still work.
_save_training_info("model6_full", history4, scores4)

In [None]:
# Report each model's accuracy on the test set (index 1 of its evaluate() result).
for model_number, model_scores in enumerate((scores1, scores2, scores3, scores4), start=1):
    print ("Model " + str(model_number) + " accuracy on test set:" + str(model_scores[1]) )