In [26]:
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras import layers
from keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense, Activation,GlobalMaxPooling2D
from keras import applications
from keras.preprocessing.image import ImageDataGenerator
from keras import optimizers
from keras.applications import VGG16
from keras.models import Model

In [33]:
from pathlib import Path


train_dir = "kaggle_bee_vs_wasp/"
bs = 32 # Batch size
resize_size = 128 # for training, resize all the images to a square of this size
training_subsample = 0.1 # for development, use a small fraction of the entire dataset rater than full dataset
bees_vs_wasps_dataset_path=Path(train_dir) # this is relative to the "example_notebook" folder. Modify this to reflect your setup
df_labels = pd.read_csv(bees_vs_wasps_dataset_path/'labels.csv')
df_labels=df_labels.set_index('id')
# perform dataset subsampling
df_labels = df_labels.sample(frac=training_subsample, axis=0)
insect_class = {'bee': 0,'wasp': 1, 'insect': 2,'other': 3} 
#df_labels.label = [insect_class[item] for item in df_labels.label] 
df_labels = df_labels[['path','label']]
train_df, validation_df = train_test_split(df_labels, test_size=0.1)
train_df = train_df.reset_index(drop=True)
validation_df = validation_df.reset_index(drop=True)

In [34]:
pretrained_model = VGG16(include_top=False, weights= "imagenet",input_shape=(resize_size, resize_size, 3))
print(pretrained_model.summary())
for layer in pretrained_model.layers:
    layer.trainable = False

Model: "vgg16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_4 (InputLayer)         [(None, 128, 128, 3)]     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 128, 128, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 128, 128, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 64, 64, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 64, 64, 128)       73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 64, 64, 128)       147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 32, 32, 128)       0     

In [35]:
last_layer = pretrained_model.get_layer('block5_pool')
last_output = last_layer.output
     
x = Flatten()(last_output)
model = Model(pretrained_model.input, x)
model.compile(loss='sparse_categorical_crossentropy',
              optimizer=optimizers.SGD(lr=1e-4, momentum=0.9),
              metrics=['accuracy'])
model.summary()

Model: "functional_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_4 (InputLayer)         [(None, 128, 128, 3)]     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 128, 128, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 128, 128, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 64, 64, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 64, 64, 128)       73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 64, 64, 128)       147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 32, 32, 128)      

In [36]:
train_datagen = ImageDataGenerator(
    rescale=1./255,
)

train_generator = train_datagen.flow_from_dataframe(
    train_df, 
    train_dir,  
    x_col='path',
    y_col='label',
    class_mode='sparse',
    target_size=(resize_size, resize_size),
    batch_size=bs
)
features = model.predict(train_generator)

validation_datagen = ImageDataGenerator(rescale=1./255)
validation_generator = validation_datagen.flow_from_dataframe(
    validation_df, 
    train_dir, 
    x_col='path',
    y_col='label',
    class_mode='sparse',
    target_size=(resize_size, resize_size),
    batch_size=bs
)
valid_model_features = model.predict(validation_generator)

Found 1027 validated image filenames belonging to 4 classes.
Found 115 validated image filenames belonging to 4 classes.


In [37]:
X = features
y = np.array(train_generator.labels)

In [38]:
features.shape

(1027, 8192)

In [39]:
from sklearn import svm
lin_svm = svm.SVC(kernel='linear')
quadratic_svm = svm.SVC(kernel='poly', degree=2)
exp_svm = svm.SVC(kernel='rbf')
lin_svm.fit(X,y)
quadratic_svm.fit(X,y)
exp_svm.fit(X,y)

SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)

In [40]:
y_pred = lin_svm.predict(valid_model_features)

In [41]:
from sklearn.metrics import accuracy_score
accuracy_score(validation_generator.labels, y_pred)
lin_score = lin_svm.score(valid_model_features, validation_generator.labels)
quadratic_score = quadratic_svm.score(valid_model_features, validation_generator.labels)
exp_score = exp_svm.score(valid_model_features, validation_generator.labels)

In [42]:
print("Lin score " + str(lin_score))
print("Quadratic score " + str(quadratic_score))
print("Exp score " + str(exp_score))

Lin score 0.2608695652173913
Quadratic score 0.45217391304347826
Exp score 0.45217391304347826
