# Bees vs Wasps notebook

#### Imports

In [38]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

# additional classic imports
from pathlib import Path
import pandas as pd
import numpy as np
import random
import os
import gc
import cv2

from keras.models import Model
from keras.layers import Input, Conv2D, Activation, Flatten, Dense
from keras.optimizers import Adam
from keras.layers import MaxPooling2D, BatchNormalization, Dropout
from keras.preprocessing.image import ImageDataGenerator
from tqdm import tqdm
import keras
import matplotlib.pyplot as plt

##Selle järgi võiks teha https://blog.keras.io/building-powerful-image-classification-models-using-very-little-data.html
##https://www.kaggle.com/koshirosato/bee-or-wasp-base-line-using-resnet50/notebook

#### Run parameters

In [35]:
bs = 64 # Batch size
resize_size = 96 # for training, resize all the images to a square of this size
training_subsample = 0.1 # for development, use a small fraction of the entire dataset rater than full dataset
ROOT = './data'
IMG_SIZE = 256

In [28]:
bees_vs_wasps_dataset_path=Path(ROOT) # this is relative to the "example_notebook" folder. Modify this to reflect your setup
df_labels = pd.read_csv(bees_vs_wasps_dataset_path/'labels.csv')
df_labels=df_labels.set_index('id')
# perform dataset subsampling
df_labels = df_labels.sample(frac=training_subsample, axis=0)

In [30]:
for idx in tqdm(df_labels.index):    
    df_labels.loc[idx,'path']=df_labels.loc[idx,'path'].replace('\\', '/') 
    
df_labels.head()

100%|████████████████████████████████████████████████████████████████████████████| 1142/1142 [00:00<00:00, 2649.71it/s]


Unnamed: 0_level_0,path,is_bee,is_wasp,is_otherinsect,is_other,photo_quality,is_validation,is_final_validation,label
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
6407,wasp2/H00060.jpg,0,1,0,0,0,0,1,wasp
9356,other_insect/39302773652_e4d37fccbe_m.jpg,0,0,1,0,1,0,0,insect
3340,wasp1/14506580520_de3344bfe7_n.jpg,0,1,0,0,1,0,0,wasp
754,bee1/28498682167_5746a935d3_m.jpg,1,0,0,0,1,0,0,bee
9302,other_insect/38110134886_62acfbb6e7_n.jpg,0,0,1,0,1,0,0,insect


## Andmestikude loomine




In [17]:
train_df = df_labels.query('is_validation == 0 & is_final_validation == 0').reset_index(drop=True)
val_df = df_labels.query('is_validation == 1').reset_index(drop=True)
test_df = df_labels.query('is_final_validation == 1').reset_index(drop=True)

In [37]:
def create_datasets(df, img_size):
    imgs = []
    for path in tqdm(df_labels['path']):
        img = cv2.imread(ROOT+"/"+path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, (img_size,img_size))
        imgs.append(img)

    imgs = np.array(imgs, dtype='float32')
    imgs = imgs / 255.0
    df = pd.get_dummies(df['label'])
    return imgs, df


train_imgs, train_df = create_datasets(train_df, IMG_SIZE)
val_imgs, val_df = create_datasets(val_df, IMG_SIZE)
test_imgs, test_df = create_datasets(test_df, IMG_SIZE)

100%|██████████████████████████████████████████████████████████████████████████████| 1142/1142 [00:12<00:00, 90.33it/s]
100%|█████████████████████████████████████████████████████████████████████████████| 1142/1142 [00:05<00:00, 220.80it/s]
100%|█████████████████████████████████████████████████████████████████████████████| 1142/1142 [00:05<00:00, 222.26it/s]


In [16]:
datagen = ImageDataGenerator(
        rotation_range=40,
        width_shift_range=0.2,
        height_shift_range=0.2,
        rescale=1./255,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        fill_mode='nearest')

In [12]:
train_datagen = ImageDataGenerator(
        rescale=1./255,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True)

test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
        'data/train',  # this is the target directory
        target_size=(150, 150),  # all images will be resized to 150x150
        batch_size=batch_size,
        class_mode='binary')

validation_generator = test_datagen.flow_from_directory(
        'data/validation',
        target_size=(150, 150),
        batch_size=batch_size,
        class_mode='binary')

In [6]:
#Närvivõrk
x = Input(shape=(32, 32, 3))
c1 = Conv2D(32, (3, 3), strides=1, padding='same')(x)
b1 = BatchNormalization()(c1)
a1 = Activation('relu')(b1)
c2 = Conv2D(32, (3, 3), strides=1)(a1)
b2 = BatchNormalization()(c2)
a2 = Activation('relu')(b2)
p2 = MaxPooling2D( pool_size=(2, 2) )(a2)
d2 = Dropout(.25)(p2)
f2 = Flatten()(d2)
h3 = Dense(100)(f2)
b3 = BatchNormalization()(h3)
a3 = Activation('relu')(b3)
d3 = Dropout(.5)(a3)
z = Dense(10)(d3)
p = Activation('softmax')(z)

model = Model(inputs=x, outputs=p)
model.compile(loss='sparse_categorical_crossentropy', optimizer=Adam(lr=0.001), metrics=['accuracy'])
model.summary()


Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 32, 32, 3)         0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 32, 32, 32)        896       
_________________________________________________________________
batch_normalization_1 (Batch (None, 32, 32, 32)        128       
_________________________________________________________________
activation_1 (Activation)    (None, 32, 32, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 30, 30, 32)        9248      
_________________________________________________________________
batch_normalization_2 (Batch (None, 30, 30, 32)        128       
_________________________________________________________________
activation_2 (Activation)    (None, 30, 30, 32)        0   

NameError: name 'X_train' is not defined

In [None]:
model.fit_generator(
        train_generator,
        steps_per_epoch=2000 // batch_size,
        epochs=50,
        validation_data=validation_generator,
        validation_steps=800 // batch_size)
model.save_weights('first_try.h5')  # always save your weights after training or during training