# Bees vs Wasps notebook

In [65]:
!git clone https://github.com/Patricklomp/AI-course-project.git


Cloning into 'AI-course-project'...
remote: Enumerating objects: 22, done.[K
remote: Counting objects: 100% (22/22), done.[K
remote: Compressing objects: 100% (18/18), done.[K
remote: Total 11454 (delta 10), reused 13 (delta 4), pack-reused 11432[K
Receiving objects: 100% (11454/11454), 563.30 MiB | 14.83 MiB/s, done.
Resolving deltas: 100% (12/12), done.
Checking out files: 100% (11427/11427), done.


Andmete URL: https://www.kaggle.com/jerzydziewierz/bee-vs-wasp#

#### Kasutatud materjal
- https://blog.keras.io/building-powerful-image-classification-models-using-very-little-data.html
- https://www.kaggle.com/koshirosato/bee-or-wasp-base-line-using-resnet50/notebook

#### Grupp
- Patrick Lomp
- Artti Raasuke

#### Impordid

In [88]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

# additional classic imports
from pathlib import Path
import pandas as pd
import numpy as np
import random
import os
import gc
import cv2
from google.colab.patches import cv2_imshow

from tensorflow.keras.layers import *
from keras.models import Sequential
from keras.optimizers import Adam
from tqdm import tqdm

import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow.keras

In [67]:
gpus = tf.config.experimental.list_physical_devices('GPU') 
for gpu in gpus: 
    tf.config.experimental.set_memory_growth(gpu, True)

In [68]:
# pip3 install --upgrade tensorflow-gpu

from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 7160550042119458214
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 15692777408
locality {
  bus_id: 1
  links {
  }
}
incarnation: 10749052617095718640
physical_device_desc: "device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0"
]


#### Parameetrid

In [69]:
bs = 64 # Batch size
resize_size = 96 # for training, resize all the images to a square of this size
training_subsample = 0.1 # for development, use a small fraction of the entire dataset rater than full dataset
ROOT = './AI-course-project/data'
IMG_SIZE = 256
EPOCHS = 50

## Andmestikude loomine




In [70]:
bees_vs_wasps_dataset_path=Path(ROOT) # this is relative to the "example_notebook" folder. Modify this to reflect your setup
df_labels = pd.read_csv(bees_vs_wasps_dataset_path/'labels.csv')
df_labels=df_labels.set_index('id')
# perform dataset subsampling
df_labels = df_labels.sample(frac=training_subsample, axis=0)

In [71]:
for idx in tqdm(df_labels.index):    
    df_labels.loc[idx,'path']=df_labels.loc[idx,'path'].replace('\\', '/') 
    
df_labels.head()

100%|██████████| 1142/1142 [00:00<00:00, 1734.92it/s]


Unnamed: 0_level_0,path,is_bee,is_wasp,is_otherinsect,is_other,photo_quality,is_validation,is_final_validation,label
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
838,bee1/29666905196_88b341a839_n.jpg,1,0,0,0,1,0,0,bee
4618,wasp1/48656045531_5d84a54b53_n.jpg,0,1,0,0,1,0,0,wasp
4892,wasp1/5960657517_508981ba66_m.jpg,0,1,0,0,1,0,1,wasp
4745,wasp1/505526711_1c5629b267_n.jpg,0,1,0,0,1,0,0,wasp
8277,other_insect/14452397711_4a64d2f05d_n.jpg,0,0,1,0,1,0,0,insect


In [72]:
print("andmestiku suurus enne puhastust:", str(df_labels.size))
df_beesandwasps = df_labels[df_labels['photo_quality'] == 1]
df_labels = df_beesandwasps
print("andmestiku suurus pärast puhastust:", str(df_labels.size))

andmestiku suurus enne puhastust: 10278
andmestiku suurus pärast puhastust: 6570


In [73]:
#Andmete kättesaamine algse csv järgi
train_df = df_labels.query('is_validation == 0 & is_final_validation == 0').reset_index(drop=True)
val_df = df_labels.query('is_validation == 1').reset_index(drop=True)
test_df = df_labels.query('is_final_validation == 1').reset_index(drop=True)

In [74]:
#Meetod admestikude loomiseks pildi failidega
def create_datasets(df, img_size):
    imgs = []
    for path in tqdm(df['path']):
        img = cv2.imread(ROOT+"/"+path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, (img_size,img_size))
        imgs.append(img)

    imgs = np.array(imgs, dtype='float32')
    imgs = imgs / 255.0
    df = pd.get_dummies(df['label'])
    return imgs, df


train_imgs, train_df = create_datasets(train_df, IMG_SIZE)
val_imgs, val_df = create_datasets(val_df, IMG_SIZE)
test_imgs, test_df = create_datasets(test_df, IMG_SIZE)

100%|██████████| 509/509 [00:00<00:00, 521.06it/s]
100%|██████████| 120/120 [00:00<00:00, 530.61it/s]
100%|██████████| 101/101 [00:00<00:00, 564.57it/s]


## Andmete puhastamine

## Andmete visualiseerimine

In [75]:
def img_plot(df, label):
    df = df.query('label == @label')
    imgs = []
    for path in df['path'][:9]:
        img = cv2.imread('data\\'+path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        imgs.append(img)
    f, ax = plt.subplots(3, 3, figsize=(15,15))
    for i, img in enumerate(imgs):
        ax[i//3, i%3].imshow(img)
        ax[i//3, i%3].axis('off')
        ax[i//3, i%3].set_title('label: %s' % label)
    plt.show()
    


img_plot(df_clean, label='bee')
img_plot(df_clean, label='wasp')
img_plot(df_clean, label='insect')
img_plot(df_clean, label='other')

error: ignored

## Närvivõrgu loomine ja treenimine

In [167]:
#Närvivõrk
def make_model(img_size, n):
    inp = Input(shape=(img_size,img_size,n))
    x = Conv2D(32, (3,3))(inp)
    x = Activation('relu')(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)

    x = Conv2D(64, (3,3))(x)
    x = Activation('relu')(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    
    x = Flatten()(x)
    x = Dense(64)(x)
    x = Activation('relu')(x)
    x = Dropout(0.5)(x)
    x = Dense(3, activation='sigmoid')(x)

    model = tf.keras.Model(inputs=inp, outputs=x) 
    model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['categorical_accuracy'])
    return model

callback = tf.keras.callbacks.EarlyStopping(patience=10, 
                                               verbose=1, 
                                               restore_best_weights=True)
model = make_model(IMG_SIZE, 3)

In [160]:
model.summary()

Model: "model_26"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_44 (InputLayer)        [(None, 256, 256, 3)]     0         
_________________________________________________________________
dense_75 (Dense)             (None, 256, 256, 64)      256       
_________________________________________________________________
conv2d_36 (Conv2D)           (None, 254, 254, 32)      18464     
_________________________________________________________________
activation_35 (Activation)   (None, 254, 254, 32)      0         
_________________________________________________________________
max_pooling2d_10 (MaxPooling (None, 127, 127, 32)      0         
_________________________________________________________________
dense_76 (Dense)             (None, 127, 127, 64)      2112      
_________________________________________________________________
activation_36 (Activation)   (None, 127, 127, 64)      0  

In [135]:
print(train_imgs.shape)
print(train_df.shape)
print(train_df)

(509, 256, 256, 3)
(509, 3)
     bee  insect  wasp
0      1       0     0
1      0       0     1
2      0       0     1
3      0       1     0
4      0       0     1
..   ...     ...   ...
504    1       0     0
505    1       0     0
506    0       1     0
507    1       0     0
508    0       0     1

[509 rows x 3 columns]


In [168]:
#Mudeli treenimine
history = model.fit(train_imgs, 
                    train_df, 
                    batch_size=bs, 
                    epochs=EPOCHS,
                    callbacks=[callback],
                    validation_data=(val_imgs, val_df)
                   )

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Restoring model weights from the end of the best epoch.
Epoch 00015: early stopping


## Testimine

In [169]:
#Test hulgal täpsu
model.evaluate(test_imgs, test_df) 



[0.9695920348167419, 0.603960394859314]