# Birds Image Classification

## Section 1: Mount drive, copy images onto virtual machine and import packages

In [None]:
# Make Google Drive available inside the Colab VM (the data is read from it)
from google.colab import drive

MOUNT_POINT = '/content/drive/'
drive.mount(MOUNT_POINT)

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive/


In [None]:
# Copying zipped data to virtual machine and unzipping, before removing zipped file
zip_path ='/content/drive/My\ Drive/Machine_Learning/Repos/Kaggle_Birds_Classifier/Data/consolidated_zip.zip' 
!cp {zip_path} .
!unzip -q consolidated_zip.zip
!rm consolidated_zip.zip 

In [None]:
# Packages

# Machine Learning Libraries
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.applications import Xception
from tensorflow.keras.preprocessing.image import ImageDataGenerator,  DirectoryIterator, array_to_img, img_to_array, load_img
from tensorflow.keras.models import Sequential,Model
from tensorflow.keras.layers import Conv2D, MaxPooling2D,Activation, Dropout, Flatten, Dense
from tensorflow.keras.metrics import CategoricalAccuracy
from tensorflow.keras.utils import to_categorical
import sklearn.metrics


# General 
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from PIL import Image
import os
import seaborn as sns
import time 
import datetime
import io
import itertools
from packaging import version
from six.moves import range
import random


%matplotlib inline

# Setting random seeds from a single constant so a restarted run is repeatable.
# Python's own `random` module is seeded too: it is what picks the sample
# images inspected later in the notebook.
SEED = 42
random.seed(SEED)
tf.random.set_seed(SEED)
np.random.seed(SEED)

  import pandas.util.testing as tm


## Section 2: Generating Images 

In [None]:
# Path to all images pre-splitting
all_path = '/content/consolidated_for_Zip'

# One class per sub-directory of the dataset root. Non-directory entries
# (e.g. a stray hidden file) are skipped so they are not handed to Keras as an
# additional class; sorting keeps the class -> index mapping stable.
class_list = sorted(
    entry for entry in os.listdir(all_path)
    if os.path.isdir(os.path.join(all_path, entry))
)


In [None]:
# Define image data generators.
# Both multiply pixel values by 1/255 and hold back 20% of the images for
# validation; only the training generator adds random horizontal flips.
_shared_gen_kwargs = dict(rescale=1./255, validation_split=0.2)

datagen = ImageDataGenerator(horizontal_flip=True, **_shared_gen_kwargs)

val_datagen = ImageDataGenerator(**_shared_gen_kwargs)

## Section 3: Setting up callbacks 
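**Remember to change the folder name to `CNN--<version>`** (it is set through `model_name` in the callbacks cell below, which names both the checkpoint files and the TensorBoard log folder).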

In [None]:
# Creating Model Folder
# Start from an empty Models directory under /content: any existing Models
# folder (and everything inside it) is deleted before it is recreated.
%cd /content
%rm -rf Models
%mkdir ./Models

/content


In [None]:
# Load previous model log from drive into content folder for comparison in tensorboard
# The destination ./Models is relative to the current working directory.
%cp -r /content/drive/My\ Drive/Machine_Learning/Repos/Kaggle_Birds_Classifier/Models/logs ./Models

In [None]:
# Set callbacks

# Log directory, plus a unique name for this run: version prefix followed by a
# yymmddHHMMSS timestamp
log_dir = '/content/Models/logs/'
run_stamp = int(datetime.datetime.now().strftime("%y%m%d%H%M%S"))
model_name = 'CNN--009.{}'.format(run_stamp)

# Class names
class_names = class_list

# Defining Callbacks
# Checkpoint: written with save_best_only=False; epoch, validation loss and
# validation accuracy are encoded in the file name.
checkpoint_path = '/content/Models/' + model_name + '.{epoch:02d}--{val_loss:.3f}--{val_categorical_accuracy:.3f}.h5'
checkpoint_cb = keras.callbacks.ModelCheckpoint(checkpoint_path, save_best_only=False)

# Early stopping: watches validation accuracy with a patience of 5 epochs.
early_cb = keras.callbacks.EarlyStopping(
    monitor='val_categorical_accuracy',
    patience=5,
    verbose=1,
)

# TensorBoard: one log sub-folder per run, named after the model.
tensor_cb = keras.callbacks.TensorBoard(
    log_dir=log_dir + model_name,
    profile_batch=100000000,
    histogram_freq=1,
)


In [None]:
# Grouping callbacks into the list handed to model.fit
callbacks = [
    checkpoint_cb,  # model checkpoints
    early_cb,       # early stopping
    tensor_cb,      # TensorBoard logging
]

## Section 4 : Training

In [None]:
# Load (or reload) the TensorBoard notebook extension so %tensorboard is available
%reload_ext tensorboard

In [None]:
# Launch TensorBoard inside the notebook, pointed at the run logs in /content/Models/logs
%tensorboard --logdir /content/Models/logs

In [None]:
# Load CNN_005 - Xception Model where only the top layer has been trained. 
%cd /content/drive/My\ Drive/Machine_Learning/Repos/Kaggle_Birds_Classifier/Models
model = keras.models.load_model('CNN--005.200623090057.08--7.169.h5')

model.summary()




/content/drive/My Drive/Machine_Learning/Repos/Kaggle_Birds_Classifier/Models
Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 224, 224, 3) 0                                            
__________________________________________________________________________________________________
block1_conv1 (Conv2D)           (None, 111, 111, 32) 864         input_1[0][0]                    
__________________________________________________________________________________________________
block1_conv1_bn (BatchNormaliza (None, 111, 111, 32) 128         block1_conv1[0][0]               
__________________________________________________________________________________________________
block1_conv1_act (Activation)   (None, 111, 111, 32) 0           block1_conv1_bn[0][0]            
________________

In [None]:
# List every layer of the model next to its index (used to choose which layers to unfreeze)
for i, layer in enumerate(model.layers):
  print('{} {}'.format(i, layer.name))

0 input_1
1 block1_conv1
2 block1_conv1_bn
3 block1_conv1_act
4 block1_conv2
5 block1_conv2_bn
6 block1_conv2_act
7 block2_sepconv1
8 block2_sepconv1_bn
9 block2_sepconv2_act
10 block2_sepconv2
11 block2_sepconv2_bn
12 conv2d
13 block2_pool
14 batch_normalization
15 add
16 block3_sepconv1_act
17 block3_sepconv1
18 block3_sepconv1_bn
19 block3_sepconv2_act
20 block3_sepconv2
21 block3_sepconv2_bn
22 conv2d_1
23 block3_pool
24 batch_normalization_1
25 add_1
26 block4_sepconv1_act
27 block4_sepconv1
28 block4_sepconv1_bn
29 block4_sepconv2_act
30 block4_sepconv2
31 block4_sepconv2_bn
32 conv2d_2
33 block4_pool
34 batch_normalization_2
35 add_2
36 block5_sepconv1_act
37 block5_sepconv1
38 block5_sepconv1_bn
39 block5_sepconv2_act
40 block5_sepconv2
41 block5_sepconv2_bn
42 block5_sepconv3_act
43 block5_sepconv3
44 block5_sepconv3_bn
45 add_3
46 block6_sepconv1_act
47 block6_sepconv1
48 block6_sepconv1_bn
49 block6_sepconv2_act
50 block6_sepconv2
51 block6_sepconv2_bn
52 block6_sepconv3_act

In [None]:
# Assign which layers in model are to be made trainable.
# Every layer gets an explicit flag, so the result depends only on the cut-off
# below and not on flags left behind by an earlier run of this cell with a
# different cut-off.
# NOTE(review): assumes layers up to index 121 are meant to stay frozen (the
# loaded model is described as having only its top layer trained) - confirm.
FIRST_TRAINABLE_LAYER = 122  # i.e. layers with index > 121 are fine-tuned

for i, layer in enumerate(model.layers):
  layer.trainable = i >= FIRST_TRAINABLE_LAYER


# Compile after changing the trainable flags; small learning rate for fine-tuning
model.compile(loss='categorical_crossentropy',
              optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
              metrics=['categorical_accuracy'])

model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 224, 224, 3) 0                                            
__________________________________________________________________________________________________
block1_conv1 (Conv2D)           (None, 111, 111, 32) 864         input_1[0][0]                    
__________________________________________________________________________________________________
block1_conv1_bn (BatchNormaliza (None, 111, 111, 32) 128         block1_conv1[0][0]               
__________________________________________________________________________________________________
block1_conv1_act (Activation)   (None, 111, 111, 32) 0           block1_conv1_bn[0][0]            
______________________________________________________________________________________________

In [None]:
# Set batch size
batch_size = 32  # Reduced when updating Xception model weights due to memory

# --------- Creating data generators --------------------
image_size = (224, 224)

train_gen = DirectoryIterator(
    all_path, datagen, target_size=image_size, batch_size=batch_size,
    subset='training', classes=class_list, seed=42)

# Important that shuffle is false: predictions are later compared with
# valid_gen.classes position by position.
valid_gen = DirectoryIterator(
    all_path, val_datagen, target_size=image_size, batch_size=batch_size,
    subset='validation', classes=class_list, shuffle=False)

# ------------------ Training --------------------
# Changed to 'fit' rather than 'fit_generator' for running on colab.
# Step counts are taken from the generators' own sample counts instead of
# hard-coded image totals, so they stay correct if the dataset or the split
# changes. Floor division keeps the original "whole batches only" behaviour;
# max(1, ...) guards against a zero step count on a very small dataset.
train_steps = max(1, train_gen.samples // batch_size)
valid_steps = max(1, valid_gen.samples // batch_size)

history = model.fit(
        train_gen,
        steps_per_epoch=train_steps,
        epochs=50,
        validation_data=valid_gen,
        validation_steps=valid_steps,
        callbacks=callbacks
        )

Found 23677 images belonging to 200 classes.
Found 5826 images belonging to 200 classes.
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 00019: early stopping


## Section 5: Check model loads and predicts correctly.

In [None]:
# Load best model
# The checkpoint callback names its files
#   <model_name>.<epoch>--<val_loss>--<val_categorical_accuracy>.h5
# so the best checkpoint of this run is found from the file names instead of
# hard-coding a path that belongs to one particular (earlier) run.
def _best_checkpoint(model_dir, prefix):
    """Return the path of the checkpoint with the highest validation accuracy.

    Args:
        model_dir: directory holding the '.h5' checkpoint files.
        prefix: file-name prefix identifying the run (the model name).

    Returns:
        Full path of the matching checkpoint whose trailing accuracy field is
        largest; on a tie the earliest file in sorted name order wins.

    Raises:
        FileNotFoundError: if no checkpoint with that prefix exists.
    """
    candidates = sorted(
        name for name in os.listdir(model_dir)
        if name.startswith(prefix) and name.endswith('.h5')
    )
    if not candidates:
        raise FileNotFoundError(
            'No checkpoints starting with {!r} found in {}'.format(prefix, model_dir))

    def val_accuracy(name):
        # The accuracy is the last '--'-separated field before the extension
        return float(name[:-len('.h5')].rsplit('--', 1)[-1])

    return os.path.join(model_dir, max(candidates, key=val_accuracy))


best_model_path = _best_checkpoint('/content/Models', model_name)
print('Loading', best_model_path)
best_model = keras.models.load_model(best_model_path)

OSError: ignored

### Compare top 5 predicted labels with real label for random image

In [None]:
# Calculate predictions and labels
test_pred_raw = best_model.predict(valid_gen)   # raw per-class scores for each prediction
test_pred = test_pred_raw.argmax(axis=1)        # index of the highest-scoring class
test_labels = valid_gen.classes                 # integer-encoded real labels
test_labels_ohc = to_categorical(test_labels)   # one-hot encoded real labels

In [None]:
# Print the top-5 (score, class index) pairs next to the real label for a
# random run of up to 10 consecutive validation images.
# The window is bounded by the number of predictions actually available:
# len(valid_gen) * batch size counts the last, partially filled batch as full
# and can therefore point past the end of the prediction array.
n_preds = len(test_pred_raw)
window = min(10, n_preds)
rint = random.randint(window, n_preds)
for j in range(rint - window, rint):
    scored = [(x, i) for (i, x) in enumerate(test_pred_raw[j])]
    top5_pred = sorted(scored, reverse=True)[:5]
    print('Pred_labels:',(top5_pred),'Real_Label:' , test_labels[j], ' Index:',j)

### Plot image with prediction as the title. The title will be red if it is incorrect. 

In [None]:
# Generate first batch of valid_gen
# `next` asks the iterator for its following batch, so running this cell again
# moves on to a later batch instead of returning the same one.
# img is the batch of images and label the matching targets (presumably one-hot
# rows, since the plotting cell below takes label[i].argmax() - confirm).
img,label = next(valid_gen)

In [None]:
# Pick one image of the batch at random, predict its class and plot it with
# the predicted class name as title.
# The index is drawn from the real batch length: a batch may hold fewer images
# than the batch size, in which case a fixed 0-31 range would overrun.
i = random.randrange(len(img))
img_array = np.expand_dims(img[i], axis=0)  # add a leading batch axis: predict expects a batch
pred = best_model.predict(img_array)
pred_class = pred.argmax()

image = array_to_img(img[i])
x = plt.imshow(image)
x = plt.axis('off')
title = class_list[pred_class]
# Default title colour when the prediction matches the real label, red otherwise
if pred_class == label[i].argmax():
  x = plt.title(title)
else:
  x = plt.title(title, color='r')

## Section 6: Transfer desired Models and Logs back to Google Drive. 

In [None]:
# Copying logs over to google drive - Turn on if happy with training
% cp -r /content/Models/logs /content/drive/My\ Drive/Machine_Learning/Repos/Kaggle_Birds_Classifier/Models/

In [None]:
# Copying best model over to google drive - Turn on if happy with training
% cd /content/Models
% cp CNN--009.200628204955.14--0.637--0.913.h5 /content/drive/My\ Drive/Machine_Learning/Repos/Kaggle_Birds_Classifier/Models/

/content/Models
