# Birds Classifications

## Definition of Transfer Learning

Transfer learning (TL) is a research problem in machine learning (ML) that focuses on storing knowledge gained while solving one problem and applying it to a different but related problem. For example, knowledge gained while learning to recognize cars could apply when trying to recognize trucks. This area of research bears some relation to the long history of psychological literature on transfer of learning, although formal ties between the two fields are limited. From the practical standpoint, reusing or transferring information from previously learned tasks for the learning of new tasks has the potential to significantly improve the sample efficiency of a reinforcement learning agent.
<br><br>
Ressource from : [Wikipedia](https://en.wikipedia.org/wiki/Transfer_learning)

In [1]:
import pandas as pd
import numpy as np
import os
import cv2
import glob
import matplotlib.pyplot as plt
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.layers import Dense, Flatten, Activation
from tensorflow.keras.models import Model, Sequential

# Split Data

In [2]:
# Splitting the data starts here, we store the path to the training set into a variable for further referencing.

train_path = './dataset/train'
test_path = './dataset/test'
valid_path = './dataset/valid'

birds = np.array(list(os.listdir(train_path)))
print(birds)

['CREAM COLORED WOODPECKER' 'RUFUOS MOTMOT' 'WHITE TAILED TROPIC'
 'KOOKABURRA' 'TREE SWALLOW' 'AMERICAN REDSTART' 'EASTERN TOWEE'
 'IBERIAN MAGPIE' 'HOOPOES' 'BELTED KINGFISHER' 'CRESTED CARACARA'
 'IMPERIAL SHAQ' 'VIOLET GREEN SWALLOW' 'CAPE MAY WARBLER'
 'EASTERN MEADOWLARK' 'EUROPEAN GOLDFINCH' 'HOATZIN' 'TAIWAN MAGPIE'
 'RED BELLIED PITTA' 'BARRED PUFFBIRD' 'SNOWY OWL' 'HAMMERKOP'
 'NORTHERN GOSHAWK' 'AFRICAN EMERALD CUCKOO' 'ASHY THRUSHBIRD'
 'BANDED STILT' 'COPPERY TAILED COUCAL' 'NORTHERN RED BISHOP'
 'PURPLE FINCH' 'TAKAHE' 'SANDHILL CRANE' 'KING VULTURE' 'SHOEBILL'
 'TRUMPTER SWAN' 'BLACK-NECKED GREBE' 'FIORDLAND PENGUIN' 'AMERICAN COOT'
 'HEPATIC TANAGER' 'PUFFIN' 'BLACKBURNIAM WARBLER' 'NORTHERN FULMAR'
 'BROWN THRASHER' 'PALILA' 'JABIRU' 'ANNAS HUMMINGBIRD' 'HIMALAYAN MONAL'
 'CRESTED COUA' 'NOISY FRIARBIRD' 'CALIFORNIA GULL' 'PARUS MAJOR'
 'VENEZUELIAN TROUPIAL' 'BIRD OF PARADISE' 'STRAWBERRY FINCH'
 'RED HONEY CREEPER' 'RED FACED WARBLER' 'EMPEROR PENGUIN'
 'CHINESE BAMB

In [3]:
# pick only 20 type of birds to train on
nr_birds = 30

np.random.shuffle(birds)
# slicing the data
birds = birds[:nr_birds]

In [4]:
idx_to_name = {i:x for (i,x) in enumerate(birds)}
name_to_idx = {x:i for (i,x) in enumerate(birds)}
print(idx_to_name)

print(idx_to_name.values())

{0: 'GREAT GRAY OWL', 1: 'CRESTED KINGFISHER', 2: 'KOOKABURRA', 3: 'VIOLET GREEN SWALLOW', 4: 'STORK BILLED KINGFISHER', 5: 'AFRICAN CROWNED CRANE', 6: 'ROCK DOVE', 7: 'NORTHERN GOSHAWK', 8: 'HIMALAYAN BLUETAIL', 9: 'VIOLET TURACO', 10: 'CROW', 11: 'BOBOLINK', 12: 'GANG GANG COCKATOO', 13: 'BLACK & YELLOW  BROADBILL', 14: 'BARN OWL', 15: 'RED HEADED WOODPECKER', 16: 'RAZORBILL', 17: 'BLACK COCKATO', 18: 'HARLEQUIN QUAIL', 19: 'OKINAWA RAIL', 20: 'HAWFINCH', 21: 'CINNAMON FLYCATCHER', 22: 'ORANGE BRESTED BUNTING', 23: 'HOOPOES', 24: 'TOUCHAN', 25: 'GUINEAFOWL', 26: 'EMU', 27: 'CAPUCHINBIRD', 28: 'EUROPEAN TURTLE DOVE', 29: 'TRUMPTER SWAN', 30: 'GRAY CATBIRD', 31: 'PELICAN', 32: 'JAVA SPARROW', 33: 'STEAMER DUCK', 34: 'CASPIAN TERN', 35: 'JABIRU', 36: 'HAMMERKOP', 37: 'EMPEROR PENGUIN', 38: 'BIRD OF PARADISE', 39: 'BALI STARLING'}
dict_values(['GREAT GRAY OWL', 'CRESTED KINGFISHER', 'KOOKABURRA', 'VIOLET GREEN SWALLOW', 'STORK BILLED KINGFISHER', 'AFRICAN CROWNED CRANE', 'ROCK DOVE', 'NO

In [5]:
# Function to store labels based on the folders.

def get_data_labels(path, birds, dimensions):
    data = []
    labels = []

    for bird in birds:
        imgs = [cv2.resize(cv2.imread(img), dimensions, interpolation=cv2.INTER_AREA) for img in glob.glob(path + "/" + bird + "/*.jpg")]
        for img in imgs:
            data.append(img)
            labels.append(name_to_idx[bird])
            
    return np.array(data), np.array(labels)

In [6]:
data_train, labels_train = get_data_labels(train_path, idx_to_name.values(), (224,224))
data_test, labels_test = get_data_labels(test_path, idx_to_name.values(), (224,224))
data_valid, labels_valid = get_data_labels(valid_path, idx_to_name.values(), (224,224))

print(data_train.shape)

(5646, 224, 224, 3)


In [7]:
def normalize(data):
    data = data / 255.0
    data = data.astype('float32')
    return data

def one_hot(labels):
    labels = np.eye(len(np.unique(labels)))[labels]
    return labels

In [8]:
data_train = normalize(data_train)
data_test = normalize(data_test)
data_valid = normalize(data_valid)

labels_train = one_hot(labels_train)
labels_test = one_hot(labels_test)
labels_valid = one_hot(labels_valid)

IndexError: index 39 is out of bounds for axis 0 with size 39

# Architecture

# Implementation

In [None]:
weights_path = "./dataset/pre-trained/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5"
base_vgg16 = VGG16(weights=weights_path, include_top=False, input_shape=(224, 224, 3))
base_vgg16.trainable = False
base_vgg16.summary()

In [None]:
# Freezing the weights
#for layer in base_model.layers:
    #layer.trainable = False
 
# base_model.summary()

In [None]:
# use “get_layer” method to save the last layer of the network
#last_layer = base_model.get_layer('block5_pool')
# save the output of the last layer to be the input of the next layer
#last_output = last_layer.output
 
# flatten the classifier input which is output of the last layer of VGG16 model
#x = Flatten()(last_output)
 
# add our new softmax layer with 3 hidden units
#x = Dense(nr_birds, activation='softmax', name='softmax')(x)

In [None]:
# instantiate a new_model using keras’s Model class
#new_model = Model(inputs=base_model.input, outputs=x)

new_model = Sequential()
new_model.add(base_vgg16)

new_model.add(Flatten()) #1024#model.add(Dense(256)) 
new_model.add(Activation('relu'))
new_model.add(Dense(nr_birds)) 
new_model.add(Activation('softmax'))
 
# print the new_model summary
new_model.summary()

In [None]:
new_model.compile(Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
print(len(data_train))
checkpointer = ModelCheckpoint(filepath='birds.model.hdf5', save_best_only=True)

history = new_model.fit(data_train, labels_train, steps_per_epoch=len(data_train),
validation_data=(data_test, labels_test), validation_steps=len(data_valid), epochs=10, verbose=1, batch_size=len(data_train), callbacks=[checkpointer])

In [None]:
# Analyze Training Data

In [None]:
plt.plot(history.history['val_accuracy'], 'b')
plt.plot(history.history['val_loss'], 'r')
plt.show()

In [None]:
def get_accuracy(model, data_valid, labels_valid):
    predictions = model(data_valid)
    wrong = 0
    for i, pred in enumerate(predictions):
        if( np.argmax(pred) !=  np.argmax(labels_valid[i])):
            wrong += 1
    return (len(data_valid) - wrong) / len(data_valid)

In [None]:
# we use the validation data to verify the accuracy
accuracy = get_accuracy(new_model, data_valid, labels_valid)
print("Accuracy:", accuracy)

In [None]:
new_model.save('birds-trained-model')

# Resources

Architectures images from [Deep Learning for Vision Systems Book](https://www.manning.com/books/deep-learning-for-vision-systems)