# Birds Classifications

## Definition of Transfer Learning

Transfer learning (TL) is a research problem in machine learning (ML) that focuses on storing knowledge gained while solving one problem and applying it to a different but related problem. For example, knowledge gained while learning to recognize cars could apply when trying to recognize trucks. This area of research bears some relation to the long history of psychological literature on transfer of learning, although formal ties between the two fields are limited. From the practical standpoint, reusing or transferring information from previously learned tasks for the learning of new tasks has the potential to significantly improve the sample efficiency of a reinforcement learning agent.
<br><br>
Ressource from : [Wikipedia](https://en.wikipedia.org/wiki/Transfer_learning)

In [1]:
import pandas as pd
import numpy as np
import os
import cv2
import glob
import matplotlib.pyplot as plt
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.layers import Dense, Flatten, Activation
from tensorflow.keras.models import Model, Sequential

# Split Data

In [2]:
# Splitting the data starts here, we store the path to the training set into a variable for further referencing.

train_path = './dataset/train'
test_path = './dataset/test'
valid_path = './dataset/valid'

birds = np.array(list(os.listdir(train_path)))
print(birds)

['CREAM COLORED WOODPECKER' 'RUFUOS MOTMOT' 'WHITE TAILED TROPIC'
 'KOOKABURRA' 'TREE SWALLOW' 'AMERICAN REDSTART' 'EASTERN TOWEE'
 'IBERIAN MAGPIE' 'HOOPOES' 'BELTED KINGFISHER' 'CRESTED CARACARA'
 'IMPERIAL SHAQ' 'VIOLET GREEN SWALLOW' 'CAPE MAY WARBLER'
 'EASTERN MEADOWLARK' 'EUROPEAN GOLDFINCH' 'HOATZIN' 'TAIWAN MAGPIE'
 'RED BELLIED PITTA' 'BARRED PUFFBIRD' 'SNOWY OWL' 'HAMMERKOP'
 'NORTHERN GOSHAWK' 'AFRICAN EMERALD CUCKOO' 'ASHY THRUSHBIRD'
 'BANDED STILT' 'COPPERY TAILED COUCAL' 'NORTHERN RED BISHOP'
 'PURPLE FINCH' 'TAKAHE' 'SANDHILL CRANE' 'KING VULTURE' 'SHOEBILL'
 'TRUMPTER SWAN' 'BLACK-NECKED GREBE' 'FIORDLAND PENGUIN' 'AMERICAN COOT'
 'HEPATIC TANAGER' 'PUFFIN' 'BLACKBURNIAM WARBLER' 'NORTHERN FULMAR'
 'BROWN THRASHER' 'PALILA' 'JABIRU' 'ANNAS HUMMINGBIRD' 'HIMALAYAN MONAL'
 'CRESTED COUA' 'NOISY FRIARBIRD' 'CALIFORNIA GULL' 'PARUS MAJOR'
 'VENEZUELIAN TROUPIAL' 'BIRD OF PARADISE' 'STRAWBERRY FINCH'
 'RED HONEY CREEPER' 'RED FACED WARBLER' 'EMPEROR PENGUIN'
 'CHINESE BAMB

In [3]:
# pick only 20 type of birds to train on
nr_birds = 30

np.random.shuffle(birds)
# slicing the data
birds = birds[:nr_birds]

In [4]:
idx_to_name = {i:x for (i,x) in enumerate(birds)}
name_to_idx = {x:i for (i,x) in enumerate(birds)}
print(idx_to_name)

print(idx_to_name.values())

{0: 'AMERICAN PIPIT', 1: 'SCARLET MACAW', 2: 'ABBOTTS BOOBY', 3: 'GREAT JACAMAR', 4: 'ORIENTAL BAY OWL', 5: 'PEREGRINE FALCON', 6: 'STRIPPED SWALLOW', 7: 'WILD TURKEY', 8: 'RED HEADED DUCK', 9: 'FLAME TANAGER', 10: 'MOURNING DOVE', 11: 'HELMET VANGA', 12: 'WHITE THROATED BEE EATER', 13: 'HARPY EAGLE', 14: 'RED TAILED THRUSH', 15: 'NORTHERN MOCKINGBIRD', 16: 'GO AWAY BIRD', 17: 'HARLEQUIN QUAIL', 18: 'CRAB PLOVER', 19: 'BLONDE CRESTED WOODPECKER', 20: 'SANDHILL CRANE', 21: 'BULWERS PHEASANT', 22: 'AMERICAN BITTERN', 23: 'CHUKAR PARTRIDGE', 24: 'BROWN CREPPER', 25: 'ALTAMIRA YELLOWTHROAT', 26: 'HORNED SUNGEM', 27: 'ANDEAN GOOSE', 28: 'BOBOLINK', 29: 'ABYSSINIAN GROUND HORNBILL'}
dict_values(['AMERICAN PIPIT', 'SCARLET MACAW', 'ABBOTTS BOOBY', 'GREAT JACAMAR', 'ORIENTAL BAY OWL', 'PEREGRINE FALCON', 'STRIPPED SWALLOW', 'WILD TURKEY', 'RED HEADED DUCK', 'FLAME TANAGER', 'MOURNING DOVE', 'HELMET VANGA', 'WHITE THROATED BEE EATER', 'HARPY EAGLE', 'RED TAILED THRUSH', 'NORTHERN MOCKINGBIRD', 

In [5]:
# Function to store labels based on the folders.

def get_data_labels(path, birds, dimensions):
    data = []
    labels = []

    for bird in birds:
        imgs = [cv2.resize(cv2.imread(img), dimensions, interpolation=cv2.INTER_AREA) for img in glob.glob(path + "/" + bird + "/*.jpg")]
        for img in imgs:
            data.append(img)
            labels.append(name_to_idx[bird])
            
    return np.array(data), np.array(labels)

In [6]:
data_train, labels_train = get_data_labels(train_path, idx_to_name.values(), (224,224))
data_test, labels_test = get_data_labels(test_path, idx_to_name.values(), (224,224))
data_valid, labels_valid = get_data_labels(valid_path, idx_to_name.values(), (224,224))

print(data_train.shape)

(4346, 224, 224, 3)


In [7]:
def normalize(data):
    data = data / 255.0
    data = data.astype('float32')
    return data

def one_hot(labels):
    labels = np.eye(len(np.unique(labels)))[labels]
    return labels

In [8]:
data_train = normalize(data_train)
data_test = normalize(data_test)
data_valid = normalize(data_valid)

labels_train = one_hot(labels_train)
labels_test = one_hot(labels_test)
labels_valid = one_hot(labels_valid)

# Architecture

# Implementation

In [9]:
weights_path = "./dataset/pre-trained/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5"
base_vgg16 = VGG16(weights=weights_path, include_top=False, input_shape=(224, 224, 3))
base_vgg16.trainable = False
base_vgg16.summary()

2022-05-01 11:44:29.061649: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2022-05-01 11:44:29.063759: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


Metal device set to: Apple M1
Model: "vgg16"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (N

In [10]:
# Freezing the weights
#for layer in base_model.layers:
    #layer.trainable = False
 
# base_model.summary()

In [11]:
# use “get_layer” method to save the last layer of the network
#last_layer = base_model.get_layer('block5_pool')
# save the output of the last layer to be the input of the next layer
#last_output = last_layer.output
 
# flatten the classifier input which is output of the last layer of VGG16 model
#x = Flatten()(last_output)
 
# add our new softmax layer with 3 hidden units
#x = Dense(nr_birds, activation='softmax', name='softmax')(x)

In [12]:
# instantiate a new_model using keras’s Model class
#new_model = Model(inputs=base_model.input, outputs=x)

new_model = Sequential()
new_model.add(base_vgg16)

new_model.add(Flatten()) #1024#model.add(Dense(256)) 
new_model.add(Activation('relu'))
new_model.add(Dense(nr_birds)) 
new_model.add(Activation('softmax'))
 
# print the new_model summary
new_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 vgg16 (Functional)          (None, 7, 7, 512)         14714688  
                                                                 
 flatten (Flatten)           (None, 25088)             0         
                                                                 
 activation (Activation)     (None, 25088)             0         
                                                                 
 dense (Dense)               (None, 30)                752670    
                                                                 
 activation_1 (Activation)   (None, 30)                0         
                                                                 
Total params: 15,467,358
Trainable params: 752,670
Non-trainable params: 14,714,688
_________________________________________________________________


In [13]:
new_model.compile(Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
print(len(data_train))
checkpointer = ModelCheckpoint(filepath='birds.model.hdf5', save_best_only=True)

history = new_model.fit(data_train, labels_train, steps_per_epoch=len(data_train),
validation_data=(data_test, labels_test), validation_steps=len(data_valid), epochs=30, verbose=1, batch_size=len(data_train), callbacks=[checkpointer])

4346


In [None]:
# Analyze Training Data

In [None]:
plt.plot(history.history['val_accuracy'], 'b')
plt.plot(history.history['val_loss'], 'r')
plt.show()

In [None]:
def get_accuracy(model, data_valid, labels_valid):
    predictions = model(data_valid)
    wrong = 0
    for i, pred in enumerate(predictions):
        if( np.argmax(pred) !=  np.argmax(labels_valid[i])):
            wrong += 1
    return (len(data_valid) - wrong) / len(data_valid)

In [None]:
# we use the validation data to verify the accuracy
accuracy = get_accuracy(new_model, data_valid, labels_valid)
print("Accuracy:", accuracy)

In [None]:
new_model.save('birds-trained-model')

# Resources

Architectures images from [Deep Learning for Vision Systems Book](https://www.manning.com/books/deep-learning-for-vision-systems)