# Learning Deep Features
###### Transfer learning (TL) is a research problem in machine learning (ML) that focuses on storing knowledge gained while solving one problem and applying it to a different but related problem.[1] For example, knowledge gained while learning to recognize cars could apply when trying to recognize trucks. This area of research bears some relation to the long history of psychological literature on transfer of learning, although practical ties between the two fields are limited. From the practical standpoint, reusing or transferring information from previously learned tasks for the learning of new tasks has the potential to significantly improve the sample efficiency of a reinforcement learning agent.[2]

## 1. Data Preparation and Deep Features Extraction

In this section, the images are imported and converted into tensors, and their vector representations (deep features) are extracted with VGG19. The classes used for classification are then selected.

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

from collections import Counter
import tensorflow as tf
import numpy as np
import json
import sys
import os

In [None]:
# Parse the annotation file. The context manager closes the handle as soon as
# the JSON has been read (the original left it open for the whole session).
with open('train.json', encoding='utf-8') as f:
    annotations = json.load(f)

# Names of the image files actually present in the local NEW/ folder.
image_files = set(os.listdir('NEW'))

# image id -> bare file name, restricted to images that exist in NEW/.
image_IDs = {}
for i in annotations['images']:
    base_name = i['file_name'].split('/')[-1]  # drop any directory prefix
    if base_name in image_files:
        image_IDs[i['id']] = base_name

# file name -> category id. If an image has several annotations, the last one
# encountered wins because it overwrites the earlier entry.
imgID_catIDs = {}
for j in annotations['annotations']:
    if j['image_id'] in image_IDs:
        imgID_catIDs[image_IDs[j['image_id']]] = j['category_id']

# category id -> family name, only for categories that are actually used.
# The used ids are collected into a set once so each membership test is O(1)
# instead of a scan over all dictionary values.
used_category_ids = set(imgID_catIDs.values())
catIDs_names = {}
for r in annotations['categories']:
    if r['id'] in used_category_ids:
        catIDs_names[r['id']] = r['family']

# file name -> family name: the label used for classification.
# Raises KeyError if a file in NEW/ has no annotation or its category is
# missing from annotations['categories'].
dict_labels = {}
for val in image_IDs.values():
    dict_labels[val] = catIDs_names[imgID_catIDs[val]]

In [None]:
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.vgg19 import preprocess_input
from tensorflow.keras.applications.vgg19 import VGG19
from keras.models import Model

# VGG19 initialised with ImageNet weights, re-wired so that the activations of
# its 'fc1' layer become the network output (the deep features).
base_model = VGG19(weights='imagenet')
fc1_output = base_model.get_layer('fc1').output
model = Model(inputs=base_model.input, outputs=fc1_output)

def extract_feature(model, img):
    """Return the L2-normalised feature vector predicted by `model` for `img`.

    Args:
        model: Object exposing ``predict(batch)``; only the first row of the
            returned batch is used.
        img: Input batch, passed unchanged to ``model.predict``.

    Returns:
        The first predicted row divided by its Euclidean norm. If that row is
        all zeros it is returned unchanged, because dividing by a zero norm
        would fill the vector with NaNs.
    """
    feature = model.predict(img)[0]
    norm = np.linalg.norm(feature)
    if norm == 0:
        # Nothing to scale; avoid 0/0 -> NaN, which breaks downstream fitting.
        return feature
    return feature / norm

# Three lists filled in lock-step: position n of each one refers to the same image.
deep_features, file_names, animals = [], [], []

for step, (file_name, family) in enumerate(dict_labels.items()):
    # Load the image at 224x224 and turn it into a preprocessed batch of one.
    picture = image.load_img(f"NEW/{file_name}", target_size=(224, 224))
    batch = np.expand_dims(image.img_to_array(picture), axis=0)
    batch = preprocess_input(batch)

    deep_features.append(extract_feature(model, batch))
    file_names.append(file_name)
    animals.append(family)

    # Single-line progress counter, rewritten in place via the carriage return.
    sys.stdout.write("\rFinished iteration: %i" % step)
    sys.stdout.flush()

In [None]:
# Families left out of the classification task.
excluded_families = {'Scincidae', 'Anguidae'}

# Filter features and labels as pairs so the two lists cannot drift out of alignment.
kept = [
    (feature, family)
    for feature, family in zip(deep_features, animals)
    if family not in excluded_families
]
subset_features = [feature for feature, _ in kept]
subset_animals = [family for _, family in kept]

# Encode the family names as integer class labels (no one-hot encoding is applied).
labels = LabelEncoder().fit_transform(subset_animals)

# Stratified split: 70% train, 30% held out ...
X_train, X_other, y_train, y_other = train_test_split(subset_features, labels, stratify = labels, test_size = 0.3, random_state = 999)
# ... then the held-out part is split again, 1/3 of it going to the test set:
# roughly 20% validation and 10% test of the full data.
X_val, X_test, y_val, y_test = train_test_split(X_other, y_other, stratify = y_other, test_size = 0.3333, random_state = 999)

## 2. Machine Learning Classification
In this section, four support vector machines are trained, each with a different kernel. Their test accuracy is evaluated and their confusion matrices are plotted.

In [None]:
from sklearn.metrics import confusion_matrix
from sklearn import svm

In [None]:
# SVM with a linear kernel: fit on the training split, score on the test split.
linear = svm.SVC(kernel='linear', C=1, decision_function_shape='ovo')
linear.fit(X_train, y_train)
accuracy_lin = linear.score(X_test, y_test)
print('Accuracy Linear Kernel:', accuracy_lin)

In [None]:
# SVM with a radial basis function kernel (gamma fixed to 1): fit on the
# training split, score on the test split.
rbf = svm.SVC(kernel='rbf', gamma=1, C=1, decision_function_shape='ovo')
rbf.fit(X_train, y_train)
accuracy_rbf = rbf.score(X_test, y_test)
print('Accuracy Radial Basis Kernel:', accuracy_rbf)

In [None]:
# SVM with a degree-3 polynomial kernel: fit on the training split, score on
# the test split.
poly = svm.SVC(kernel='poly', degree=3, C=1, decision_function_shape='ovo')
poly.fit(X_train, y_train)
accuracy_poly = poly.score(X_test, y_test)
print('Accuracy Polynomial Kernel:', accuracy_poly)

In [None]:
# SVM with a sigmoid kernel: fit on the training split, score on the test split.
sig = svm.SVC(kernel='sigmoid', C=1, decision_function_shape='ovo')
sig.fit(X_train, y_train)
accuracy_sig = sig.score(X_test, y_test)
print('Accuracy Sigmoid Kernel:', accuracy_sig)

In [None]:
import pickle
# Persist every fitted classifier to disk. Each file is opened in a context
# manager so it is flushed and closed as soon as its model has been written
# (the original left all four handles open).
for sav_path, classifier in (
    ('sigmoid.sav', sig),
    ('polynomial.sav', poly),
    ('Radial.sav', rbf),
    ('linear.sav', linear),
):
    with open(sav_path, 'wb') as model_file:
        pickle.dump(classifier, model_file)

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

def _plot_confusion(y_true, y_pred, out_path):
    """Draw, save and show the row-normalised confusion matrix of one classifier.

    Args:
        y_true: True class labels.
        y_pred: Labels predicted by the classifier for the same samples.
        out_path: File name the figure is saved under.

    Returns:
        Tuple ``(cm, ax)``: the raw count matrix as a DataFrame and the object
        returned by ``sns.heatmap``.
    """
    cm = pd.DataFrame(confusion_matrix(y_true, y_pred))
    # Normalise each row by its own total. `cm / cm.sum(axis=1)` does NOT do
    # this: pandas aligns the Series of row totals with the *columns*, so every
    # off-diagonal cell ends up divided by the total of a different class.
    normalised = cm.div(cm.sum(axis=1), axis=0)

    plt.figure(figsize=(12, 12))
    ax = sns.heatmap(normalised, vmin=0, vmax=1, annot=True)
    plt.savefig(out_path)
    plt.show()
    return cm, ax


# Test-set predictions of the four fitted SVMs.
linear_pred = linear.predict(X_test)
poly_pred = poly.predict(X_test)
rbf_pred = rbf.predict(X_test)
sig_pred = sig.predict(X_test)

# One heatmap per kernel; the accuracy is embedded in the output file name.
cm_lin, heatmap = _plot_confusion(y_test, linear_pred, f'linear SVM accuracy {accuracy_lin}.png')
cm_poly, heatmap = _plot_confusion(y_test, poly_pred, f'Polynomial SVM accuracy {accuracy_poly}.png')
cm_rbf, heatmap = _plot_confusion(y_test, rbf_pred, f'Radial SVM accuracy {accuracy_rbf}.png')
cm_sig, heatmap = _plot_confusion(y_test, sig_pred, f'Sigmoid SVM accuracy {accuracy_sig}.png')