In [None]:
## For data handling
import pandas as pd
import numpy as np

## For plotting
import matplotlib.pyplot as plt
from seaborn import set_style
import seaborn as sns
## This sets the plot style
## to have a grid on a white background
set_style("whitegrid")

#For machine learning methods
from sklearn.model_selection import train_test_split

#For neural network
from tensorflow import keras 
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing import image
## Importing the things
from keras import models
from keras import layers
from keras import optimizers
from keras import losses
from keras import metrics
from keras.utils import to_categorical
import json
import os

#import skimage
#print(skimage.__version__)

import warnings
warnings.filterwarnings('ignore')

## Report the devices TensorFlow can see in this session
from tensorflow.python.client import device_lib
local_devices = device_lib.list_local_devices()
print(local_devices)

## This imports datasets stored in keras
from keras.datasets import mnist

# 1. Import data (as in notebook 1)

In [None]:
# --- Read the three metadata tables of the birds archive -------------------
birds_db = pd.read_csv('./birds_archive/birds.csv')
# Number of rows per species label. A bare expression in the middle of a cell
# is silently discarded by Jupyter, so it is displayed explicitly.
display(birds_db.value_counts('labels'))
birds_class = pd.read_csv('./birds_archive/class_dict.csv')
birds_names = pd.read_csv('./birds_archive/birds latin names.csv')

print(birds_names.head(2))
print('..................................\n')
print(birds_class.head(2))
print('..................................\n')
display(birds_db.tail())

# --- Draw a reproducible random sample of 100 rows of the names table ------
birds_names_subset = birds_names.sample(100, random_state=42)
# Rename 'class' so the column name matches the 'labels' column of birds_db.
birds_names_subset = birds_names_subset.rename(columns={'class':'labels'})
print(birds_names_subset)

# --- Keep only the rows of birds_db whose label is in the sample -----------
birds_db_subset = birds_db[birds_db['labels'].isin(birds_names_subset['labels'])].dropna()
# NOTE: reset_index() without drop=True keeps the old index as an 'index' column.
birds_db_subset = birds_db_subset.reset_index()
display(birds_db_subset.head())

# --- Integer class index <-> species label lookup --------------------------
# Index i is the position of the label inside birds_names_subset.
label_dict = dict(enumerate(birds_names_subset['labels'].values))
labelsDF = pd.DataFrame(label_dict.items(), columns=['label_index','label'])


In [None]:
%%time 
from tensorflow.keras.preprocessing import image
# this piece loads image data into a list and a numpy array 
bird_labels = []
bird_label_num = []
birdImage_list_jpg = []
birdImage_list = []
#birdImage_arr = np.zeros(1)
#birdImage_arr = np.delete(birdImage_ar, 0)
for indx, filepath, label in zip(range(len(birds_db_subset)), birds_db_subset.filepaths, birds_db_subset.labels):
    try:
        bird_labels.append(label)
        bird_label_num.append(labelsDF.loc[labelsDF['label'] == label, 'label_index'].values[0])
        #print(filepath)
        img = tf.keras.utils.load_img(
                './birds_archive/Data/'+filepath,
                grayscale=False,
                color_mode='rgb',
                target_size=None,
                interpolation='nearest',
                keep_aspect_ratio=False)
        
        # list of bird images in jpg format 
        birdImage_list_jpg.append(img)  
        
        img_arr = image.img_to_array(img)
        birdImage_list.append(img_arr)
        img_arr_ = img_arr.reshape((1,) + img_arr.shape)
        # List containng all bird images each in array format 
        #birdImage_arr = np.append(birdImage_arr, img_arr)
    except (TypeError, IndexError) as e:
        pass
        #print(indx, label)

# Numpy array N x 224x224x3 containing all bird images 
birdImage_arr = np.array(birdImage_list) 

X_train, X_val, y_train, y_val = train_test_split(birdImage_arr, (pd.DataFrame(bird_label_num)[0]).values,
                                                                          test_size=0.15, shuffle=True, random_state=44)
print(X_train.shape)
print()
print(X_val.shape)

In [None]:
# Show the most recently loaded image next to each of its three colour channels.
fig, ax = plt.subplots(1, 4, figsize=(18, 18))
img_u8 = img_arr.astype('uint8')
panels = [img_u8] + [img_u8[:, :, channel] for channel in range(3)]
for axis, panel in zip(ax, panels):
    axis.imshow(panel)
plt.show()
plt.close()

# 2. Image augmentation 
- with Keras ImageDataGenerator

- The idea behind augmented images is to make sure the neural network does not see the exact same image twice during training, effectively creating an illusion for the model that the training sample is much larger than it really is. 

- For a training data set of size 1000, for example, each epoch uses all of the data exactly once. If the batch size is 50, there will be 1000/50 = 20 passes (iterations) in each epoch, making 20 updates to the weights and biases (which start from randomly assigned values). If we select the number of epochs to be 30, the neural network will see each image (training data point) 30 times in total. 

- If we implement augmentation on this, with batch size = 50 and number of epochs = 30, then during training the model does not see the original data points directly but random transformations of them, as defined in the `ImageDataGenerator`. In each epoch, every pass contains 50 transformed versions of original images, and together the passes cover all (transformed) training images once. In the next epoch this repeats with a new set of transformed versions of all the original images, so the model is trained on a "new" set of 1000 images in every epoch. Over all 30 epochs, each original image is shown to the model in a slightly different version each time, via the transformation (augmentation). 

- In short, over the 30 epochs the model will see 30 different but highly correlated versions of each of the 1000 images: one new version per image in every epoch. 

In [None]:
# Quick look at one randomly rotated copy of the most recently loaded image.
# `samples` was never defined in this notebook; flow() needs a rank-4 batch,
# so the single image gets a leading batch axis of size 1.
samples = np.expand_dims(img_arr, axis=0)
datagen = ImageDataGenerator(rotation_range=20)
# Named `sample_iter` rather than `iter` so the builtin iter() is not shadowed.
sample_iter = datagen.flow(samples, batch_size=2)
batch = next(sample_iter)
plt.imshow(batch[0].astype('uint8'))
plt.show()

# 3. Exploring augmented data

In [None]:
# ImageDataGenerator rotation
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Rotation-only generator. A richer configuration (shifts, shear, zoom, flip)
# used to be created just before this one and was immediately overwritten, so
# it never had any effect on the plots; only the effective generator is kept.
datagen = ImageDataGenerator(rotation_range=30)
# Iterator that yields a freshly augmented batch (here: one image) per next() call.
aug_iter = datagen.flow(img_arr_, batch_size=1)

# Plot a few augmented versions of the same image side by side.
n_examples = 3
fig, ax = plt.subplots(nrows=1, ncols=n_examples, figsize=(15,15))

for i in range(n_examples):
    # Convert to unsigned integers for display. The variable is deliberately
    # not called `image`, which would overwrite the imported `image` module.
    augmented_img = next(aug_iter)[0].astype('uint8')

    ax[i].imshow(augmented_img)
    ax[i].axis('off')

# 4. Convolutional Neural Network: architecture
- This is a baseline model built on two fundamental principles of computer vision:  $\textbf{1. Translational invariance}$ and $\textbf{2. Spatial hierarchy}$. It is the same simple model as in notebook 1, but here it is trained with augmented data. 

In [None]:
from keras.regularizers import l2 

# Number of output units. to_categorical() builds one-hot targets that are
# max(label) + 1 wide, so the same rule is applied here directly (over both
# splits) instead of materialising a full one-hot matrix just to read its width.
n_species = int(max(y_train.max(), y_val.max())) + 1

# Baseline CNN: four 3x3 convolutions with 2x2 max pooling in between, then
# dropout and two L2-regularised dense layers in front of a softmax output.
# All layers come from the same `layers` module as `models.Sequential`
# (the first layer previously came from `tf.keras.layers`).
modelB = models.Sequential([
    layers.Conv2D(64, 3, activation='relu', input_shape=(224,224, 3)),
    layers.MaxPool2D((2,2), strides=2),
    layers.Conv2D(64, (3,3), activation='relu'),
    layers.MaxPool2D((2,2), strides=2),
    layers.Conv2D(128, (3,3), activation='relu'),
    layers.MaxPool2D((2,2), strides=2),
    layers.Conv2D(128, (3,3), activation='relu'),
    # Flatten the feature maps and regularise before the feed-forward part
    layers.Flatten(),
    layers.Dropout(.5),
    # Fully connected layers
    layers.Dense(128, activation='relu', kernel_regularizer=l2(0.01), bias_regularizer=l2(0.01)),
    layers.Dense(128, activation='relu', kernel_regularizer=l2(0.01), bias_regularizer=l2(0.01)),
    # Output layer: one probability per class
    layers.Dense(n_species, activation='softmax'),
])

# `learning_rate` is the supported argument name; `lr` is a deprecated alias.
modelB.compile(optimizer=optimizers.RMSprop(learning_rate=1e-4),
               loss='categorical_crossentropy', metrics=['accuracy'])

modelB.summary()

In [None]:
# OPTIONAL: continue from a previously saved model instead of the freshly built one.
from keras.models import load_model

# The save cell of this notebook writes 'convNet_birds_baselineAug.h5'; the
# name this cell originally looked for ('...Augm.h5') is kept as a fallback.
saved_model_path = None
for candidate_path in ('convNet_birds_baselineAug.h5', 'convNet_birds_baselineAugm.h5'):
    if os.path.exists(candidate_path):
        saved_model_path = candidate_path
        break

if saved_model_path is not None:
    modelB = load_model(saved_model_path)
    print('Loaded model from', saved_model_path)
else:
    # Nothing saved yet: keep whatever modelB already is so Run All does not stop here.
    print('No saved model found - keeping the current modelB.')
modelB.summary()

# 5. We build data generator object for training set that supplies augmented data points for the model
- The validation set is not augmented: it is not used to update the weights and biases, only to measure performance on unmodified images. 


In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Training images: rescale pixels to [0, 1] and apply random augmentations.
train_datagen = ImageDataGenerator(rescale=1./255, rotation_range=40, width_shift_range=0.2,
                             height_shift_range=0.2, shear_range=0.2, zoom_range=0.2,
                             horizontal_flip=True, fill_mode='nearest')

# Validation images: same rescaling, no augmentation.
val_datagen = ImageDataGenerator(rescale=1./255)

# Without num_classes, to_categorical() sizes each one-hot matrix from the
# largest label in *that* array, so the train and validation targets could end
# up with different widths. Fix the width once from both splits.
n_classes = int(max(y_train.max(), y_val.max())) + 1

train_generator = train_datagen.flow(X_train, to_categorical(y_train, num_classes=n_classes), batch_size=32)
validation_generator = val_datagen.flow(X_val, to_categorical(y_val, num_classes=n_classes), batch_size=32)

In [None]:
%%time
# Run the model 
batchSize = 32

callback = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', mode='max', patience=5,  restore_best_weights=True)
## First make the validation set

# ## Fit the model, and store the training history
## using 100 epochs and a batch_size of 512
n_epoch= 10
historyB = modelB.fit(train_generator,steps_per_epoch=X_train.shape[0] // batchSize, epochs=n_epoch,
                                validation_data=validation_generator,validation_steps = X_val.shape[0]//batchSize,
                               verbose=1)

historyB_dict = historyB.history
print(historyB_dict.keys())



In [None]:
# Write the trained model to an HDF5 file in the working directory.
model_path = "convNet_birds_baselineAug.h5"
modelB.save(model_path)
print("Saved model to disk")

# 6. Model performance
- We save the accuracy history to disk so that, when the model is retrained later, the new epochs can be added to the existing list of epochs.

In [None]:
# Run ONLY once per session: re-running this cell empties the accumulated
# training / validation accuracy history.
trainB_accuracy, valB_accuracy = [], []

In [None]:
# Append the accuracies of the latest fit() call to the running history.
# Only the two accuracy series are needed; the earlier loop also pulled in
# 'val_loss' without using it. Values are coerced to plain floats so that
# json.dump() accepts them whatever numeric type the history holds.
# NOTE: running this cell twice for the same fit() appends the epochs twice.
trainB_accuracy.extend(float(acc) for acc in historyB_dict['accuracy'])
valB_accuracy.extend(float(acc) for acc in historyB_dict['val_accuracy'])

# Persist both series as JSON lists (the files are overwritten).
with open("train_accuracy", "w") as fp:
    json.dump(trainB_accuracy, fp)

with open("validation_accuracy", "w") as fp:
    json.dump(valB_accuracy, fp)

In [None]:
# Reload the accuracy history from the two JSON files.
with open("train_accuracy", "r") as fp:
    trainB_accuracy = json.load(fp)   
with open("validation_accuracy", "r") as fp:
    valB_accuracy = json.load(fp)
    
N = len(valB_accuracy)                            # number of recorded epochs
epoch_axis = range(1, N+1)
best_val_accuracy = np.array(valB_accuracy).max() # computed once, reused below

# Display the metrics
set_style("whitegrid")
plt.figure(figsize=(16,10))
plt.plot(epoch_axis, trainB_accuracy,'b--', markersize=5, alpha=0.5, label='training accuracy')
plt.plot(epoch_axis, valB_accuracy,'g--', markersize=5, alpha=0.7, label='validation accuracy')
# Red markers on top of the validation curve; no label, so the legend does not
# list 'validation accuracy' twice.
plt.plot(epoch_axis, valB_accuracy,'ro', markersize=3, alpha=1)
plt.xlabel('Epoch', fontsize=16)
plt.ylabel('Accuracy', fontsize=16)

# Reference lines: 50% accuracy and the best validation accuracy reached.
plt.axhline(y=0.5, color='crimson', ls='--', alpha=1)
plt.axhline(y=best_val_accuracy, color='k', ls='--')

plt.text(1,np.round(best_val_accuracy,2)+0.01, 
         '$Max ~validation ~ accuracy$ = ' +str(np.round(best_val_accuracy,2)), 
         color='k', fontsize=12 )


plt.legend(fontsize=14)
#plt.savefig('perfomance_convNetBaseline_reg.pdf')
plt.show()
plt.close()

set_style("white")

# Prediction power: any randomly selected image from the validation set.
# Draw the index uniformly over the whole validation set; the earlier
# binomial(100, 0.5) draw clustered around index 50 and ignored len(X_val).
n = np.random.randint(len(X_val))

# Add a batch axis of size 1 and apply the same 1/255 rescaling that the
# training and validation generators apply; X_val itself holds raw 0-255 pixels.
predicted_bird = modelB.predict(X_val[n][np.newaxis, ...] / 255.).argmax()
actual_bird = y_val[n]

plt.figure(figsize=(10,10))
# RGB image: a colormap would have no effect, so none is passed.
plt.imshow(X_val[n].astype('uint8'))
plt.text(1,220, 'Predicted: ' + str(labelsDF.loc[labelsDF['label_index'] == predicted_bird, 'label'].values[0]),
         color='yellow', fontsize=14 )

plt.text(1,210, 'Actual: ' + str(labelsDF.loc[labelsDF['label_index'] == actual_bird, 'label'].values[0]),
         color='brown', fontsize=14 )


plt.show()
plt.close()