In [None]:
## For data handling
import pandas as pd
import numpy as np

## For plotting
import matplotlib.pyplot as plt
from seaborn import set_style
import seaborn as sns
## This sets the plot style
## to have a grid on a white background
set_style("white")

# For preparing data 
from sklearn.model_selection import train_test_split

#For neural network
from tensorflow import keras 
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img
from tensorflow.keras.preprocessing import image

## Importing the things
from keras import models
from keras import layers
from keras import optimizers
from keras import losses
from keras import metrics
from keras.utils import to_categorical
import json

#import skimage
#print(skimage.__version__)

import warnings
# Silences every Python warning for the rest of the session. This also hides
# deprecation warnings raised by the libraries imported above.
warnings.filterwarnings('ignore')

## Checks what is tensorflow running on
# Prints the list returned by device_lib.list_local_devices().
# NOTE(review): `tensorflow.python.*` does not look like a public import path --
# confirm whether a public equivalent should be used instead.
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())

# 1. Define directories that will be used for ImageDataGenerator to load train, validation, and test data sets
- For this model, I used the data set selected by my group members; our theme is classifying bird species found in the New York area.


In [None]:
import os

# Location of the raw archive and of the curated New York subset.
original_dataset_dir = './birds_archive/'
base_dir = './DataNY/'

# One sub-directory of `base_dir` per split. The directories are expected to
# exist already; this cell only builds the paths and creates nothing on disk.
train_dir, validation_dir, test_dir = (
    os.path.join(base_dir, split_name) for split_name in ('train', 'valid', 'test')
)

# 2. Data augmentation using ImageDataGenerator
- note: we scale pixel values by dividing them by 255, the maximum value

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Mini-batch sizes. `batchSize_valid` was referenced below without ever being
# defined, which raised a NameError on a fresh kernel; it now defaults to the
# training batch size and can be tuned independently.
batchSize = 64
batchSize_valid = batchSize

# Training images are rescaled to [0, 1] and randomly augmented
# (rotation, shifts, shear, zoom, horizontal flip).
train_datagen = ImageDataGenerator(rescale=1./255, rotation_range=40, width_shift_range=0.2,
                             height_shift_range=0.2, shear_range=0.2, zoom_range=0.2,
                             horizontal_flip=True, fill_mode='nearest')

# Validation/test images are only rescaled, never augmented.
test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(train_dir, target_size=(224, 224),
                                                    batch_size=batchSize, class_mode='categorical')
validation_generator = test_datagen.flow_from_directory(validation_dir, target_size=(224, 224),
                                                        batch_size=batchSize_valid, class_mode='categorical')
# batch_size=1 and shuffle=False keep the test images in a fixed order, so
# `filepaths` and `classes` line up index by index.
test_generator = test_datagen.flow_from_directory(test_dir, target_size=(224, 224),
                                batch_size=1, shuffle=False, color_mode="rgb", class_mode='categorical')


# we will use these for running our model
n_training_images = train_generator.n
n_species = len(np.unique(train_generator.labels))
n_valid_images = validation_generator.n

# 3. Pre-trained model: VGG16
- Since neither our base model nor the one trained with augmented data resolved the overfitting issue, we resort to a pre-trained model. Models trained from scratch on smaller datasets often require very long training times and still tend to overfit, resulting in poor predictive power on validation or test data. The idea of transfer learning is to use a model that was previously trained on a very large data set. Since visual learning follows a hierarchical structure, the weights in the convolutional layers of the pre-trained model can still detect patterns and pick up features in different parts of the input image, following the principle of spatial invariance.

- The main difference is the output layer. Every classification problem has its own number of classes, so the output varies. For this reason, in transfer learning we load only the convolutional layers of a pre-trained model and add a feed-forward dense network on top of the convolutional base, which works as the classifier for our problem. The convolutional base from the pre-trained model is usually frozen in order to utilize the pre-trained weights. In addition to giving us an efficient pattern-detecting network, this saves us a great deal of time and resources.

- I implemented this method, which greatly improved our validation accuracy. VGG16 is used for this purpose. It is among the most popular and best-performing models. Although it is a heavy model and might be considered outdated, as a first project in neural networks, particularly in computer vision, I found VGG16 great for understanding the internal architecture. I also tried VGG19, but the performance difference for our case is small.

- I also augmented our training data to improve accuracy. 

- Lastly, I applied another aspect of transfer learning: fine-tuning the last convolutional block of the VGG model, which allows better custom fitting to our data. This improved the final validation accuracy to 95%.

In [None]:
from keras.applications import VGG16

# Convolutional part of VGG16 only (no dense top), initialised with the
# ImageNet weights and sized for 224x224 RGB inputs.
conv_base = VGG16(
    include_top=False,
    weights='imagenet',
    input_shape=(224, 224, 3),
)
conv_base.summary()

# 4. Add a classifier network on top of the VGG16 convolutional base

In [None]:
# Dense classifier head stacked on top of the VGG16 convolutional base:
# flatten -> 256 relu -> dropout 0.5 -> 128 relu -> softmax over the species.
modelvggAugFT = models.Sequential([
    conv_base,                                        # pre-trained convolutional base
    layers.Flatten(),
    layers.Dense(256, activation='relu'),
    layers.Dropout(.5),
    layers.Dense(128, activation='relu'),
    layers.Dense(n_species, activation='softmax'),    # output layer, one unit per species
])

modelvggAugFT.summary()

# 5. Freeze all convolutional layers except for the last block
- This step helps improve the model performance since the last convolutional layers are likely to recognize specific patterns related to birds. Hence, we unfreeze the last 3 convolutional layers (block 5) and train them on our data.


In [None]:
print(
    'This is the number of trainable weights '
    'before partially freezing the conv base:',
    len(modelvggAugFT.trainable_weights),
)

# Walk the base in order: every layer before 'block5_conv1' is frozen,
# 'block5_conv1' and every layer after it stays trainable.
conv_base.trainable = True
set_trainable = False
for layer in conv_base.layers:
    set_trainable = set_trainable or layer.name == 'block5_conv1'
    layer.trainable = set_trainable

print(
    'This is the number of trainable weights '
    'after partially freezing the conv base:',
    len(modelvggAugFT.trainable_weights),
)

In [87]:
# OPTIONAL
# Resume from a previously saved model instead of the freshly built one.
# The checkpoint may not exist yet (for example on a first run), so the load
# is guarded to keep "Restart & Run All" working.
import os
from keras.models import load_model

saved_model_path = 'convNetvgg16_AugFT100NYa.h5'
if os.path.exists(saved_model_path):
    # The loaded model owns its own copy of the VGG16 base: `conv_base`
    # no longer refers to the layers inside `modelvggAugFT` after this line.
    modelvggAugFT = load_model(saved_model_path)
else:
    print('No saved model found at', saved_model_path, '- keeping the current model')
modelvggAugFT.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 vgg16 (Functional)          (None, 7, 7, 512)         14714688  
                                                                 
 flatten (Flatten)           (None, 25088)             0         
                                                                 
 dense (Dense)               (None, 256)               6422784   
                                                                 
 dropout (Dropout)           (None, 256)               0         
                                                                 
 dense_1 (Dense)             (None, 128)               32896     
                                                                 
 dense_2 (Dense)             (None, 100)               12900     
                                                                 
Total params: 21,183,268
Trainable params: 13,548,004


In [None]:
# OPTIONAL
# Check if I have frozen the correct layers.
# Inspect the convolutional base that is actually inside the model (its first
# layer) rather than the `conv_base` variable: if the model was re-loaded from
# disk, `conv_base` points at a different set of layers.
model_conv_base = modelvggAugFT.layers[0]
for i, layer in enumerate(model_conv_base.layers[:20]):
    print(i, layer.name, layer.trainable)

# 6. Train the model

In [None]:
%%time
# Compile the model 
from keras import optimizers
modelvggAugFT.compile(optimizer=optimizers.RMSprop(lr=1e-5), loss='categorical_crossentropy', metrics=['accuracy'])


# Run the model 

callback = tf.keras.callbacks.EarlyStopping(monitor='accuracy', mode='max', patience=20,  restore_best_weights=True)
## First make the validation set

# ## Fit the model, and store the training history
## using 100 epochs and a batch_size of 512
n_epoch= 10
historyvggAugFT = modelvggAugFT.fit_generator(train_generator,steps_per_epoch = n_training_images//batchSize, epochs = n_epoch,
                                validation_data = validation_generator,
                                validation_steps = 1 , callbacks=[callback], verbose=1)

historyvggAugFT_dict = historyvggAugFT.history
print(historyvggAugFT_dict.keys())

In [None]:
# Persist the trained model (architecture and weights) as a single HDF5 file.
modelvggAugFT.save(
    "convNetvgg16_AugFT100NYa.h5"
)
print("Saved model to disk")

# 6. Model performance

In [None]:
# run ONLY once
# Empty running logs of per-epoch accuracy; re-running this cell discards
# whatever has been accumulated in them so far.
train_accuracy, val_accuracy = [], []

In [None]:
# Append the latest training run's per-epoch accuracies to the running logs.
# The three history series are walked in lock-step; the validation loss is
# read but not stored.
epoch_records = zip(
    historyvggAugFT_dict['accuracy'],
    historyvggAugFT_dict['val_accuracy'],
    historyvggAugFT_dict['val_loss'],
)
for epoch_train_acc, epoch_val_acc, _epoch_val_loss in epoch_records:
    train_accuracy.append(epoch_train_acc)
    val_accuracy.append(epoch_val_acc)

# Write both logs to disk as JSON, training accuracy first.
for log_file, log_values in (("train_accuracy", train_accuracy),
                             ("validation_accuracy", val_accuracy)):
    with open(log_file, "w") as fp:
        json.dump(log_values, fp)

In [None]:
# Reload the accuracy logs from disk so the plot can be redrawn without retraining.
with open("train_accuracy", "r") as fp:
    train_accuracy = json.load(fp)
with open("validation_accuracy", "r") as fp:
    val_accuracy = json.load(fp)

N = len(val_accuracy)  # number of recorded epochs
epochs = range(1, N + 1)
best_val_accuracy = np.array(val_accuracy).max()  # computed once, reused below

# Display the metrics
set_style("whitegrid")
plt.figure(figsize=(16,10))
plt.plot(epochs, train_accuracy, 'b--^', markersize=5, label='training accuracy')
plt.plot(epochs, val_accuracy, 'g--o', markersize=5, alpha=0.8, label='validation accuracy')
plt.xlabel('Epoch', fontsize=16)
plt.ylabel('Accuracy', fontsize=16)

# Reference lines: 50% accuracy and the best validation accuracy reached.
plt.axhline(y=0.5, color='crimson', ls='--', alpha=1)
plt.axhline(y=best_val_accuracy, color='k', ls='--')

plt.text(1, np.round(best_val_accuracy, 2) + 0.01,
         '$Max ~validation ~ accuracy$ = ' + str(np.round(best_val_accuracy, 2)),
         color='k', fontsize=12)

# A space was missing after "with", which rendered e.g. "with100 species".
plt.title('Sample size=' + str(n_training_images + n_valid_images) + ', with ' + str(n_species) + ' species',
          fontsize=20, loc='center', pad=None)

plt.legend(fontsize=14)
#plt.savefig('performance_vgg16AugFT100NY.pdf')
plt.show()
plt.close()



# 7. Model performance based on a random image from test data folder 

In [None]:
set_style("white")

# Lookup table from class index (`label_index`) to species name (`labels`).
# Loaded here so this cell does not depend on a cell further down the notebook.
labelsDF = pd.read_csv('labelsDF.csv')

# Prediction power
# Pick one test image uniformly at random. A binomial draw is concentrated
# around the middle of the list and can return test_generator.n, which is one
# past the last valid index.
n = np.random.randint(test_generator.n)

# Load at the network's input size so the reshape below is always valid.
test_image = tf.keras.utils.load_img(test_generator.filepaths[n], color_mode='rgb',
                                     target_size=(224, 224), interpolation='nearest')

# Apply the same 1/255 rescaling the generators applied during training;
# feeding raw 0-255 pixel values would not match what the model was trained on.
test_input = image.img_to_array(test_image).reshape(-1, 224, 224, 3) / 255.
predicted_bird = modelvggAugFT.predict(test_input).argmax()
actual_bird = test_generator.classes[n]

plt.figure(figsize=(10,10))
plt.imshow(test_image)
plt.text(1, 220, 'Predicted: ' + str(labelsDF.loc[labelsDF['label_index'] == predicted_bird, 'labels'].values[0]),
         color='yellow', fontsize=14)

plt.text(1, 210, 'Actual: ' + str(labelsDF.loc[labelsDF['label_index'] == actual_bird, 'labels'].values[0]),
         color='brown', fontsize=14)


plt.show()
plt.close()

# 8. Model performance on the test data folder

In [None]:
# Imported here as well so this cell runs even when the OPTIONAL load cell was skipped.
from keras.models import load_model

modelvggAugFT = load_model('convNetvgg16_AugFT100NYa.h5')
labelsDF = pd.read_csv('labelsDF.csv')

# Load every test image at the network's input size, in generator order, so
# row i of `test_pred` matches test_generator.filepaths[i] and .classes[i].
img_list = []

for PATH in test_generator.filepaths:
    img = tf.keras.utils.load_img(PATH, color_mode='rgb',
                                  target_size=(224, 224), interpolation='nearest')

    img_list.append(image.img_to_array(img))


# Apply the same 1/255 rescaling the generators applied during training;
# predicting on raw 0-255 pixel values would not match the training input.
test_image_arr = np.array(img_list) / 255.

test_pred = modelvggAugFT.predict(test_image_arr)

set_style("white")

#predicted_array = modelvggAugFT.predict(X_val.reshape(-1,224,224,3))
def imageArray_predict(model, PATH):
    """Show the test image at ``PATH`` annotated with its predicted and actual species.

    The prediction is read from the precomputed module-level ``test_pred`` array
    and the true class from ``test_generator.classes``; labels are looked up in
    ``labelsDF``.

    Parameters
    ----------
    model : unused; kept so existing calls keep working.
    PATH : path of the image; must be one of ``test_generator.filepaths``.

    Raises
    ------
    ValueError
        If ``PATH`` is not one of ``test_generator.filepaths``.
    """
    # Derive the row index from PATH itself instead of relying on a global
    # loop variable, so image and labels can never get out of sync.
    idx = list(test_generator.filepaths).index(PATH)

    img = tf.keras.utils.load_img(PATH, color_mode='rgb',
                                  target_size=None, interpolation='nearest')

    predicted_bird = test_pred[idx].argmax()
    actual_bird = test_generator.classes[idx]

    plt.figure(figsize=(10,10))
    plt.imshow(img)
    plt.text(1, 220, 'Predicted: ' + str(labelsDF.loc[labelsDF['label_index'] == predicted_bird, 'labels'].values[0]),
             color='yellow', fontsize=14)

    plt.text(1, 210, 'Actual: ' + str(labelsDF.loc[labelsDF['label_index'] == actual_bird, 'labels'].values[0]),
             color='brown', fontsize=14)


    plt.show()
    plt.close()
    

def pred_acc(model, PATH):
    """Return 1 if the precomputed prediction for ``PATH`` matches its true class, else 0.

    The prediction is read from the module-level ``test_pred`` array and the
    true class from ``test_generator.classes``.

    Parameters
    ----------
    model : unused; kept so existing calls keep working.
    PATH : path of the image; must be one of ``test_generator.filepaths``.

    Raises
    ------
    ValueError
        If ``PATH`` is not one of ``test_generator.filepaths``.
    """
    # Derive the row index from PATH instead of relying on a global loop
    # variable. The image itself is not needed here, so it is no longer loaded.
    idx = list(test_generator.filepaths).index(PATH)

    predicted_bird = test_pred[idx].argmax()
    actual_bird = test_generator.classes[idx]
    return (predicted_bird == actual_bird) * 1

# Score (and display) the images of the *test* set with indices n1 .. n2-1.
# The upper bound comes from the generator rather than a hard-coded 499, so
# the whole test folder is covered and no index can run past its end.
n1 = 0
n2 = test_generator.n

accuracy = 0
# The loop variable is deliberately named `n`: the notebook uses a
# module-level `n` as the "current test image index".
for n in range(n1, n2):
    imageArray_predict(modelvggAugFT, test_generator.filepaths[n])  # <<<<<<<<<<<<< show images
    accuracy += pred_acc(modelvggAugFT, test_generator.filepaths[n])


print("sample accuracy =", accuracy, 'out of', (n2-n1))