### Connect to drive
The dataset used for tests can be found [here](https://drive.google.com/drive/folders/1SZvBAe8-LApl8PAbEm2lGIhA6CPQX0CV?usp=sharing)

In [None]:
# Mount Google Drive inside the Colab runtime at /gdrive so that the dataset
# and the saved model used by the following cells can be read from it.
from google.colab import drive
drive.mount('/gdrive')

Mounted at /gdrive


In [None]:
%cd /gdrive/My Drive/ANNaDL/TTA-tests

/gdrive/My Drive/ANNaDL/TTA-tests


### Import libraries

In [None]:
import tensorflow as tf
import numpy as np
from tensorflow.keras.applications.vgg16 import preprocess_input
from tensorflow.keras.preprocessing.image import ImageDataGenerator

tfk = tf.keras
tfkl = tf.keras.layers

### Load data

In [None]:
# Load the trained model from the current working directory. '.' resolves
# against the folder selected by the earlier %cd cell, so that cell must have
# been run first.
# NOTE(review): assumes this folder holds a model saved in a format that
# load_model accepts - confirm.
filepath='.'
model = tfk.models.load_model(filepath)

In [None]:
def Directory_Iterator_to_Numpy(dir_it):
    """Collect every batch of a batch iterator into two NumPy arrays.

    Parameters
    ----------
    dir_it : sequence of batches
        Any object supporting ``len()`` and integer indexing whose item ``i``
        is indexable as ``(X_batch, y_batch, ...)``; in this notebook it is the
        iterator returned by ``flow_from_directory``.

    Returns
    -------
    X_data, y_data : np.ndarray
        All ``X_batch`` arrays and all ``y_batch`` arrays, each concatenated
        along axis 0 in batch-index order.

    Raises
    ------
    ValueError
        If ``dir_it`` contains no batches.
    """
    X_batches = []
    y_batches = []
    for i in range(len(dir_it)):
        # Index the iterator once per batch: each access may reload (and
        # re-transform) the batch, so reading X and y from the same fetch is
        # both cheaper and guaranteed consistent.
        batch = dir_it[i]
        X_batches.append(batch[0])
        y_batches.append(batch[1])

    if not X_batches:
        raise ValueError("dir_it contains no batches to concatenate")

    # A single concatenation avoids re-copying the growing arrays on every
    # iteration (quadratic in the number of batches).
    return np.concatenate(X_batches), np.concatenate(y_batches)

In [None]:
# Class names of the dataset (not referenced by the cells shown here).
labels = [
    'Apple', 'Blueberry', 'Cherry', 'Corn', 'Grape', 'Orange', 'Peach',
    'Pepper', 'Potato', 'Raspberry', 'Soybean', 'Squash', 'Strawberry',
    'Tomato',
]

dataset_dir = 'real_set'

# Plain generator: no augmentation and no preprocessing, so the arrays built
# below hold the images as loaded. Preprocessing is applied later by the
# generators used for prediction.
datagen = ImageDataGenerator()

# shuffle=False keeps the batches in a fixed order.
test_gen = datagen.flow_from_directory(
    directory=dataset_dir,
    color_mode="rgb",
    shuffle=False,
)

# Materialise the whole test set in memory as (images, labels) arrays.
X_test, y_test = Directory_Iterator_to_Numpy(test_gen)

Found 70 images belonging to 14 classes.


### Evaluate accuracy of the model with TTA
Modified version of functions found [here](https://machinelearningmastery.com/how-to-use-test-time-augmentation-to-improve-model-performance-for-image-classification/)

In [None]:
def tta_prediction(datagen, model, image, n_examples):
    """Predict the class of a single image with test-time augmentation.

    ``n_examples`` versions of ``image`` are produced by ``datagen``, the
    model outputs for all of them are summed, and the index of the largest
    summed score is returned.

    Parameters
    ----------
    datagen : object with a ``flow(array)`` method
        Returns an indexable batch iterator (an ``ImageDataGenerator`` in this
        notebook).
    model : object with a ``predict(batch, verbose=...)`` method
        A Keras model in this notebook.
    image : array-like
        One image without a batch axis.
    n_examples : int
        Number of generated versions to predict on; must be at least 1.

    Returns
    -------
    tf.Tensor
        Result of ``tf.argmax`` over the last axis of the summed predictions.
        NOTE(review): has shape ``(1,)`` assuming ``model.predict`` returns a
        ``(batch, n_classes)`` array - confirm for other models.

    Raises
    ------
    ValueError
        If ``n_examples`` is smaller than 1.
    """
    if n_examples < 1:
        raise ValueError("n_examples must be at least 1")

    # convert image into a dataset holding a single sample
    samples = np.expand_dims(image, 0)
    # prepare iterator
    it = datagen.flow(samples)

    # Each it[0] access asks the generator for a fresh batch, so any random
    # transformation configured on datagen is re-drawn per access. Stack the
    # generated versions so the model is invoked once instead of n_examples
    # times on one-image batches.
    augmented = np.concatenate([it[0] for _ in range(n_examples)])
    predictions = model.predict(augmented, verbose=0)

    # keepdims preserves the leading axis of size 1 that the per-image
    # prediction loop used to produce, so the return shape is unchanged.
    summed = np.sum(predictions, axis=0, keepdims=True)
    return tf.argmax(summed, axis=-1)

In [None]:
from sklearn.metrics import accuracy_score
def tta_evaluate_model(model, testX, testY, datagen, n_examples_per_image=5):
    """Compute the accuracy of ``model`` on a test set using TTA predictions.

    Parameters
    ----------
    model : model object forwarded to ``tta_prediction``.
    testX : array-like
        Test images, iterated along the first axis.
    testY : array-like
        Per-sample label vectors; the true class is taken as the argmax along
        axis 1.
    datagen : generator forwarded to ``tta_prediction``.
    n_examples_per_image : int, optional
        Number of generated versions predicted per test image (default 5,
        the value that used to be hard-coded).

    Returns
    -------
    Accuracy as returned by ``accuracy_score``.
    """
    yhats = []
    for image in testX:
        # make augmented prediction
        yhat = tta_prediction(datagen, model, image, n_examples_per_image)
        # tta_prediction returns a single-element result; store it as a plain
        # int so accuracy_score receives a flat 1-D list of labels.
        yhats.append(int(np.asarray(yhat).reshape(-1)[0]))

    # calculate accuracy against the index of the largest entry of each label row
    testY_labels = np.argmax(testY, axis=1)
    return accuracy_score(testY_labels, yhats)

### Prediction accuracy without TTA

In [None]:
# Baseline: this generator only applies preprocess_input and configures no
# random augmentation, so every pass made inside tta_evaluate_model sees the
# same input (assuming preprocess_input is deterministic). The result is
# therefore the model's plain accuracy, computed through the same code path
# as the TTA runs.
# Note: this rebinds `datagen`, replacing the generator used to load the data.
datagen = ImageDataGenerator(preprocessing_function=preprocess_input)
accuracy_without_tta = tta_evaluate_model(model, X_test, y_test, datagen)
print(accuracy_without_tta)

0.8571428571428571


### Average predictions
The final accuracy depends on which random augmentations happen to be applied. Therefore, the random seed was deliberately not set for the transformations, which makes it possible to average the accuracy over several independent runs.

In [None]:
# Number of independent TTA evaluations to average over.
num_of_accuracies = 10

# Generator used for TTA: random geometric transformations followed by the
# same preprocessing function as the baseline run.
datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    rotation_range=10.,
    shear_range=0.1,
    zoom_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode='nearest',
)

# One accuracy per evaluation; no seed is set, so each run draws different
# augmentations.
results = [
    tta_evaluate_model(model, X_test, y_test, datagen)
    for _ in range(num_of_accuracies)
]

# Mean accuracy over all runs (displayed as the cell output).
sum(results) / num_of_accuracies

0.9342857142857144