# CONSEGNA GRUPPO 19
*   Demetrio Trimarco
*   Emilio Sorrentino
*   Francesco Rosa
*   Francesco Sabbarese

# SETUP

In [None]:
# Mount Google Drive at /content/gdrive; the model and dataset paths used by
# this notebook all live under that mount point.
from google.colab import drive
drive.mount('/content/gdrive', force_remount=True)

## IMPORT


In [None]:
# The Colab version magic must run before TensorFlow is imported for the first
# time; once the package is loaded it can no longer switch versions.
%tensorflow_version 2.x
%load_ext tensorboard

# Standard library
from datetime import datetime
from functools import partial
import io
import json
import math
import os

# Third-party
from tensorflow import keras
import tensorflow as tf
from keras.callbacks import TensorBoard
from packaging import version
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
import cv2

In [None]:
# import vggface models
!pip install git+https://github.com/rcmalli/keras-vggface.git
!pip install keras_vggface
!pip install keras_applications
from keras_vggface.vggface import VGGFace
import keras_vggface.utils

## VARIABLES

In [None]:
# --- Evaluation configuration -------------------------------------------------

# Mini-batch size used when feeding the evaluation set to the model.
BATCH_SIZE = 64

# TFRecord file holding the evaluation split, and the number of samples
# expected in it.
eval_tfrecord_file_name = "/content/gdrive/MyDrive/CONSEGNA/DATASETS/SUBSET_2_val.tfrecord"
EVALUATION_SIZE = 140173

# Saved Keras model (.h5) to evaluate.
MODEL_PATH = "/content/gdrive/MyDrive/CONSEGNA/TEST/model_group_19.h5"

# RUN EVALUATION

## Label dictionary creation

In [None]:
from csv import reader

def create_example_dictionary(csv_path='/content/gdrive/MyDrive/CONSEGNA/DATASETS/train.age_detected.csv'):
    """Build the lookup table that maps a sample identifier to its age.

    Args:
        csv_path: Path of the CSV file to read. For every row the first column
            is used as the key and the last column as the value. Defaults to
            the labels file on the mounted Drive.

    Returns:
        dict mapping the first column of each non-empty row to the last column
        of that row. Values are kept as strings, exactly as read.
    """
    train_dictionary = {}
    # newline='' is what the csv module requires of the file object so that
    # line endings inside quoted fields are handled by the reader itself.
    with open(csv_path, 'r', newline='') as read_obj:
        print("Example label opened")
        for row in reader(read_obj):
            # Blank lines are yielded as empty lists; skip them instead of
            # crashing on row[0].
            if not row:
                continue
            train_dictionary[row[0]] = row[-1]

    return train_dictionary

# Module-level lookup table (sample identifier -> age string) read by get_age().
example_dictionary = create_example_dictionary()

## Parse tfrecord function

In [None]:
def get_age(filename, dictionary=None):
  """Return the age label of a sample, rounded to the nearest integer.

  Args:
    filename: Sample identifier as stored in the tfrecord. When called through
      tf.numpy_function it arrives as bytes; a str or a 0-d numpy array
      wrapping either one is accepted as well.
    dictionary: Optional identifier -> age mapping. When omitted the
      module-level example_dictionary is used.

  Returns:
    The age as np.int64, matching the tf.int64 output type declared by the
    tf.numpy_function callers.

  Raises:
    KeyError: if the identifier is not present in the mapping.
  """
  if dictionary is None:
    dictionary = example_dictionary
  if isinstance(filename, np.ndarray):
    filename = filename.item()
  # Decode the raw bytes rather than slicing their repr ("b'...'"): the repr
  # switches quoting/escaping when the name itself contains a quote character.
  if isinstance(filename, bytes):
    filename = filename.decode("utf-8")
  else:
    filename = str(filename)
  return np.int64(round(float(dictionary[filename])))

def decode_image(image):
  """Decode JPEG-encoded bytes into a 240x240, 3-channel uint8 tensor."""
  decoded = tf.image.decode_jpeg(image, channels=3)
  resized = tf.image.resize(decoded, [240, 240])
  # Bring the resized image back to 8-bit values.
  return tf.cast(resized, tf.uint8)

def conv_normalize(image):
  """Run the keras_vggface (version=2) input preprocessing on a channels-last image."""
  normalized = keras_vggface.utils.preprocess_input(
      image, data_format='channels_last', version=2)
  return normalized

def conv_BGR2RGB(image):
  """Convert an image from BGR to RGB channel order with OpenCV."""
  rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
  return rgb_image

def conv_RGB2BGR(image):
  """Convert an image from RGB to BGR channel order with OpenCV."""
  bgr_image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
  return bgr_image

# parse tfrecord samples
def read_tfrecord(example):
    """Parse one serialized tfrecord sample into an (image, label) pair.

    Args:
        example: Scalar string tensor holding a serialized example with the
            string features "image" (JPEG bytes) and "filename".

    Returns:
        Tuple (image, label): image is the float32 tensor produced by
        decode_image -> conv_BGR2RGB -> conv_normalize, label is the scalar
        int64 age returned by get_age for the sample's filename.
    """
    features = {
        "image": tf.io.FixedLenFeature([], dtype=tf.string),
        "filename": tf.io.FixedLenFeature([], dtype=tf.string),
    }

    example = tf.io.parse_single_example(example, features)

    # Original author's note: the decoded image has BGR channels.
    # NOTE(review): tf.image.decode_jpeg normally yields RGB; confirm how the
    # records were written before touching the channel swaps below.
    image = decode_image(example["image"])
    # Remember the static shape now: tf.numpy_function outputs carry none.
    static_image_shape = image.shape

    image = tf.numpy_function(conv_BGR2RGB, [image], tf.uint8)
    image = tf.cast(image, dtype=tf.float32)
    # conv_normalize needs RGB images as input and returns BGR images as output
    image = tf.numpy_function(conv_normalize, [image], tf.float32)
    # None of the steps above changes the image dimensions, so restore the
    # shape information that batching / Keras rely on.
    image.set_shape(static_image_shape)

    label = tf.numpy_function(get_age, [example["filename"]], tf.int64)
    # get_age returns a single integer per sample.
    label.set_shape([])

    return image, label

## Loading dataset function

In [None]:
# Load a dataset and parse the samples
def load_dataset(tfrecord_filename):
    """Open a TFRecord file and return a dataset of read_tfrecord-parsed samples."""
    raw_records = tf.data.TFRecordDataset(tfrecord_filename)
    return raw_records.map(read_tfrecord)

## Pipeline creation function

In [None]:
def apply_pipeline(path, labeled = True):
    """Return the parsed dataset stored at `path`, grouped into BATCH_SIZE batches.

    The `labeled` argument is accepted but not used by this function.
    """
    return load_dataset(path).batch(BATCH_SIZE)

## Load model

In [None]:
# Load the saved model without its training configuration (compile=False),
# then compile it explicitly with an SGD optimizer and a mean-absolute-error loss.
model = tf.keras.models.load_model(MODEL_PATH, compile=False)
model.compile(
    loss=tf.keras.losses.MeanAbsoluteError(),
    optimizer=tf.keras.optimizers.SGD(learning_rate=0.1, momentum=0.9),
)
# Uncomment to show the model architecture:
# model.summary()

## Predict

In [None]:
eval_dataset = apply_pipeline(eval_tfrecord_file_name)

# The dataset is already batched by apply_pipeline, so batch_size must not be
# passed again; workers / use_multiprocessing only apply to generator or
# Sequence inputs. steps has to be a whole number of batches: round up so the
# final, partially filled batch is predicted too.
y_pred = model.predict(
    eval_dataset,
    verbose=1,
    steps=math.ceil(EVALUATION_SIZE / BATCH_SIZE),
)

In [None]:
# Get labels from dataset
def _read_label(example):
  """Parse only the filename of a serialized sample and map it to its age label."""
  parsed = tf.io.parse_single_example(
      example, {"filename": tf.io.FixedLenFeature([], dtype=tf.string)})
  return tf.numpy_function(get_age, [parsed["filename"]], tf.int64)

# Only the filename is needed to look up a label, so read just that feature
# instead of decoding and preprocessing every image a second time.
label_dataset = tf.data.TFRecordDataset(eval_tfrecord_file_name).map(_read_label)
# Kept from the original cell. NOTE(review): iterating a tf.data pipeline does
# not appear to require eager function execution - confirm and drop if unused.
tf.config.run_functions_eagerly(True)

labels = [label.numpy() for label in label_dataset]
print("Labels acquired")

# compute the rounded labels: first output of every prediction, rounded to the
# nearest integer (ties go to the even value, like Python's round()).
y_pred_scalar = np.rint(np.asarray(y_pred)[:, 0]).astype(np.int64)

# Labels and predictions are paired by position; a length mismatch means the
# prediction run did not cover exactly the records of the evaluation file.
assert len(labels) == len(y_pred_scalar), (
    "got %d labels but %d predictions" % (len(labels), len(y_pred_scalar)))

In [None]:
# compute MAE
def MAE(y_true, y_pred):
  """Mean absolute error between ground-truth and predicted values.

  Args:
    y_true: Array-like of ground-truth values.
    y_pred: Array-like of predicted values, with as many elements as y_true.

  Returns:
    The mean of |y_pred - y_true| over all elements, as a float.

  Raises:
    ValueError: if the inputs are empty or differ in number of elements.
  """
  # Work on flat float arrays: avoids accidental (N, 1) vs (N,) broadcasting
  # and wrap-around when unsigned integer inputs are subtracted.
  true_values = np.asarray(y_true, dtype=np.float64).ravel()
  pred_values = np.asarray(y_pred, dtype=np.float64).ravel()
  if true_values.size != pred_values.size:
    raise ValueError(
        "y_true and y_pred must have the same number of elements "
        "(%d != %d)" % (true_values.size, pred_values.size))
  if true_values.size == 0:
    raise ValueError("MAE is undefined for empty inputs")
  # Average over the samples actually provided rather than a global constant.
  return np.mean(np.abs(pred_values - true_values))

# Mean absolute error between the dataset labels and the rounded predictions.
MAE(np.array(labels).T, y_pred_scalar)

In [None]:
# prediction matrix creation

from sklearn.metrics import confusion_matrix

# Confusion matrix of dataset labels (y_true) against rounded predictions (y_pred).
matrix = confusion_matrix(
    y_true=np.array(labels).T,
    y_pred=y_pred_scalar,
)

In [None]:
# plot of the confusion matrix

import seaborn as sn
import pandas as pd
import matplotlib.pyplot as plt

# When no explicit label list is given, confusion_matrix orders rows/columns by
# the sorted set of values occurring in the labels and predictions. Use those
# values as tick labels so the axes show ages instead of positional indices.
age_classes = np.union1d(np.array(labels).ravel(), np.asarray(y_pred_scalar).ravel())
if len(age_classes) == matrix.shape[0]:
  tick_labels = age_classes.astype(int)
else:
  # The matrix was built with a different label set; fall back to indices.
  tick_labels = range(matrix.shape[0])

df_cm = pd.DataFrame(matrix, tick_labels, tick_labels)
plt.figure(figsize=(33,33))
sn.set(font_scale=1) # for label size
# Integer counts are annotated with "d"; the default ".2g" would print large
# counts in scientific notation.
cell_format = "d" if np.issubdtype(matrix.dtype, np.integer) else ".2g"
ax = sn.heatmap(df_cm, annot=True, fmt=cell_format, annot_kws={"size": 7}) # font size
ax.set_xlabel("Predicted age")
ax.set_ylabel("True age")
ax.set_title("Confusion matrix on the evaluation set")

plt.show()