In [None]:
import numpy as np
import scipy.io
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import *
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import  ModelCheckpoint, EarlyStopping, ReduceLROnPlateau

import os
import re

## Data pipeline

#### Show

In [None]:
# Number of samples per batch; used when batching the datasets and when deriving
# the step counts passed to fit/evaluate in later cells.
BATCH_SIZE = 1

def show_history(history):
	"""Plot the training and validation loss curves of a finished fit.

	Args:
		history: object whose `history` attribute is a dict holding the
			per-epoch 'loss' and 'val_loss' sequences.
	"""
	metrics = history.history
	print(metrics.keys())

	# Training curve first, validation second: the legend labels rely on this order
	for curve_name in ('loss', 'val_loss'):
		plt.plot(metrics[curve_name])

	plt.title('model loss')
	plt.ylabel('loss')
	plt.xlabel('epoch')
	plt.legend(['train', 'val'], loc = 'upper right')
	plt.show()

#### Create

In [None]:
def get_label(file_path):
	"""Read the numeric label from the name of the directory containing the file.

	Args:
		file_path: string tensor with the path of an image file.

	Returns:
		The second-to-last path component parsed as a tf.int64 number.
	"""
	# Second-to-last component is the directory that directly holds the file
	parent_dir = tf.strings.split(file_path, os.path.sep)[-2]

	return tf.strings.to_number(parent_dir, tf.int64)


def decode_img(img):
	"""Decode PNG bytes into a single-channel float32 image resized to 90x90.

	Args:
		img: raw contents of a PNG file.

	Returns:
		The decoded image after dtype conversion and resizing.
	"""
	decoded = tf.image.decode_png(img, channels = 1)
	as_float = tf.image.convert_image_dtype(decoded, tf.float32)
	resized = tf.image.resize(as_float, [90, 90])

	return resized


def get_bytes_and_label(file_path):
	"""Load one image file and pair it with the label taken from its path.

	Args:
		file_path: string tensor with the path of a PNG file.

	Returns:
		Tuple (decoded image, label).
	"""
	label = get_label(file_path)
	image = decode_img(tf.io.read_file(file_path))

	return image, label


def get_bytes(file_path):
	"""Load and decode one image file, without a label.

	Args:
		file_path: string tensor with the path of a PNG file.

	Returns:
		The decoded image.
	"""
	raw_bytes = tf.io.read_file(file_path)

	return decode_img(raw_bytes)

#### Prepare the train datasets

In [None]:
# Sentinel passed to tf.data (map parallelism, prefetch buffer) by the dataset helpers.
AUTOTUNE = tf.data.experimental.AUTOTUNE

def createDataset(path):
    """Build a dataset of (image, label) pairs from the PNG files under `path`.

    Files are matched with the glob `<path>/*/*.png`, i.e. one directory level
    below `path`; each file is loaded through `get_bytes_and_label`.

    Args:
        path: root directory (string) of the image folders.

    Returns:
        The mapped tf.data dataset.
    """
    file_pattern = path + '/*/*.png'
    file_paths = tf.data.Dataset.list_files(file_pattern)

    return file_paths.map(get_bytes_and_label, num_parallel_calls = AUTOTUNE)

In [None]:
def prepareDataset(dataset, dataset_length):
    """Cache, shuffle, batch and endlessly repeat a dataset for training.

    Args:
        dataset: tf.data dataset to prepare.
        dataset_length: number of elements in `dataset`; used as the shuffle
            buffer size so the shuffle covers the whole dataset.

    Returns:
        Tuple (prepared dataset, dataset_length). The prepared dataset repeats
        indefinitely, so callers must bound it with an explicit step count.
    """
    # Put the dataset in the cache
    dataset = dataset.cache()

    # Shuffle the images, to avoid training on one class at a time
    dataset = dataset.shuffle(buffer_size = dataset_length)

    # Create the batches
    dataset = dataset.batch(batch_size = BATCH_SIZE)

    # Repeat the dataset, so it never runs out
    dataset = dataset.repeat()

    # Prefetch is the last stage so upcoming batches keep being prepared in the
    # background, including across the boundary between two passes
    dataset = dataset.prefetch(buffer_size = AUTOTUNE)

    return dataset, dataset_length

In [None]:
def prepare_callbacks(file_path):
    """Build the training callbacks, all of them monitoring 'val_loss'.

    Args:
        file_path: where the checkpoint callback stores the best weights.

    Returns:
        List [ModelCheckpoint, EarlyStopping, ReduceLROnPlateau].
    """
    return [
        # Keep only the weights of the best epoch
        ModelCheckpoint(filepath = file_path, monitor = 'val_loss', verbose = 1,
                        save_weights_only = True, save_best_only = True),
        # Stop after 50 epochs without an improvement of at least 0.0001
        EarlyStopping(monitor = 'val_loss', min_delta = 0.0001, patience = 50, verbose = 1),
        # Halve the learning rate after 8 epochs without improvement
        ReduceLROnPlateau(monitor = 'val_loss', factor = 0.5, patience = 8,
                          min_lr = 0.000000001, verbose = 1),
    ]

In [None]:
def createModel(output, input):
    """Build and compile the fully-connected regression network.

    Args:
        output: number of units in the final linear layer.
        input: side length of the square, single-channel input image.

    Returns:
        A compiled Sequential model (Adam, learning rate 0.01, MAE loss).
    """
    model = Sequential()
    model.add(Flatten(input_shape = (input, input, 1)))

    # Six hidden blocks, each a Dense(32) followed by a LeakyReLU
    for _ in range(6):
        model.add(Dense(32))
        model.add(LeakyReLU(alpha = 0.01))

    # The seventh Dense(32) has no LeakyReLU after it.
    # NOTE(review): confirm this missing activation is intentional.
    model.add(Dense(32))
    model.add(Dense(output, activation = 'linear'))

    model.compile(optimizer = Adam(learning_rate = 0.01), loss = 'mae')

    return model

# Smoke check: build one model and display its architecture.
# summary() prints the table itself and returns None, so wrapping it in print()
# would only add a stray "None" line to the cell output.
test = createModel(1, 90)
test.summary()

#### Run

In [None]:
# Number of models trained (and later loaded) per training subfolder.
NUM_MODELS = 5
# Set to True to run the training loop over every subfolder of ./train_data/.
TRAIN = False

def runningPipeline(path):
    """Train NUM_MODELS fresh models on the images under one training folder.

    The validation folder is derived from `path` by replacing 'train_data' with
    'val_data'; checkpoints go to the same path with 'train_data' replaced by
    'modelsAug', under `model<i>/cp.ckpt`.

    Args:
        path: training folder path (string) containing 'train_data'.
    """
    dataset = createDataset(path)
    dataset_length = tf.data.experimental.cardinality(dataset).numpy()

    valset = createDataset(re.sub(r'train_data', r'val_data', path))
    valset_length = tf.data.experimental.cardinality(valset).numpy()

    dataset, dataset_length = prepareDataset(dataset, dataset_length)
    valset, _ = prepareDataset(valset, valset_length)

    # Keras documents the step arguments as integers; round up so a trailing
    # partial batch still counts as a step when BATCH_SIZE does not divide the length
    train_steps = int(np.ceil(dataset_length / BATCH_SIZE))
    val_steps = int(np.ceil(valset_length / BATCH_SIZE))

    checkpoint_root = re.sub('train_data', 'modelsAug', path)

    for i in range(NUM_MODELS):
        model = createModel(1, 90)

        model.fit(dataset, epochs = 500, steps_per_epoch = train_steps,
                  validation_data = valset, validation_steps = val_steps,
                  callbacks = prepare_callbacks(checkpoint_root + f'/model{i}/cp.ckpt'))


# Every directory directly under ./train_data/ is treated as its own training set.
subfolders = [f.path for f in os.scandir('./train_data/') if f.is_dir()]

# Training only runs when the TRAIN flag is enabled.
if TRAIN:
    for sf in subfolders:
        runningPipeline(sf)

## Evaluate Models

#### Load Models

In [None]:
# Rebinds `subfolders`: from here on it lists the directories under ./modelsAug/.
subfolders = [f.path for f in os.scandir('./modelsAug/') if f.is_dir()]

# Total number of models to load, NUM_MODELS for each subfolder found.
ALL_MODELS = len(subfolders) * NUM_MODELS


# One freshly built model per slot; the weights are loaded into them afterwards.
models = []

for _ in range(ALL_MODELS):
    model = createModel(1, 90)
    models.append(model)


# Index of the next slot of `models` to receive weights; advanced by load_weights
counter = 0

def load_weights(path):
    """Load the NUM_MODELS checkpoints of one folder into consecutive models.

    Reads `<path>/model<i>/cp.ckpt` for i in 0..NUM_MODELS-1 and advances the
    global `counter` by one for each checkpoint loaded.

    Args:
        path: folder (string) that holds the `model<i>` checkpoint directories.
    """
    global counter, models

    for i in range(NUM_MODELS):
        target_model = models[counter]
        target_model.load_weights(path + f'/model{i}/cp.ckpt')
        counter += 1

# Fill `models` folder by folder, in the order of `subfolders`
for sf in subfolders:
    load_weights(sf)

#### Evaluate Models

In [None]:
# Validation images used for evaluation: batched only, without the
# cache/shuffle/repeat stages that prepareDataset adds.
valset = createDataset('./val_data/Original')
valset_length = tf.data.experimental.cardinality(valset).numpy()
valset = valset.batch(batch_size = BATCH_SIZE)

# Set to True to evaluate every loaded model and write ./evaluate.txt.
EVALUATE = False

def evaluateModels():
    """Evaluate every loaded model on `valset` and report the mean error per folder.

    Models are visited in the order of `models`, NUM_MODELS per entry of
    `subfolders`. For each folder one line with the average error of its models
    is written to the global `file`, which must already be open for writing.

    Returns:
        Index into `models` of the model with the lowest error (0 when no model
        was evaluated).
    """
    global file

    # Start from infinity so any real error replaces it; a finite sentinel would
    # silently keep index 0 if every error were larger than it
    bestError = float('inf')
    bestIndex = 0
    counter = 0

    # Keras documents `steps` as an integer; round up to include a trailing partial batch
    eval_steps = int(np.ceil(valset_length / BATCH_SIZE))

    for sf in subfolders:
        error_sum = 0

        for _ in range(NUM_MODELS):
            model_error = models[counter].evaluate(valset, steps = eval_steps, verbose = 0)
            error_sum += model_error

            if model_error < bestError:
                bestError = model_error
                bestIndex = counter

            counter += 1

        file.write(re.sub(r'./modelsAug/', r'', sf) + f' - error: {error_sum / NUM_MODELS}\n\n')

    return bestIndex
    

if EVALUATE:
    # `file` has to be a module-level name because evaluateModels() writes to it
    # as a global. The context manager flushes and closes it, even when the
    # evaluation raises.
    with open('./evaluate.txt', 'w+') as file:
        bestIndex = evaluateModels()

## Ensemble

In [None]:
# Set to True to collect labels and per-model predictions on the validation set.
PREDS = False

def getLabelsAndPredictions():
    """Collect the validation labels and every model's rounded predictions.

    Labels and predictions are gathered in the same pass over `valset`, so
    position k refers to the same sample in all returned lists.

    Returns:
        Tuple (labels, preds): `labels` is a flat list of label values and
        `preds` holds one list per model with its rounded outputs.
    """
    global models

    preds = [[] for _ in range(ALL_MODELS)]
    labels = []

    # Iterating the dataset directly visits every batch once
    for images, labs in valset:
        labels.extend(labs.numpy())

        for i in range(ALL_MODELS):
            # predict_on_batch handles exactly one batch; Model.predict is meant
            # for large inputs and is documented as unsuitable for calls inside a loop
            preds[i].extend(np.round(models[i].predict_on_batch(images)))

    return labels, preds

if PREDS:
    labels, preds = getLabelsAndPredictions()
    # Sanity check: print the array shapes of the collected labels and predictions
    print(np.shape(labels), np.shape(preds))

#### Best Models

In [None]:
def error(real, predicted, threshold):
    """Tell whether a prediction lies strictly within `threshold` of the real value.

    Args:
        real: true value.
        predicted: predicted value.
        threshold: exclusive upper bound for the absolute difference.

    Returns:
        Result of `abs(real - predicted) < threshold`.
    """
    deviation = abs(real - predicted)

    return deviation < threshold

# Models selected for the ensemble; filled by filterModels() and the lines after it.
newModels = []

def filterModels(min_wins = 8):
    """Append to `newModels` every model that beats the best one often enough.

    A model "wins" on validation sample k when its absolute prediction error is
    strictly smaller than that of `models[bestIndex]` on the same sample. A
    model is selected as soon as it reaches `min_wins` wins. The best model
    itself is skipped here.

    Args:
        min_wins: number of winning samples required for selection. Defaults to
            8, the value that used to be hard-coded.
    """
    global models, labels, preds, newModels, bestIndex

    for i in range(ALL_MODELS):
        if i == bestIndex:
            continue

        wins = 0

        for k in range(valset_length):
            # The best model's error on this sample is the bar to beat
            best_abs_error = abs(labels[k] - preds[bestIndex][k])

            if error(labels[k], preds[i][k], best_abs_error):
                wins += 1

            # Stop scanning once the model has qualified
            if wins >= min_wins:
                newModels.append(models[i])
                break

# Select the models that pass the filter, then add the best model itself.
# NOTE(review): in the visible cells `bestIndex` is only assigned when EVALUATE is
# True and `labels`/`preds` only when PREDS is True; with both flags left at False
# these lines raise NameError on a fresh run — confirm the intended flag settings.
filterModels()
newModels.append(models[bestIndex])

#### Create Ensemble Model

In [None]:
# Single input shared by all selected models
model_input = tf.keras.Input(shape = (90, 90, 1))

# The ensemble prediction is the average of the individual model outputs
model_outputs = [model(model_input) for model in newModels]
ensemble_output = tf.keras.layers.Average()(model_outputs)

ensemble_model = tf.keras.Model(inputs = model_input, outputs = ensemble_output)
ensemble_model.compile(optimizer = Adam(learning_rate = 0.0001), loss = 'mae')

# summary() prints the table itself and returns None, so wrapping it in print()
# would only add a stray "None" line to the cell output
ensemble_model.summary()

#### Train Ensemble Model

In [None]:
# Training data for the ensemble: cached, shuffled, batched and repeated.
dataset = createDataset('./train_data/Original')
dataset_length = tf.data.experimental.cardinality(dataset).numpy()
dataset, dataset_length = prepareDataset(dataset, dataset_length)

# Rebinds `valset`/`valset_length`: from here on `valset` repeats indefinitely,
# unlike the batched-only validation set built in the evaluation cell.
valset = createDataset('./val_data/Original')
valset_length = tf.data.experimental.cardinality(valset).numpy()
valset, _ = prepareDataset(valset, valset_length)

# Set to True to fine-tune the ensemble and plot its loss curves.
TRAIN_ENSEMBLE = False

if TRAIN_ENSEMBLE:
    # Keras documents the step arguments as integers; round up so a trailing
    # partial batch still counts as a step when BATCH_SIZE does not divide the length
    history = ensemble_model.fit(dataset, epochs = 500,
                                 steps_per_epoch = int(np.ceil(dataset_length / BATCH_SIZE)),
                                 validation_data = valset,
                                 validation_steps = int(np.ceil(valset_length / BATCH_SIZE)),
                                 callbacks = prepare_callbacks('./ensemble_modelAug/cp.ckpt'))

    show_history(history)

## Predictions

In [None]:
# Set to True to run the ensemble on the test data loaded in this cell.
PREDICT = False

def get_data_from_mat(train_file, test_file):
    """Load the train and test arrays from two MATLAB files, last axis moved first.

    The arrays are read from the 'train_data' and 'test_data' variables and
    transposed with axes (2, 0, 1). Both resulting shapes are printed.

    Args:
        train_file: path of the .mat file holding 'train_data'.
        test_file: path of the .mat file holding 'test_data'.

    Returns:
        Tuple (train array, test array) after the transpose.
    """
    # Read both files before touching their contents
    loaded = {
        'train_data': scipy.io.loadmat(train_file),
        'test_data': scipy.io.loadmat(test_file),
    }

    train_np, test_np = [
        np.array(mat[key]).transpose(2, 0, 1) for key, mat in loaded.items()
    ]

    for stack in (train_np, test_np):
        print(stack.shape)

    return train_np, test_np

# Only the test array is used for inference; the training array is discarded.
# NOTE(review): the training images go through convert_image_dtype and a 90x90
# resize in decode_img; confirm test_data already has the same scale and size.
_, test_data = get_data_from_mat('./bap-imgap-212022/train_data.mat', './bap-imgap-212022/test_data.mat')


if PREDICT:
    # Round the ensemble output to zero decimals
    predictions = np.around(ensemble_model.predict(test_data), 0)
    print(predictions)

In [None]:
# Set to True to write the predictions to submission.csv.
# Requires `predictions`, which is only assigned when PREDICT is True.
WRITE = False

if WRITE:
    # The context manager flushes and closes the file, even when a write fails
    with open('submission.csv', 'w+') as f:
        f.write('id,age\n')

        # Ids are 1-based; each prediction row holds a single value
        for row_id, prediction in enumerate(predictions, start = 1):
            f.write(str(row_id) + ',' + str(int(prediction[0])) + '\n')