In [1]:

import datetime
import os
from collections import defaultdict
from glob import glob
from itertools import product
from random import choice, sample

import cv2
import keras_tuner as kt
import numpy as np
import pandas as pd
import tensorflow as tf
from keras_vggface.utils import preprocess_input
from keras_vggface.vggface import VGGFace
from tensorflow import keras
from tensorflow.keras.applications import ResNet50V2, ResNet101V2, Xception, InceptionV3, InceptionResNetV2, MobileNetV2, DenseNet121, DenseNet169, DenseNet201, NASNetMobile, EfficientNetB0, EfficientNetB1, EfficientNetB2, EfficientNetB3, EfficientNetB4, EfficientNetB5
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras.constraints import max_norm
from tensorflow.keras.layers import Input, Flatten, Dense, GlobalMaxPool2D, GlobalAvgPool2D, Conv1D, MaxPool1D, Concatenate, Add, Lambda, Multiply, Dropout, Subtract
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing import image

In [2]:
def test_models(test_cases):
    
    generator = gen(train, train_person_to_images_map, batch_size=16)
    for test_case in test_cases:
        print(f'testing {test_case}')
        images, labels = next(generator)
        image_1 = images['input_1']
        test_model = get_model_by_name(test_case)
        result = test_model(image_1)
        print(result)
        
def get_model_by_name(model_type):
    """Instantiate a headless (``include_top=False``) backbone by name.

    Args:
        model_type: ``"vgg16"``, ``"resnet50"`` or ``"senet50"`` for the
            VGGFace variants, or the class name of one of the supported
            ``tensorflow.keras.applications`` models (e.g. ``"Xception"``).

    Returns:
        The constructed model.

    Raises:
        ValueError: if ``model_type`` is not one of the supported names.
            Previously an unknown name fell through and returned ``None``.
    """
    kwargs = {'include_top': False}

    if model_type in ("vgg16", "resnet50", "senet50"):
        return VGGFace(model=model_type, **kwargs)

    # Constructors are wrapped in lambdas so only the requested one is touched.
    keras_backbones = {
        "Xception": lambda: Xception(**kwargs),
        "InceptionV3": lambda: InceptionV3(**kwargs),
        "InceptionResNetV2": lambda: InceptionResNetV2(**kwargs),
        "MobileNetV2": lambda: MobileNetV2(**kwargs),
        "DenseNet121": lambda: DenseNet121(**kwargs),
        "DenseNet169": lambda: DenseNet169(**kwargs),
        "DenseNet201": lambda: DenseNet201(**kwargs),
        "NASNetMobile": lambda: NASNetMobile(**kwargs),
        "EfficientNetB0": lambda: EfficientNetB0(**kwargs),
        "EfficientNetB1": lambda: EfficientNetB1(**kwargs),
        "EfficientNetB2": lambda: EfficientNetB2(**kwargs),
        "EfficientNetB3": lambda: EfficientNetB3(**kwargs),
        "EfficientNetB4": lambda: EfficientNetB4(**kwargs),
        "EfficientNetB5": lambda: EfficientNetB5(**kwargs),
    }
    if model_type not in keras_backbones:
        raise ValueError(f'unknown transfer learning model {model_type!r}')
    return keras_backbones[model_type]()

In [3]:
# Backbone names offered to the tuner's 'transfer_learning_model' choice.
list_of_transfer_learning_models = ["vgg16", "resnet50"]

In [4]:
# Read the three relationship tables, in the same order as before.
existing_train_data, new_train_data, new_new_train_data = (
    pd.read_csv(os.path.join('/home/roman/Work/kinship', csv_name))
    for csv_name in ('train_ds.csv', 'train2.csv', 'train3.csv')
)
# new_new_new_train_data = pd.read_csv('/home/roman/Work/kinship/train4.csv')

In [5]:
def _load_face_array(path):
    """Load ``path`` resized to 224x224 and return it as a float64 array."""
    # ``image`` is the Keras preprocessing module imported in the first cell.
    return np.array(image.load_img(path, target_size=(224, 224))).astype(np.float64)


def _randomly_perturb(face_array):
    """Apply one freshly drawn rotate/resize/translate perturbation."""
    parameters = generate_random_perturbation_parameters()
    return rotate_resize_translate_image(face_array, *parameters).astype(np.float64)


def read_img(path1, path2=None, perturb=False, color_transfer_bool=False):
    """Load one image, or a pair of images, ready for the VGGFace backbones.

    Args:
        path1: path of the first image.
        path2: path of the second image. When omitted, only ``path1`` is
            loaded and a single array is returned.
        perturb: if true, each image gets its own random
            rotate/resize/translate perturbation.
        color_transfer_bool: if true, ``color_transfer(image1, image2)``
            replaces the second image before any perturbation.

    Returns:
        ``(img1, img2)`` after ``preprocess_input(..., version=2)``, or a
        single preprocessed array when ``path2`` is ``None``.

    Raises:
        ValueError: if colour transfer is requested without ``path2``.
    """
    if path2 is None:
        if color_transfer_bool:
            raise ValueError("color_transfer_bool=True requires path2")
        face = _load_face_array(path1)
        if perturb:
            face = np.round(_randomly_perturb(face))
        return preprocess_input(face, version=2)

    image1 = _load_face_array(path1)
    # The perturb-only branch used to load path1 here a second time.
    image2 = _load_face_array(path2)

    if color_transfer_bool:
        image2 = color_transfer(image1, image2).astype(np.float64)

    if perturb:
        image1 = _randomly_perturb(image1)
        image2 = _randomly_perturb(image2)
        if not color_transfer_bool:
            # Pixel values were rounded only in the perturb-only case; kept as is.
            image1 = np.round(image1)
            image2 = np.round(image2)

    return preprocess_input(image1, version=2), preprocess_input(image2, version=2)
    
def gen(relationship_tuples, non_relationship_tuples, person_to_images_map, batch_size=16):
    """Endlessly yield balanced batches of image pairs.

    Each batch holds ``batch_size // 2`` pairs drawn from
    ``relationship_tuples`` (label 1) followed by ``batch_size // 2`` pairs
    drawn from ``non_relationship_tuples`` (label 0). For every pair one
    image per person is picked at random from ``person_to_images_map``.

    Args:
        relationship_tuples: collection of ``(person1, person2)`` positives.
        non_relationship_tuples: collection of ``(person1, person2)`` negatives.
        person_to_images_map: mapping from person key to a collection of
            image paths.
        batch_size: total number of pairs per batch.

    Yields:
        ``([X1, X2], labels)`` where ``X1``/``X2`` stack the first/second
        image of each pair and ``labels`` is a 1-D array of ones then zeros.
    """
    # random.sample() rejects sets on Python >= 3.11 and, before that,
    # converted them on every call; materialise the pools once instead.
    positive_pool = list(relationship_tuples)
    negative_pool = list(non_relationship_tuples)
    images_by_person = {}

    def pick_image(person):
        """Return a random image path for ``person`` (sequence is cached)."""
        if person not in images_by_person:
            images_by_person[person] = list(person_to_images_map[person])
        return choice(images_by_person[person])

    half_batch = batch_size // 2

    while True:
        person_pairs = sample(positive_pool, half_batch) + sample(negative_pool, half_batch)
        labels = half_batch * [1] + half_batch * [0]

        image_pairs = [
            read_img(pick_image(p1), pick_image(p2), perturb=False, color_transfer_bool=False)
            for p1, p2 in person_pairs
        ]

        X1 = np.array([first for first, _ in image_pairs])
        X2 = np.array([second for _, second in image_pairs])

        yield [X1, X2], np.array(labels)
        
        

In [6]:
relationship_set = set()        # (folder, folder) pairs treated as positives
non_relationship_set = set()    # (folder, folder) pairs treated as negatives
relationship_set_2 = set()      # same positives, keyed by first path component

folder_to_images_map = {}       # person folder -> set of image paths inside it
folder_to_group_id = {}         # person folder -> first component of its relative path
all_people_folders = set()
base_path = '/home/roman/Work/kinship/train/train-faces/'

# The three tables share one schema, so a single loop handles all of them.
for frame in (existing_train_data, new_train_data, new_new_train_data):
    for index, row in frame.iterrows():
        if row['relationship'] != 1:
            continue

        p1_path = row['p1'].split('/')
        p2_path = row['p2'].split('/')
        p1_rel_path = p1_path[0] + '/' + p1_path[1]
        p2_rel_path = p2_path[0] + '/' + p2_path[1]

        complete_p1_path = os.path.join(base_path, p1_rel_path)
        complete_p2_path = os.path.join(base_path, p2_rel_path)

        # Maintain sets for people-folders and known positive relationships,
        # recording every pair in both directions.
        relationship_set.add((complete_p1_path, complete_p2_path))
        relationship_set.add((complete_p2_path, complete_p1_path))
        relationship_set_2.add((p1_path[0], p2_path[0]))
        relationship_set_2.add((p2_path[0], p1_path[0]))

        for folder, group_id in ((complete_p1_path, p1_path[0]), (complete_p2_path, p2_path[0])):
            all_people_folders.add(folder)
            folder_to_group_id[folder] = group_id
            # Create the mapping between a folder and the image files it
            # contains. Each folder is walked once, and files are joined to
            # the directory os.walk found them in so nested paths stay valid.
            if folder not in folder_to_images_map:
                folder_to_images_map[folder] = {
                    os.path.join(root, name)
                    for root, dirs, files in os.walk(folder, topdown=False)
                    for name in files
                }

# Classify every ordered pair of person folders in one pass:
#   * unknown pairs from different groups become negatives;
#   * any other pair whose group pair is a known positive becomes a positive
#     (when a group is related to itself this includes a folder paired with
#     itself).
for candidate_pair in product(all_people_folders, repeat=2):
    a, b = candidate_pair
    a_person_id = folder_to_group_id[a]
    b_person_id = folder_to_group_id[b]
    person_candidate_pair = (a_person_id, b_person_id)
    if candidate_pair not in relationship_set and a_person_id != b_person_id:
        non_relationship_set.add(candidate_pair)
    elif person_candidate_pair in relationship_set_2:
        relationship_set.add(candidate_pair)
        
# for index, row in new_new_new_train_data.iterrows():
#     p1_rel_path = row['p1']
#     p2_rel_path = row['p2']

#     complete_p1_path = os.path.join(base_path, p1_rel_path)
#     complete_p2_path = os.path.join(base_path, p2_rel_path)

#     relationship_set.add((complete_p1_path, complete_p2_path))
#     relationship_set.add((complete_p2_path, complete_p1_path))

#     all_people_folders.add(complete_p1_path)
#     all_people_folders.add(complete_p2_path)

#     folder_to_images_map[complete_p1_path] = set()
#     folder_to_images_map[complete_p2_path] = set()


#     folder_to_images_map[complete_p1_path].add(complete_p2_path)
#     folder_to_images_map[complete_p2_path].add(complete_p2_path)
                

In [7]:

# Training CSV and the root folder of the face images.
train_file_path = os.path.join("/home/roman/Work/kinship", "train_ds.csv")
train_folders_path = os.path.join("/home/roman/Work/kinship", "train", "train-faces", "")

def split(a, n):
    """Cut ``a`` into ``n`` consecutive slices whose lengths differ by at most one.

    Returns a generator of slices of ``a``; the longer slices come first.
    """
    base_len, leftover = divmod(len(a), n)

    def start_of(chunk_no):
        # The first `leftover` chunks each carry one extra element.
        return chunk_no * base_len + min(chunk_no, leftover)

    return (a[start_of(chunk_no):start_of(chunk_no + 1)] for chunk_no in range(n))

def get_random_validation_sets(all_relationships, all_non_relationships, n):
    """Partition positives and negatives into ``n`` folds each.

    Both collections are listed in their own iteration order (no shuffling
    is applied here) and cut into ``n`` consecutive chunks with ``split``.

    Returns:
        ``(positive_folds, negative_folds)``, two lists of ``n`` sets.
    """
    positive_folds = [set(fold) for fold in split(list(all_relationships), n)]
    negative_folds = [set(fold) for fold in split(list(all_non_relationships), n)]
    return positive_folds, negative_folds

def get_random_train_val_sets(all_relationships, all_non_relationships, n, person_to_images_map):
    """Yield one ``(train_generator, validation_generator)`` pair per fold.

    The folds come from ``get_random_validation_sets``; for each fold the
    training generator draws from everything outside the fold and the
    validation generator draws from the fold itself.

    Args:
        all_relationships: collection of positive pairs.
        all_non_relationships: collection of negative pairs.
        n: number of folds.
        person_to_images_map: passed through to ``gen``.
    """
    # Set difference is needed below. Inputs that already are sets are used
    # as they are so their iteration order (and hence the folds) is unchanged.
    if not isinstance(all_relationships, (set, frozenset)):
        all_relationships = set(all_relationships)
    if not isinstance(all_non_relationships, (set, frozenset)):
        all_non_relationships = set(all_non_relationships)

    relationship_val_sets, non_relationship_val_sets = get_random_validation_sets(all_relationships, all_non_relationships, n)

    for val_relationship_set, val_non_relationship_set in zip(relationship_val_sets, non_relationship_val_sets):
        train_relationship_set = all_relationships.difference(val_relationship_set)
        train_non_relationship_set = all_non_relationships.difference(val_non_relationship_set)
        yield (
            gen(train_relationship_set, train_non_relationship_set, person_to_images_map),
            gen(val_relationship_set, val_non_relationship_set, person_to_images_map),
        )
 

In [8]:
# Number of cross-validation folds.
k_folds = 5

# Lazy stream of (train generator, validation generator) pairs, one per fold.
train_val_gen = get_random_train_val_sets(
    all_relationships=relationship_set,
    all_non_relationships=non_relationship_set,
    n=k_folds,
    person_to_images_map=folder_to_images_map,
)

# The validation folds themselves, as two lists of sets.
relationship_val_sets, non_relationship_val_sets = get_random_validation_sets(
    all_relationships=relationship_set,
    all_non_relationships=non_relationship_set,
    n=k_folds,
)

In [9]:
# import datetime
# import pickle
# # with open(f"validation_sets_kt_{datetime.date.today().strftime('%Y_%m_%d')}.pickle", 'wb') as f:
# #     pickle.dump(validation_sets, f)
# with open(f"validation_sets_kt_{datetime.date.today().strftime('%Y_%m_%d')}.pickle", 'rb') as f:
#      validation_sets = pickle.load(f)

## Callbacks

In [10]:
class CustomCallback(tf.keras.callbacks.Callback):
    """Stop training as soon as a monitored metric reaches a threshold.

    Args:
        threshold: value the metric must reach or exceed (default ``0.8``).
        monitor: key looked up in the epoch logs (default ``'val_accuracy'``).
    """

    def __init__(self, threshold=.8, monitor='val_accuracy'):
        super().__init__()
        self.threshold = threshold
        self.monitor = monitor

    def on_epoch_end(self, epoch, logs=None):
        """Request a stop when the monitored value is present and high enough."""
        # `logs` defaults to None, and the metric is absent when the model was
        # compiled under a different metric name; neither should raise here.
        current = (logs or {}).get(self.monitor)
        if current is not None and current >= self.threshold:
            self.model.stop_training = True

In [11]:
# file_path = "/home/roman/Work/kinship/vgg_face_ak.h5"
# checkpoint = ModelCheckpoint(file_path, monitor='val_accuracy', verbose=1, save_best_only=True, save_weights_only=True, mode='max')

log_dir = '/home/roman/Work/kinship/logs'

# TensorBoard logging into log_dir.
tensorboard_callback = TensorBoard(log_dir=log_dir)

# Stops a run once validation accuracy reaches the callback's threshold.
max_val_acc = CustomCallback()

# Cut the learning rate tenfold after one epoch without val_accuracy improvement.
reduce_on_plateau = ReduceLROnPlateau(
    monitor="val_accuracy",
    mode="max",
    factor=0.1,
    patience=1,
    verbose=1,
)

# Give up after three epochs without val_accuracy improvement.
stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=3)

# callbacks_list = [checkpoint, reduce_on_plateau]
callbacks_list = [
    tensorboard_callback,
    max_val_acc,
    reduce_on_plateau,
    stop_early,
]

## Model definition

In [12]:
def _make_pair_combiner(a, b, c):
    """Return ``f([x, y]) = (a*x + b*y)**2 + c*x*y`` with fixed coefficients.

    Creating the function here binds ``a``, ``b`` and ``c`` per layer. A
    lambda written directly in the loop body looks the names up when it is
    called, so any call made after the loop has finished would see only the
    last iteration's coefficients.
    """
    def combine(tensors):
        x, y = tensors[0], tensors[1]
        return (a * x + b * y) ** 2 + c * x * y
    return combine


def build_model(hp):
    """Build and compile a two-input model from KerasTuner hyperparameters.

    Both 224x224x3 inputs go through one shared backbone chosen from
    ``list_of_transfer_learning_models``. Their globally pooled features are
    merged by 1-4 quadratic Lambda layers, followed by 1-2 dense+dropout
    blocks and a sigmoid output.

    Args:
        hp: the ``HyperParameters`` object supplied by the tuner.

    Returns:
        The model, compiled with Adam, binary cross-entropy and accuracy.

    Raises:
        ValueError: if a pooling-style hyperparameter has an unexpected value.
    """
    input1 = tf.keras.Input(shape=(224, 224, 3))
    input2 = tf.keras.Input(shape=(224, 224, 3))

    base_model_string = hp.Choice('transfer_learning_model', list_of_transfer_learning_models, default='resnet50')
    base_model = get_model_by_name(base_model_string)

    # Explicitly mark every layer except the last three as trainable.
    for layer in base_model.layers[:-3]:
        layer.trainable = True
    x1 = base_model(input1)
    x2 = base_model(input2)

    x1_avg = GlobalAvgPool2D()(x1)
    x2_avg = GlobalAvgPool2D()(x2)

    x1_max = GlobalMaxPool2D()(x1)
    x2_max = GlobalMaxPool2D()(x2)

    pooled_image_1 = {'max': x1_max, 'average': x1_avg}
    pooled_image_2 = {'max': x2_max, 'average': x2_avg}

    lambda_outputs = []

    for i in range(hp.Int("number_of_lambda_layers", 1, 4, default=2)):

        image_1_pooling_style = hp.Choice(f"image_1_pooling_style_{i}", ['max', 'average'], default='max')
        image_2_pooling_style = hp.Choice(f"image_2_pooling_style_{i}", ['max', 'average'], default='max')

        a = hp.Float(f"x_coefficient_{i}", -4, 4)
        b = hp.Float(f"y_coefficient_{i}", -4, 4)
        c = hp.Float(f"xy_coefficient_{i}", -16, 16)

        if image_1_pooling_style not in pooled_image_1 or image_2_pooling_style not in pooled_image_2:
            raise ValueError(f'illegal pooling style {image_1_pooling_style} {image_2_pooling_style}')

        lambda_outputs.append(
            Lambda(_make_pair_combiner(a, b, c))(
                [pooled_image_1[image_1_pooling_style], pooled_image_2[image_2_pooling_style]]
            )
        )

    if len(lambda_outputs) > 1:
        x = Concatenate(axis=-1)(lambda_outputs)
    else:
        x = lambda_outputs[0]

    activation_function = hp.Choice("activation", ['relu', 'softplus', 'softsign', 'elu', 'selu'], default='relu')

    for j in range(hp.Int("number_of_dense_final_layers", 1, 2, default=1)):
        x = Dense(
            units=hp.Int(f"dense_final_unit_{j}", 8, 256, default=128),
            kernel_constraint=max_norm(1.5),
            bias_constraint=max_norm(1.5),
            activation=activation_function,
        )(x)
        x = Dropout(hp.Float(f"dropout_dense_final_unit_{j}", 0, 0.36))(x)

    out = Dense(1, activation="sigmoid")(x)

    model = tf.keras.Model([input1, input2], out)

    model.compile(
        optimizer=Adam(hp.Float("lr", 0.00000001, 0.001, sampling="log")),
        loss="binary_crossentropy",
        metrics=["accuracy"],
    )

    return model

In [13]:
def baseline_model():
    """Build, compile and summarise the hand-written two-input baseline.

    Both 224x224x3 inputs pass through one shared VGGFace ResNet50 backbone.
    The first branch is max-pooled and the second average-pooled; the model
    then concatenates ``(x1 - x2)**2``, ``x1**2 - x2**2`` and ``x1 * x2`` and
    feeds them through two dense+dropout blocks into a sigmoid output.

    Returns:
        The compiled model (its summary is printed as a side effect).
    """
    input_1 = Input(shape=(224, 224, 3))
    input_2 = Input(shape=(224, 224, 3))

    base_model = VGGFace(model='resnet50', include_top=False)

    # Explicitly mark every layer except the last three as trainable.
    for layer in base_model.layers[:-3]:
        layer.trainable = True

    x1 = base_model(input_1)
    x2 = base_model(input_2)

    # NOTE(review): the two branches use different pooling (max vs average);
    # confirm this asymmetry is intended.
    x1 = GlobalMaxPool2D()(x1)
    x2 = GlobalAvgPool2D()(x2)

    # (x1 - x2)^2
    x3 = Subtract()([x1, x2])
    x3 = Multiply()([x3, x3])

    # x1^2 - x2^2
    x1_ = Multiply()([x1, x1])
    x2_ = Multiply()([x2, x2])
    x4 = Subtract()([x1_, x2_])

    # x1 * x2
    x5 = Multiply()([x1, x2])

    x = Concatenate(axis=-1)([x3, x4, x5])
#     x = Dense(512, activation="relu")(x)
#     x = Dropout(0.03)(x)
    x = Dense(128, activation="relu")(x)
    x = Dropout(0.2)(x)
    x = Dense(128, activation="relu")(x)
    x = Dropout(0.2)(x)
    out = Dense(1, activation="sigmoid")(x)

    model = Model([input_1, input_2], out)

    # Use the metric name 'accuracy' so the logs carry 'val_accuracy', the
    # key build_model and the callbacks in this notebook rely on.
    model.compile(loss="binary_crossentropy", metrics=['accuracy'], optimizer=Adam(0.00001))
#     model.compile(loss=[focal_loss(alpha=.25, gamma=2)], metrics=['acc'], optimizer=Adam(0.00003))
#     model.compile(loss=[focal_loss(alpha=.25, gamma=2)], metrics=['acc'], optimizer=Adam(0.00001))
    model.summary()

    return model

In [14]:
# Build and compile the baseline network; baseline_model() also prints its summary.
model = baseline_model()

InternalError: CUDA runtime implicit initialization on GPU:0 failed. Status: out of memory

In [None]:
# train_gen / val_gen are otherwise bound only by the tuner-search loop
# further down, so take the first fold from train_val_gen here to keep this
# cell runnable on a fresh top-to-bottom execution.
train_gen, val_gen = next(train_val_gen)

history = model.fit(
    train_gen,
    use_multiprocessing=False,
    validation_data=val_gen,
    epochs=16,
    steps_per_epoch=32,
    validation_steps=128,
)


# Model Search

In [None]:
from tensorflow.keras.preprocessing import image as image_p

In [None]:
# Run one Hyperband search per cross-validation fold.
train_val_gen = get_random_train_val_sets(relationship_set, non_relationship_set, k_folds, folder_to_images_map)
for i, (train_gen, val_gen) in enumerate(train_val_gen):

    tuner = kt.Hyperband(
        build_model,
        max_model_size=1000000000,
        # Name the project after the fold counter. The stale global `index`
        # used before was the same for every fold, so with overwrite=True
        # each fold replaced the previous fold's results.
        project_name=f'/media/roman/9C4210C14210A256/kinship_models_random_validation_new_{i}',
        objective="val_accuracy",
        max_epochs=16,
        hyperband_iterations=1,
        overwrite=True,
        executions_per_trial=3,
    )

    tuner.search(
        train_gen,
        validation_data=val_gen,
        steps_per_epoch=128,
        validation_steps=128,
        callbacks=callbacks_list
    )

In [None]:
# Writes one submission CSV per tuner project.
# NOTE(review): train_val_generator, submission and test_path are first
# assigned in the cell that follows this one, so this cell depends on that
# cell having been run earlier in the session.
prediction_list = []

for j, _ in enumerate(train_val_generator):
    tuner = kt.Hyperband(
        build_model, 
        max_model_size=1000000000,
        # NOTE(review): `index` is not this loop's counter (that is `j`); it is
        # whatever value an earlier loop left behind - confirm which project is meant.
        project_name=f'/media/roman/9C4210C14210A256/kinship_models_random_validation_{index}',
        objective="val_accuracy", 
        max_epochs=36, 
        hyperband_iterations=1,
        overwrite=False,
        executions_per_trial=4,
    )
    # NOTE(review): best_hps is fetched but not used anywhere below.
    best_hps = tuner.get_best_hyperparameters(num_trials=8)
    
    predictions = []
    
    # Predict the submission pairs in batches of 32 rows.
    for i in range(0, len(submission.p1.values), 32):

        # NOTE(review): read_img is called with a single path here; confirm
        # this matches its current signature.
        X1 = submission.p1.values[i:i+32]
        X1 = np.array([read_img(test_path + x) for x in X1])

        X2 = submission.p2.values[i:i+32]
        X2 = np.array([read_img(test_path + x) for x in X2])

        # NOTE(review): `model` is never assigned inside this loop, yet it is
        # deleted at the end of every iteration.
        pred = model.predict([X1, X2]).ravel().tolist()
        predictions += pred
        
    # The result of this conversion is discarded; `predictions` stays a list.
    np.array(predictions)
        
    prediction_list.append(predictions)
    
    # The index column is hard-coded to 3000 rows, so `predictions` must have
    # exactly that length for the DataFrame to be constructed.
    d = {'index': np.arange(0, 3000, 1), 'label': predictions}
    submissionfile = pd.DataFrame(data=d)
    # Round the predicted scores to 0/1 labels.
    submissionfile = submissionfile.round()
    submissionfile['label'] = submissionfile['label'].astype(int)
    submissionfile.to_csv(f"/home/roman/Work/kinship/jba2131_rzw2002_pwc_random_validation_ensemble_v2_{datetime.date.today().strftime('%Y_%m_%d')}_{j}.csv", index=False)

    del model

In [None]:
# Collect, for every tuner project, the test-set predictions of its best models.
submission = pd.read_csv('/home/roman/Work/kinship/test_ds.csv')
test_path = "./test/"
prediction_list = []

# NOTE(review): get_train_validation_sets_from_split and validation_sets are
# not defined in the visible notebook; they must come from an earlier session.
train_val_generator = get_train_validation_sets_from_split(validation_sets)

for index, (train, val, train_map, val_map) in enumerate(train_val_generator):
    tuner = kt.Hyperband(
        build_model,
        max_model_size=1000000000,
        project_name=f'/media/roman/9C4210C14210A256/kinship_models_random_validation_{index}',
        objective="val_accuracy",
        max_epochs=36,
        hyperband_iterations=1,
        overwrite=False,
        executions_per_trial=4,
    )
    # Reload the trained models. Rebuilding from the best hyperparameters with
    # tuner.hypermodel.build() gives freshly initialised, untrained heads.
    # NOTE(review): this holds up to eight models in memory at once.
    best_models = tuner.get_best_models(num_models=8)
    for model in best_models:
        predictions = []
        # Predict the submission pairs in batches of 32 rows.
        for i in range(0, len(submission.p1.values), 32):
            batch_p1 = submission.p1.values[i:i+32]
            batch_p2 = submission.p2.values[i:i+32]

            # read_img works on a pair of paths and returns both images.
            image_pairs = [read_img(test_path + p1, test_path + p2) for p1, p2 in zip(batch_p1, batch_p2)]
            X1 = np.array([first for first, _ in image_pairs])
            X2 = np.array([second for _, second in image_pairs])

            pred = model.predict([X1, X2]).ravel().tolist()
            predictions += pred
        prediction_list.append(predictions)
    

In [None]:
import pickle

# Save every collected prediction vector so the ensemble can be rebuilt later.
with open('giant_ensemble_set.pickle', 'wb') as f:
    f.write(pickle.dumps(prediction_list))

In [None]:
# Display how many prediction vectors have been collected in prediction_list.
len(prediction_list)

In [None]:
from scipy import stats

# Majority vote over the first 39 prediction vectors: round every score to
# 0/1, then take the most common label per test row.
votes = np.rint(np.asarray(prediction_list[:39], dtype=float)).astype(int)

# Older SciPy returns the modes with a leading length-1 axis and newer
# releases drop it; ravel() yields one label per row in both cases (indexing
# with [0][0] would return a single scalar on newer SciPy).
final_predictions = np.ravel(stats.mode(votes, axis=0)[0])

# Size the index from the predictions instead of assuming 3000 test rows.
d = {'index': np.arange(len(final_predictions)), 'label': final_predictions}
submissionfile = pd.DataFrame(data=d)
submissionfile['label'] = submissionfile['label'].astype(int)
submissionfile.to_csv(f"/home/roman/Work/kinship/jba2131_rzw2002_automl_mega_ensemble_v1_{datetime.date.today().strftime('%Y_%m_%d')}.csv", index=False)

In [None]:
kaggle competitions submit -c coms4995-kinship-recognition -f /home/roman/Work/kinship/jba2131_rzw2002_automl_mega_ensemble_v1_2021_08_11.csv -m "automl mega ensemble v1 first 39"