In [38]:
import os
import pickle
import json
from pprint import pprint
from typing import List
import numpy as np
from sklearn.metrics import roc_auc_score

import torch

from framework import MODEL_PATH, RESULTS_PATH
from framework.utils.experiment_utils import get_model, check_network_name, get_datasets, get_data_loader
from framework.config.config_reader import ConfigReader


def load_model(model_path, model_settings):
    """Build a model from ``model_settings`` and load its saved weights.

    Args:
        model_path: Name of the saved model, without directory or extension.
            The weights are read from ``MODEL_PATH/<model_path>.pt``.
        model_settings: Settings passed unchanged to ``get_model``.

    Returns:
        The model returned by ``get_model`` with the saved state dict loaded.

    Raises:
        RuntimeError: If the resolved ``.pt`` file does not exist.
    """
    model = get_model(model_settings)
    model_path = os.path.join(MODEL_PATH, model_path + '.pt')
    if not os.path.exists(model_path):
        raise RuntimeError('Unkown {} model path'.format(model_path))
    # Deserialize onto the CPU so checkpoints written on a GPU machine can
    # still be read where CUDA is unavailable; load_state_dict then copies the
    # values into the model's parameters on whatever device they live on.
    # NOTE(review): torch.load unpickles the file - only load trusted checkpoints.
    state_dict = torch.load(model_path, map_location='cpu')
    model.load_state_dict(state_dict)
    return model

def get_scores(model, dataset):
    """Run ``model`` over every item of ``dataset`` and stack the outputs.

    Each item's first element is moved to the model's device, cast to float
    and flattened into a single-row batch before being fed to the model.

    Args:
        model: A module exposing ``eval()``, ``_device()`` and ``__call__``.
        dataset: An indexable, sized collection whose items have the input
            tensor at position 0.

    Returns:
        A tensor with the per-item model outputs stacked along a new leading
        dimension (one entry per dataset item, in dataset order).

    Raises:
        RuntimeError: From ``torch.stack`` when ``dataset`` is empty.
    """
    model.eval()
    # The device does not change during scoring, so look it up once.
    device = model._device()

    scores_list = []
    with torch.no_grad():
        for i in range(len(dataset)):
            # Flatten the features into a (1, n_features) batch of one sample.
            x = dataset[i][0].to(device, dtype=torch.float).view(1, -1)
            scores_list.append(model(x))
    return torch.stack(scores_list)

def eval(pred_targets, dataset):
    """Compute the overall and per-relationship ROC AUC of ``pred_targets``.

    NOTE: this function shadows the builtin ``eval``; the name is kept so
    existing callers keep working.

    Args:
        pred_targets: Indexable predictions, one per dataset item, in dataset
            order.
        dataset: Sized, indexable dataset whose items carry the true label at
            position 1 and whose ``dataset`` attribute supports
            ``.iloc[i].type`` giving the relationship type of item ``i``.

    Returns:
        dict with keys ``'auc'`` (all items), ``'fmd_auc'`` and ``'fms_auc'``
        (items whose relationship type is ``'fmd'`` / ``'fms'``).

    Raises:
        RuntimeError: If an item has a relationship type other than
            ``'fmd'`` or ``'fms'``.
    """
    targets = []
    # relationship type -> (true labels, predicted labels)
    per_type = {'fmd': ([], []), 'fms': ([], [])}
    for i in range(len(dataset)):
        y = dataset[i][1]
        targets.append(y)

        type_relation = dataset.dataset.iloc[i].type
        if type_relation not in per_type:
            raise RuntimeError('Unkown relationship type = {}'.format(type_relation))
        type_targets, type_preds = per_type[type_relation]
        type_targets.append(y)
        type_preds.append(pred_targets[i])

    targets_fmd, pred_targets_fmd = per_type['fmd']
    targets_fms, pred_targets_fms = per_type['fms']
    auc = roc_auc_score(np.array(targets), pred_targets)
    fmd_auc = roc_auc_score(targets_fmd, pred_targets_fmd)
    fms_auc = roc_auc_score(targets_fms, pred_targets_fms)
    return {'auc': auc,
            'fmd_auc': fmd_auc,
            'fms_auc': fms_auc}




    

In [39]:
# NOTE: List the profiles to ensemble here. A trained model must already be saved
# for each profile (it is looked up by the profile's experiment name).
# Full candidate list, kept for reference:
#   ['DROP_TWO_DEC_vgg2', 'DROP_TWO_DEC_arc', 'DROP_TWO_DEC_vgg', 'DROP_TWO_DEC_sphere']
model_profiles = ['DROP_TWO_DEC_arc', 'DROP_TWO_DEC_vgg2', 'DROP_TWO_DEC_sphere']
ensemble_name = 'majority_vote'
if not model_profiles:
    raise RuntimeError('Please fill in model profiles')

models = []
test_datasets = []
for profile in model_profiles:
    config_data = ConfigReader(profile).config_data

    # Load the datasets belonging to this profile's feature network.
    network_name = config_data['data_settings']['network_name']
    check_network_name(network_name)
    train_dataset, validation_dataset, test_dataset, vec_length = get_datasets(network_name)
    # NOTE: despite its name, ``test_datasets`` currently collects the
    # VALIDATION split; append ``test_dataset`` instead to score the test split.
    test_datasets.append(validation_dataset)

    # The input size depends on the dataset, so it is set just before the model is built.
    model_settings = config_data['model_settings']
    model_settings['input_size'] = vec_length
    experiment_name = config_data['experiment_name']
    models.append(load_model(experiment_name, model_settings))

# Stacked per-model scores.
# NOTE: scores[k] holds the scores of models[k] on test_datasets[k].
scores = torch.stack([get_scores(model, dataset) for model, dataset in zip(models, test_datasets)])



In [60]:
def high_score(scores):
    """Pick, per sample, the class with the highest score summed over models.

    Args:
        scores: Tensor whose dimension 0 indexes the models and whose
            dimension 2 is reduced by the max (the notebook feeds a
            (n_models, n_samples, 1, 2) tensor).

    Returns:
        NumPy array of winning indices along dimension 2 of the summed scores.
    """
    # Collapse the model dimension: (n_models, n_samples, 1, 2) -> (n_samples, 1, 2)
    summed = scores.sum(dim=0)
    # Only the indices of the maxima are needed, not the values.
    _, winners = summed.max(dim=2)
    return winners.cpu().numpy()

def majority_vote(scores):
    """Predict each sample's class by majority vote over the models.

    Every model votes for the class with its highest score (argmax over the
    last dimension); a sample is predicted as 1 only when strictly more than
    half of the models voted 1.

    Args:
        scores: 4-D tensor with models along dimension 0, samples along
            dimension 1 and class scores along dimension 3.

    Returns:
        Integer NumPy array of 0/1 predictions with the model dimension
        reduced away, i.e. one prediction per sample.
    """
    n_models = scores.size(0)
    # Per-model votes: (n_models, n_samples, ...)
    votes = np.argmax(scores.cpu().numpy(), axis=3)
    # Sum over the MODEL axis only; summing over everything would merge the
    # votes of all samples into a single number.
    positive_share = np.sum(votes, axis=0) / n_models
    # Note: Not >= because if it is 50/50 more likely to be not related
    return (positive_share > 0.5).astype(int)

def threshold_majority_vote(scores, threshold=0.85):
    """Majority vote restricted to models that are confident about a sample.

    For each sample, a model casts a vote for class 1 (or class 0) only if its
    score for that class exceeds ``threshold``. If no model is confident, every
    model votes with its argmax instead. An exact 50/50 split is resolved by
    the argmax of the first model (models are assumed to be ordered from best
    to worst).

    Args:
        scores: 4-D tensor with models along dimension 0, samples along
            dimension 1 and the two class scores along the last dimension
            (only index 0 of dimension 2 is used for the confidence check).
        threshold: Confidence a class score must exceed for a model's vote to
            count. Defaults to 0.85.

    Returns:
        list of int: one 0/1 prediction per sample.
    """
    scores = scores.cpu().numpy()
    n_models = scores.shape[0]
    pred_targets = []
    # Iterate over samples
    for i in range(scores.shape[1]):
        sample_scores = scores[:, i]  # scores of every model for sample i

        # Collect the votes of the confident models only.
        votes = []
        for pred_score in sample_scores:
            if pred_score[0][1] > threshold:
                votes.append(1)
            if pred_score[0][0] > threshold:
                votes.append(0)

        if votes:
            positive_share = sum(votes) / len(votes)
        else:
            # Nobody is confident: plain argmax majority vote over all models.
            positive_share = np.sum(np.argmax(sample_scores, axis=2)) / n_models

        if positive_share < 0.5:
            result = 0
        elif positive_share == 0.5:
            # Tie: trust the first (best) model.
            result = int(np.argmax(scores[0, i][0]))
        else:
            result = 1
        pred_targets.append(result)
    return pred_targets
                
            
def maj_vote_cascade(scores):
    """Majority vote of all models for a single sample.

    Args:
        scores: Array indexed as (model, row, class); each model votes for the
            argmax over the class axis (axis 2).

    Returns:
        int: the rounded fraction of models voting 1, taken from row 0.
        ``round`` is used, so an exact 0.5 rounds to 0.
    """
    per_model_votes = np.argmax(scores, axis=2)
    n_voters = len(per_model_votes)
    # Built-in sum adds the per-model rows element-wise.
    positive_fraction = sum(per_model_votes) / n_voters
    return int(round(positive_fraction[0]))

def cascade_classifier(scores, threshold=0.98):
    """Cascade ensemble: a confident positive wins, otherwise majority vote.

    For each sample the models are consulted in order (assumed best to worst).
    As soon as one model's class-1 score exceeds ``threshold`` the sample is
    predicted as 1. If no model is that confident, the prediction falls back
    to ``maj_vote_cascade`` over all models.

    Args:
        scores: 4-D tensor with models along dimension 0, samples along
            dimension 1 and the two class scores along the last dimension.
        threshold: Class-1 score a model must exceed to decide the sample on
            its own. Defaults to 0.98.

    Returns:
        NumPy array with one 0/1 prediction per sample.
    """
    scores = scores.cpu().numpy()
    pred_targets = []
    # Iterate over samples
    for i in range(scores.shape[1]):
        sample_scores = scores[:, i]  # scores of every model for sample i
        # any() stops at the first confident model, like the original break.
        if any(pred_score[0][1] > threshold for pred_score in sample_scores):
            pred_target = 1
        else:
            # No confident positive: majority vote of all models.
            pred_target = maj_vote_cascade(sample_scores)
        pred_targets.append(pred_target)

    return np.array(pred_targets)


# TODO: Cascade classifiers
# TODO: Confidence threshold for the networks: if a score is above it, classify
# directly; otherwise use a majority vote of all three
# TODO: Weighted classifiers
def get_ensemble(ensemble_name: str):
    """Return the ensemble function registered under ``ensemble_name``.

    Supported names: ``"high_score"``, ``'majority_vote'``, ``'cascade'`` and
    ``'thresh_majority_vote'``.

    Raises:
        RuntimeError: If ``ensemble_name`` is not one of the supported names.
    """
    # TODO: Add more ensemble methods
    if ensemble_name == "high_score":
        return high_score
    if ensemble_name == 'majority_vote':
        return majority_vote
    if ensemble_name == 'cascade':
        return cascade_classifier
    if ensemble_name == 'thresh_majority_vote':
        return threshold_majority_vote
    raise RuntimeError('Unkown ensemble name {}'.format(ensemble_name))

In [61]:
# Get the ensemble method function and run it to get predicted targets
# NOTE: this overrides the `ensemble_name` assigned in the model-loading cell.
ensemble_name = 'thresh_majority_vote'
ensemble_method = get_ensemble(ensemble_name)
pred_targets = ensemble_method(scores)

#  Evaluate model
# `eval` is this notebook's AUC helper (it shadows the builtin). Labels and
# relationship types are read from the first profile's dataset only -
# NOTE(review): this assumes every profile's dataset lists the same samples in
# the same order; confirm. `auc` is a dict with 'auc', 'fmd_auc' and 'fms_auc'.
auc = eval(pred_targets, test_datasets[0])
print('val dataset auc = {}'.format(auc))

val dataset auc = {'auc': 0.7435549525101764, 'fmd_auc': 0.7441860465116279, 'fms_auc': 0.7430025445292621}


In [44]:
# Paths of the stored result files of the individual (non-ensembled) runs.
vgg_res = os.path.join(RESULTS_PATH, "half_layer_dec_vgg_2020-03-21")
sphere = os.path.join(RESULTS_PATH, "half_layer_dec_sphere_2020-03-21")
arc = os.path.join(RESULTS_PATH, "half_layer_dec_arc_2020-03-21")
vgg2 = os.path.join(RESULTS_PATH, "half_layer_dec_vgg2_2020-03-21")
# The order here is the order in which `pickles` is filled below.
model_res = [vgg_res, sphere, arc, vgg2]
pickles = []
for result in model_res:
    # NOTE(review): pickle.load can execute arbitrary code - only open result
    # files produced by this project.
    with open(result, 'rb') as f:
        pickles.append(pickle.load(f))

In [45]:
# Show the stored 'test_acc' entry of every loaded result, in load order.
for model_result in pickles:
    pprint(model_result['test_acc'])

{'auc': 0.704406779661017,
 'fmd_auc': 0.6774425287356322,
 'fms_auc': 0.7284980744544287}
{'auc': 0.6349152542372881,
 'fmd_auc': 0.630028735632184,
 'fms_auc': 0.6392811296534018}
{'auc': 0.7050847457627119,
 'fmd_auc': 0.6673850574712643,
 'fms_auc': 0.7387676508344031}
{'auc': 0.7410169491525423,
 'fmd_auc': 0.7198275862068966,
 'fms_auc': 0.7599486521181001}
