In [1]:
import sys
sys.path.append('..')
%env CUDA_VISIBLE_DEVICES=0

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
%matplotlib inline

import torch
import torch.nn as nn
import os
from collections import defaultdict

from torch.distributions import MultivariateNormal

from src.mrartemev_nflib.flows import NormalizingFlowModel, InvertiblePermutation, Invertible1x1Conv, ActNorm, NSF_AR
from src.mrartemev_nflib.flows import MAF, AffineHalfFlow
from src.mrartemev_nflib.nn import ARMLP, MLP

from torch.utils.data import Dataset, DataLoader, TensorDataset
from itertools import repeat

from catboost import CatBoostClassifier
from sklearn.metrics import roc_auc_score

from src.nf import CalibratedModel, neg_log_likelihood
from src.nf.classifiers import train_catboost_clf
from scipy.special import logsumexp, expit


os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

device

env: CUDA_VISIBLE_DEVICES=0


device(type='cuda')

In [2]:
! ls dumps_20200602/GAS/SPLINE-AR_2_ind1

checkpoints  final_model.checkpoint


In [3]:
def fix_act_norm(layer):
    if isinstance(layer, ActNorm):
        layer.data_dep_init_done = True

In [4]:
def get_model(model_type, data, num_layers, dump_path):
    flows = []
    for _ in range(num_layers):
        if model_type == 'MAF':
            flows.append(MAF(dim=data.n_dims, base_network=ARMLP))
            flows.append(InvertiblePermutation(dim=data.n_dims))
        if model_type == 'SPLINE-AR':
            flows.append(ActNorm(dim=data.n_dims))
            flows.append(Invertible1x1Conv(dim=data.n_dims))
            flows.append(NSF_AR(dim=data.n_dims, K=8, B=3, hidden_features=32, depth=1, base_network=MLP))
        if model_type == 'GLOW':
            flows.append(ActNorm(dim=data.n_dims))
            flows.append(Invertible1x1Conv(dim=data.n_dims))
            flows.append(AffineHalfFlow(dim=data.n_dims, hidden_features=32, base_network=MLP))
            flows.append(InvertiblePermutation(dim=data.n_dims))
        if model_type == 'RealNVP':
            flows.append(AffineHalfFlow(dim=data.n_dims, base_network=MLP))
            flows.append(InvertiblePermutation(dim=data.n_dims))

    lr = 0.0005

    prior = MultivariateNormal(torch.zeros(data.n_dims).to(device), torch.eye(data.n_dims).to(device))
    model = NormalizingFlowModel(prior, flows).to(device)
    # optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    checkpoint = torch.load(dump_path)
    model.load_state_dict(checkpoint['model.state_dict()'])
    model.apply(fix_act_norm)
    
    return model

In [5]:
def to_device(model, device):
    model.to(device)
    model.prior = MultivariateNormal(torch.zeros(data.n_dims).to(device),
                                     torch.eye(data.n_dims).to(device))

In [6]:
def repeater(data_loader):
    for loader in repeat(data_loader):
        for data in loader:
            yield data

In [7]:
def batched_sample(model, n, batch_size=14000):
    generated = []
    for _ in range(n // batch_size):
        generated_batch = model.sample(batch_size)
        generated.append(generated_batch.cpu().detach())
    if n % batch_size != 0:
        generated_batch = model.sample(n % batch_size)
        generated.append(generated_batch.cpu().detach())
    generated = torch.cat(generated, dim=0)
    assert n == len(generated)
    return generated

In [8]:
def logloss_with_logits(y_pred_logits, y_true):
    return -np.mean(
        y_true * np.logaddexp(0, -y_pred_logits) + \
        (1 - y_true) * np.logaddexp(0, y_pred_logits)
    )

In [9]:
def train_cb(model, X_train_tensor, X_test_tensor, clips):
    clf_ds_train = np.row_stack([
        np.column_stack([X_train_tensor.cpu().detach().numpy(), np.ones(len(X_train_tensor)).reshape(-1, 1)]),
        np.column_stack([model.sample_n(len(X_train_tensor)).cpu().detach().numpy(), np.zeros(len(X_train_tensor)).reshape(-1, 1)])
    ]).astype(np.float32)

    clf_ds_test = np.row_stack([
        np.column_stack([X_test_tensor.cpu().detach().numpy(), np.ones(len(X_test_tensor)).reshape(-1, 1)]),
        np.column_stack([model.sample_n(len(X_test_tensor)).cpu().detach().numpy(), np.zeros(len(X_test_tensor)).reshape(-1, 1)])
    ]).astype(np.float32)

    samples = model.sample_n(n).cpu().detach().cpu().numpy()
    clf = CatBoostClassifier(100, verbose=0).fit(
        clf_ds_train[:, :-1], clf_ds_train[:, -1],
    )

    metrics = []
    for clip in clips:
        calibrated_model = CalibratedModel(
            lambda x: np.clip(clf.predict(x, prediction_type='RawFormulaVal'), -100, clip),
            model,
            logit=True
        )
        clf_preds = np.clip(clf.predict(samples, prediction_type='RawFormulaVal'), -100, np.log(clip))
        calibration_constant = logsumexp(clf_preds) - np.log(len(clf_preds))
        logits = clf_preds - calibration_constant
        ll = -neg_log_likelihood(calibrated_model, X_test_tensor.cpu().detach()) - calibration_constant
        auc_roc = roc_auc_score(
            clf_ds_test[:, -1],
            np.clip(clf.predict(clf_ds_test[:, :-1], prediction_type='RawFormulaVal'), -100, np.log(clip)),
        )
        log_loss = logloss_with_logits(
            np.clip(clf.predict(clf_ds_test[:, :-1], prediction_type='RawFormulaVal'), -100, np.log(clip)),
            clf_ds_test[:, -1]
        )
        metrics.append({
            'clip': clip,
            'll': ll,
            'auc_roc': auc_roc,
            'logloss': log_loss,
            'overhead': np.max(logits),
            'calibration_constant': calibration_constant
        })
    
    return clf_ds_train, clf_ds_test, metrics

In [10]:
from utils import data_utils

data_mapping = {'BSDS300': data_utils.BSDS300,
                'GAS': data_utils.GAS,
                'MINIBOONE': data_utils.MINIBOONE,
                'POWER': data_utils.POWER,
                'HEPMASS': data_utils.HEPMASS}

In [22]:
def get_best_model(model_type, data, num_layers, dumps_path):
    best_ll, best_model, best_dump = -10000000, None, None
    X_test_tensor = torch.from_numpy(data.tst.x[:100000]).to(device)
    
    for dump_path in [dumps_path + '/final_model.checkpoint'] + [
        os.path.join(dumps_path, 'checkpoints', path) for path in os.listdir(dumps_path + '/checkpoints')
    ]:
        try:
            model = get_model(model_type, data, num_layers, dump_path)
        except FileNotFoundError:
            print(f'Not found {dump_path}')
            continue
        ll = -neg_log_likelihood(model, X_test_tensor)
        if ll > best_ll:
            best_ll = ll
            best_model = model
            best_dump = dump_path
    return best_model, best_ll, best_dump

In [23]:
arr = []
for data_name in ('MINIBOONE', 'BSDS300', 'GAS', 'HEPMASS', 'POWER'):
    data = data_mapping[data_name]()
    dim = data.n_dims
    n = min(100000, data.trn.x.shape[0])
    X_train_tensor = torch.from_numpy(data.trn.x[:n]).to(device)
    X_test_tensor = torch.from_numpy(data.tst.x[:n]).to(device)

    for model_type in ('GLOW', 'MAF', 'RealNVP', 'SPLINE-AR'):
        num_layers = 2 if model_type == 'SPLINE-AR' else 5
        model_name = f"{model_type}_{num_layers}"

        dumps_path = f'dumps_20200602/{data_name}/{model_type}_{num_layers}_ind1'
        model, ll, dump_path = get_best_model(model_type, data, num_layers, dumps_path)
        
        model.eval()
        model.sample_n = lambda n: batched_sample(model, n)
        to_device(model, 'cpu')

        clf_ds_train, clf_ds_test, metrics = train_cb(model, X_train_tensor, X_test_tensor, [10000, 2])
        
        arr.append({
            'data_name': data_name,
            'model_type': model_type,
            'll': ll,
            'metrics': metrics,
            'dump_path': dump_path,
        })
        print(data_name, model_type, ll, [x['ll'] for x in metrics])
        print(arr[-1])

MINIBOONE GLOW -14.054896354675293 [3.257138512010087, -4.646375509492756]
{'data_name': 'MINIBOONE', 'model_type': 'GLOW', 'll': -14.054896354675293, 'metrics': [{'clip': 10000, 'll': 3.257138512010087, 'auc_roc': 0.9999999248566867, 'logloss': -0.0007338693951429274, 'overhead': 9.127328917145746, 'calibration_constant': -6.671234387179703}, {'clip': 2, 'll': -4.646375509492756, 'auc_roc': 0.9998629385964912, 'logloss': -0.2030827895928715, 'overhead': 8.10166463768899, 'calibration_constant': -7.408517457129044}], 'dump_path': 'dumps_20200602/MINIBOONE/GLOW_5_ind1/checkpoints/50000.checkpoint'}
MINIBOONE MAF -12.341054916381836 [-11.033188823029104, -11.268384160588223]
{'data_name': 'MINIBOONE', 'model_type': 'MAF', 'll': -12.341054916381836, 'metrics': [{'clip': 10000, 'll': -11.033188823029104, 'auc_roc': 0.8285463285578255, 'logloss': -0.48238385914562687, 'overhead': 5.0822899719724965, 'calibration_constant': -0.10975065017796659}, {'clip': 2, 'll': -11.268384160588223, 'auc_r

HEPMASS RealNVP -19.193140029907227 [0.6936667486050601, -8.864275113390764]
{'data_name': 'HEPMASS', 'model_type': 'RealNVP', 'll': -19.193140029907227, 'metrics': [{'clip': 10000, 'll': 0.6936667486050601, 'auc_roc': 0.9999999968000001, 'logloss': -0.00015365008757932887, 'overhead': 10.359239139864494, 'calibration_constant': -5.414492300736859}, {'clip': 2, 'll': -8.864275113390764, 'auc_roc': 0.9999849991, 'logloss': -0.20278806696136475, 'overhead': 9.022068125860551, 'calibration_constant': -8.328920945300606}], 'dump_path': 'dumps_20200602/HEPMASS/RealNVP_5_ind1/final_model.checkpoint'}
Not found dumps_20200602/HEPMASS/SPLINE-AR_2_ind1/final_model.checkpoint
HEPMASS SPLINE-AR -16.326007843017578 [-16.10238395203097, -16.07553506148693]
{'data_name': 'HEPMASS', 'model_type': 'SPLINE-AR', 'll': -16.326007843017578, 'metrics': [{'clip': 10000, 'll': -16.10238395203097, 'auc_roc': 0.6606188503000001, 'logloss': -0.6262053722110621, 'overhead': 2.92123812051837, 'calibration_constan

In [None]:
[x['dump_path'] for x in arr]

In [24]:
metrics = defaultdict(dict)
for x in arr:
    metrics[x['data_name']][x['model_type']] = x['ll']
    for clip in x['metrics']:
        if clip['clip'] == 100:
            metrics[x['data_name']][x['model_type'] + ' C'] = clip['ll']
        else:
            metrics[x['data_name']][x['model_type'] + ' C ' + str(round(clip['clip'], 1))] = clip['ll']
pd.DataFrame(metrics)

Unnamed: 0,MINIBOONE,BSDS300,GAS,HEPMASS,POWER
GLOW,-14.054896,152.595673,9.409331,-18.73378,0.243388
GLOW C 10000,3.257139,167.037307,24.041695,-0.118138,9.642893
GLOW C 2,-4.646376,161.783699,17.78111,-6.931381,6.348253
MAF,-12.341055,146.665405,7.9334,-19.643145,0.128281
MAF C 10000,-11.033189,147.384294,8.622444,-18.831326,0.395814
MAF C 2,-11.268384,147.403227,8.730445,-18.672319,0.43964
RealNVP,-13.369072,144.850861,8.754536,-19.19314,-0.608323
RealNVP C 10000,7.858391,167.785041,25.904475,0.693667,12.426084
RealNVP C 2,-2.628655,156.098972,17.601569,-8.864275,7.615017
SPLINE-AR,-18.083902,155.022278,10.029469,-16.326008,0.374713


In [25]:
metrics = defaultdict(dict)
for x in arr:
    metrics[(x['data_name'], 'll', )][(x['model_type'], 'normalizing flow', )] = x['ll']
    metrics[(x['data_name'], 'log overhead', )][(x['model_type'], 'normalizing flow', )] = 0
#     metrics[(x['data_name'], 'calib const', )][(x['model_type'], 'normalizing flow', )] = 0
    for clip in x['metrics']:
        if clip['clip'] == 10000:
            metrics[(x['data_name'], 'll', )][(x['model_type'], 'calibrated', )] = clip['ll']
            metrics[(x['data_name'], 'log overhead', )][(x['model_type'], 'calibrated', )] = clip['overhead']
            metrics[(x['data_name'], 'AUC-ROC', )][(x['model_type'], 'calibrated', )] = clip['auc_roc']
            metrics[(x['data_name'], 'Logloss', )][(x['model_type'], 'calibrated', )] = -clip['logloss']
#             metrics[(x['data_name'], 'calib const', )][(x['model_type'], 'calibrated', )] = clip['calibration_constant']
        else:
            metrics[(x['data_name'], 'll', )][(x['model_type'], ' calibrated clip ' + str(round(clip['clip'], 2)), )] = clip['ll']
            metrics[(x['data_name'], 'log overhead', )][(x['model_type'], ' calibrated clip ' + str(round(clip['clip'], 2)), )] = clip['overhead']
            metrics[(x['data_name'], 'AUC-ROC', )][(x['model_type'], ' calibrated clip ' + str(round(clip['clip'], 2)), )] = clip['auc_roc']
            metrics[(x['data_name'], 'Logloss', )][(x['model_type'], ' calibrated clip ' + str(round(clip['clip'], 2)), )] = -clip['logloss']
#             metrics[(x['data_name'], 'calib const', )][(x['model_type'], ' calibrated clip ' + str(round(clip['clip'], 2)), )] = clip['calibration_constant']
pd.DataFrame(metrics)

Unnamed: 0_level_0,Unnamed: 1_level_0,MINIBOONE,MINIBOONE,MINIBOONE,MINIBOONE,BSDS300,BSDS300,BSDS300,BSDS300,GAS,GAS,GAS,GAS,HEPMASS,HEPMASS,HEPMASS,HEPMASS,POWER,POWER,POWER,POWER
Unnamed: 0_level_1,Unnamed: 1_level_1,ll,log overhead,AUC-ROC,Logloss,ll,log overhead,AUC-ROC,Logloss,ll,log overhead,AUC-ROC,Logloss,ll,log overhead,AUC-ROC,Logloss,ll,log overhead,AUC-ROC,Logloss
GLOW,calibrated clip 2,-4.646376,8.101665,0.999863,0.203083,161.783699,7.881317,0.999875,0.202987,17.78111,7.065633,0.99989,0.203411,-6.931381,10.495564,0.999975,0.20279,6.348253,4.81734,0.99811,0.209685
GLOW,calibrated,3.257139,9.127329,1.0,0.000734,167.037307,11.259052,0.999994,0.000648,24.041695,11.162142,0.999999,0.001375,-0.118138,11.499224,1.0,0.000163,9.642893,9.87278,0.999842,0.013208
GLOW,normalizing flow,-14.054896,0.0,,,152.595673,0.0,,,9.409331,0.0,,,-18.73378,0.0,,,0.243388,0.0,,
MAF,calibrated clip 2,-11.268384,1.032458,0.81679,0.522898,147.403227,0.944922,0.775838,0.583912,8.730445,0.953603,0.811377,0.553678,-18.672319,0.985831,0.821791,0.520647,0.43964,0.760673,0.694023,0.625648
MAF,calibrated,-11.033189,5.08229,0.828546,0.482384,147.384294,5.2276,0.782031,0.558692,8.622444,3.428179,0.816297,0.534475,-18.831326,3.506629,0.829421,0.499498,0.395814,2.846606,0.694971,0.622451
MAF,normalizing flow,-12.341055,0.0,,,146.665405,0.0,,,7.9334,0.0,,,-19.643145,0.0,,,0.128281,0.0,,
RealNVP,calibrated clip 2,-2.628655,9.433601,1.0,0.202741,156.098972,9.941346,0.999985,0.202775,17.601569,7.540894,0.999925,0.203127,-8.864275,9.022068,0.999985,0.202788,7.615017,6.919286,0.999749,0.203809
RealNVP,calibrated,7.858391,9.912777,1.0,8.1e-05,167.785041,11.097815,1.0,0.000155,25.904475,9.821081,1.0,0.000846,0.693667,10.359239,1.0,0.000154,12.426084,11.228415,0.999997,0.001967
RealNVP,normalizing flow,-13.369072,0.0,,,144.850861,0.0,,,8.754536,0.0,,,-19.19314,0.0,,,-0.608323,0.0,,
SPLINE-AR,calibrated clip 2,-16.915615,1.059217,0.834607,0.495705,155.237688,0.728753,0.654254,0.645982,10.336423,0.765704,0.708654,0.623145,-16.075535,0.715117,0.660355,0.626709,0.524116,0.712685,0.631375,0.657516
