In [1]:
import sys
sys.path.append("/vol/bitbucket/ad6013/Research/gp-causal")
import numpy as np
import matplotlib.pyplot as plt

import pickle
from sklearn.mixture import BayesianGaussianMixture
from data import get_data
from sklearn.metrics import roc_curve, auc
from tqdm import tqdm
from sklearn.preprocessing import StandardScaler

In [26]:
# Need to load all the data
def return_adam_files(data_name):
    """
    Build the names of the result files written by the Adam runs.

    One name is produced per chunk of 20 runs, with chunk starts at
    0, 20, ..., 280 (15 names in total). `data_name` is interpolated into
    the file-name prefix.
    """
    chunk_starts = np.linspace(0, 280, 15, dtype=int)
    file_names = []
    for start in chunk_starts:
        end = start + 20
        file_names.append(
            f"fullscore-{data_name}_pairs-gplvm_adam-reinit2-numind200_start:{start}_end:{end}.p"
        )
    return file_names


def return_bfgs_files(data_name):
    """
    Build the names of the result files written by the BFGS runs.

    One name is produced per chunk of 10 runs, with chunk starts at
    0, 10, ..., 90 (10 names in total).
    """
    starts = np.linspace(0, 90, 10, dtype=int)
    ends = starts + 10
    return [
        f"fullscore-{data_name}-gplvm-reinit20-numind200_start:{lo}_end:{hi}.p"
        for lo, hi in zip(starts, ends)
    ]

In [27]:
# Sanity check: these are the chunk start offsets used by return_bfgs_files.
np.linspace(0, 90, 10, dtype=int)

array([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90])

In [34]:
def convert_file_scores_into_dict(
    files, work_dir="/vol/bitbucket/ad6013/Research/gp-causal"
):
    """
    Convert files saved by runs to one big dict of scores with the key being
    the run_number and the value being the tuple of a tuple of scores of the
    form ( (x, y|x), (y, x|y) )

    The run number of the first entry of each file is read from the
    "_start:<offset>_" part of the file name, so files holding any number of
    runs (10 per file for BFGS, 20 per file for Adam) map to the right
    indices. If a name carries no such marker, the offset falls back to
    ``10 * position_in_files``.

    Parameters
    ----------
    files : sequence of str
        File names, looked up under ``{work_dir}/results/``.
    work_dir : str, optional
        Project root containing the ``results`` directory.

    Returns
    -------
    dict
        Run number -> entry of the pickled ``"scores"`` container.
    """
    all_scores = {}
    for file_idx, file_name in enumerate(files):
        # Recover the chunk offset from the name instead of assuming that
        # every file holds exactly 10 runs.
        _, marker, tail = file_name.partition("_start:")
        offset_text = tail.split("_", 1)[0]
        if marker and offset_text.isdigit():
            offset = int(offset_text)
        else:
            offset = 10 * file_idx

        # NOTE: pickle.load can execute arbitrary code; only point this at
        # result files produced by trusted runs.
        with open(f"{work_dir}/results/{file_name}", "rb") as f:
            results = pickle.load(f)

        file_scores = results["scores"]
        for i in range(len(file_scores)):
            all_scores[offset + i] = file_scores[i]
    return all_scores

In [35]:
def return_best_between_two_scores(scores_1, scores_2):
    """
    Merge two score dicts by keeping, entry by entry, the smaller value.

    Both arguments map a run index to a tuple of tuples
    ( (x, y|x), (y, x|y) )
    and the result has the same layout. Every run of `scores_1` must also be
    present in `scores_2`, otherwise a ValueError is raised; runs that only
    appear in `scores_2` are ignored.
    """
    best = {}
    for run, first in scores_1.items():
        if run not in scores_2:
            raise ValueError(f"Run idx mismatch for run {run}")
        second = scores_2[run]
        # Walk the 2x2 layout: direction (causal / anti-causal) x term.
        best[run] = tuple(
            tuple(min(first[d][t], second[d][t]) for t in (0, 1))
            for d in (0, 1)
        )
    return best

In [90]:
def return_best_between_marginal_and_full_scores(marginal_score, full_score):
    """
    Combine marginal scores with the conditional terms of the full scores.

    marginal_score: dict of run index -> (x, y)
    full_score: dict of run index -> ( (x, y|x), (y, x|y) )

    For every run present in both dicts the result holds
    ( (x, y|x), (y, x|y) ) where x and y come from `marginal_score` and the
    conditional terms come from `full_score`; the marginal entries stored in
    `full_score` are not used. Runs of `marginal_score` that are missing from
    `full_score` are skipped without raising.
    """
    combined = {}
    for run, marginals in marginal_score.items():
        if run not in full_score:
            # Deliberately tolerant: unmatched runs are simply left out.
            continue
        conditionals = full_score[run]
        combined[run] = (
            (marginals[0], conditionals[0][1]),
            (marginals[1], conditionals[1][1]),
        )
    return combined

In [91]:
def get_auc_scores(data_name, scores, seed=None):
    """
    Compute the ROC AUC of direction scores against the dataset targets.

    For each run the decision score is ``-(x + y|x) + (y + x|y)``, i.e. the
    summed anti-causal terms minus the summed causal terms. A random half of
    the runs then has both its label and its score negated before the ROC
    curve is computed (presumably so that both classes are present for
    roc_curve; confirm against the dataset's target values).

    Parameters
    ----------
    data_name : str
        Currently unused: the Tuebingen pairs loader is always called. Kept
        so existing callers keep working.
    scores : dict
        Run index -> ( (x, y|x), (y, x|y) ). The run index is used to look
        up the label in the loader's `target`.
    seed : int, optional
        Seed for the choice of flipped runs. With the default ``None`` the
        global NumPy random state is used, as before.

    Returns
    -------
    float
        Area under the ROC curve.
    """
    work_dir = "/vol/bitbucket/ad6013/Research/gp-causal"
    # Only the targets are needed here; inputs and weights are discarded.
    _, _, _, target = get_data.get_tubingen_pairs_dataset(
        data_path=f"{work_dir}/data/pairs/files"
    )

    y_scores = []
    y_labels = []
    for idx in scores.keys():
        causal = sum(scores[idx][0])
        anti_causal = sum(scores[idx][1])
        y_labels.append(target[idx])
        y_scores.append(-causal + anti_causal)

    # Sample WITHOUT replacement: with replacement a run can be drawn twice
    # and flipped back, so fewer than half of the runs would end up flipped.
    n_runs = len(y_labels)
    rng = np.random if seed is None else np.random.RandomState(seed)
    flip_indices = rng.choice(n_runs, size=n_runs // 2, replace=False)
    for i in flip_indices:
        y_labels[i] *= -1
        y_scores[i] *= -1

    fpr, tpr, _ = roc_curve(y_labels, y_scores)
    roc_auc = auc(fpr, tpr)
    return roc_auc


In [85]:
# Evaluate the BFGS-optimised GPLVM scores on the cause-effect pairs ("cep").
# The Adam results and the best-of-both combination are currently disabled,
# so despite its name `adam_bfgs_auc` is computed from the BFGS scores alone.
data_name = "cep"
# adam_files = return_adam_files(data_name=data_name)
bfgs_files = return_bfgs_files(data_name=data_name)
# adam_scores = convert_file_scores_into_dict(adam_files)
bfgs_scores = convert_file_scores_into_dict(bfgs_files)
# scores = return_best_between_two_scores(adam_scores, bfgs_scores)
adam_bfgs_auc = get_auc_scores(data_name, bfgs_scores)

Load cause-effect pairs: 100%|██████████| 100/100 [00:00<00:00, 226.24it/s]


In [86]:
# AUC obtained from the BFGS scores (see the assignment of adam_bfgs_auc).
adam_bfgs_auc

0.6986817325800376

In [58]:
def bayesgmm_score(train_data, n_components):
    """
    Fit a Bayesian Gaussian mixture on `train_data` and score that same data.

    Returns the negated sum of `score_samples` over the training data, so a
    lower value means the fitted mixture assigns the data a higher total
    score. The mixture uses at most `n_components` components and up to 1e6
    iterations.
    """
    mixture = BayesianGaussianMixture(
        n_components=n_components,
        max_iter=int(1e6),
    )
    mixture.fit(train_data)
    per_sample_scores = mixture.score_samples(train_data)
    return -np.sum(per_sample_scores)

In [63]:
def return_bayesian_gmm_scores(data_name, num_restarts):
    """
    Will return scores of Bayesian GMM for a dataset.

    Each variable of each pair is standardised and fitted `num_restarts`
    times with a Bayesian Gaussian mixture; every restart draws the number
    of components uniformly from 3..9 and uses it for both x and y. The
    lowest score over the restarts is kept per variable.

    Parameters
    ----------
    data_name : str
        Currently unused: the Tuebingen pairs loader is always called. Kept
        so existing callers keep working.
    num_restarts : int
        Number of fits per variable; must be at least 1.

    Returns
    -------
    dict
        Pair index -> (best x score, best y score).

    Raises
    ------
    ValueError
        If `num_restarts` is smaller than 1.
    """
    if num_restarts < 1:
        # Fail before loading data instead of hitting min() on an empty list.
        raise ValueError("num_restarts must be at least 1")

    work_dir = "/vol/bitbucket/ad6013/Research/gp-causal"
    # Only the inputs are needed here; weights and targets are discarded.
    x, y, _, _ = get_data.get_tubingen_pairs_dataset(
        data_path=f"{work_dir}/data/pairs/files"
    )

    all_scores = {}
    n_components_array = np.arange(3, 10)
    for idx in tqdm(range(len(x)), desc="Running BayesGMM"):
        # Normalise the data once per pair; it does not change across restarts.
        train_x = StandardScaler().fit_transform(x[idx]).astype(np.float64)
        train_y = StandardScaler().fit_transform(y[idx]).astype(np.float64)

        run_score_x = []
        run_score_y = []
        for _ in range(num_restarts):
            n_components = np.random.choice(n_components_array)
            run_score_x.append(
                bayesgmm_score(train_x, n_components=n_components)
            )
            run_score_y.append(
                bayesgmm_score(train_y, n_components=n_components)
            )

        all_scores[idx] = (min(run_score_x), min(run_score_y))
    return all_scores

In [68]:
# 20 restarts per pair; the lowest score found for each variable is kept.
bayesgmm_scores = return_bayesian_gmm_scores(data_name, 20)

Load cause-effect pairs: 100%|██████████| 100/100 [00:00<00:00, 226.01it/s]
  cluster.KMeans(
  cluster.KMeans(
  cluster.KMeans(
  cluster.KMeans(
  cluster.KMeans(
  cluster.KMeans(
  cluster.KMeans(
  cluster.KMeans(
  cluster.KMeans(
  cluster.KMeans(
  cluster.KMeans(
  cluster.KMeans(
  cluster.KMeans(
  cluster.KMeans(
  cluster.KMeans(
  cluster.KMeans(
  cluster.KMeans(
  cluster.KMeans(
  cluster.KMeans(
  cluster.KMeans(
  cluster.KMeans(
  cluster.KMeans(
  cluster.KMeans(
  cluster.KMeans(
  cluster.KMeans(
  cluster.KMeans(
  cluster.KMeans(
  cluster.KMeans(
  cluster.KMeans(
  cluster.KMeans(
  cluster.KMeans(
  cluster.KMeans(
  cluster.KMeans(
  cluster.KMeans(
  cluster.KMeans(
  cluster.KMeans(
  cluster.KMeans(
  cluster.KMeans(
  cluster.KMeans(
  cluster.KMeans(
  cluster.KMeans(
  cluster.KMeans(
  cluster.KMeans(
  cluster.KMeans(
  cluster.KMeans(
  cluster.KMeans(
  cluster.KMeans(
  cluster.KMeans(
  cluster.KMeans(
  cluster.KMeans(
  cluster.KMeans(
  clus

In [78]:
# Inspect the GMM marginal scores: pair index -> (x score, y score).
bayesgmm_scores

{0: (369.5587079334272, 432.2758528852618),
 1: (369.57397618367474, 412.7270931700559),
 2: (483.58177918746304, 432.2596955210503),
 3: (369.6072231599737, 487.5836857519792),
 4: (2949.86750393716, 5711.104619493236),
 5: (2949.938093768742, 5660.233072649171),
 6: (3118.874831921589, 5710.901658863453),
 7: (3542.263645623937, 5565.796772873109),
 8: (3228.580986572403, 5648.988607719781),
 9: (2950.0270968451127, 5588.158567716921),
 10: (3228.6610022749683, 5623.140719181346),
 11: (6755.152971496134, 4903.097562495011),
 12: (423.69066227880114, 533.0335384726119),
 13: (485.7980447981433, 533.0631353373961),
 14: (506.514793089718, 533.0663650028949),
 15: (485.8010548676707, 555.4118492423411),
 16: (6914.256460307017, -2635.21337778583),
 17: (354.7909542077501, 376.76382737265874),
 18: (172.73293505016574, 248.38183499647278),
 19: (461.2718400951087, 432.25423162648946),
 20: (483.5823225788797, 412.7263486030435),
 21: (634.7596084765026, 569.7802909631315),
 22: (636.532

In [79]:
# Inspect the BFGS full scores: run index -> ( (x, y|x), (y, x|y) ).
bfgs_scores

{0: ((380.4152794302315, 212.44111712680163),
  (453.78477419861576, 199.2502760255514)),
 1: ((380.4152794302315, 302.66297746126816),
  (433.92545168799893, 280.436275773376)),
 2: ((495.86265291184725, 424.01120420151096),
  (453.78477419861576, 465.37694569215864)),
 3: ((380.41458332561797, 487.52499522561357),
  (495.2095490215442, 397.340589947514)),
 4: ((380.41458332561797, 487.52499522561357),
  (495.2095490215442, 397.340589947514)),
 5: ((380.41458332561797, 487.52499522561357),
  (495.2095490215442, 397.340589947514)),
 10: ((-218.73398362418993, 4173.638674743706),
  (5634.863776636774, 2329.864293066208)),
 11: ((5416.376517551311, 4244.62228851653),
  (4166.258204279944, 5426.640553992867)),
 12: ((398.2047316557823, 294.4956044467922),
  (548.5704450445514, 207.1354560890768)),
 13: ((398.2047316557823, 294.4956044467922),
  (548.5704268674258, 207.1354560890768)),
 14: ((398.2047316557823, 277.9452768409954),
  (548.570403729393, 207.1354560890768)),
 15: ((398.204731

In [92]:
# Pair the GMM marginals with the conditional terms of the BFGS scores, then
# evaluate. Runs that are missing from bfgs_scores are dropped silently.
best_gmm_full_scores = return_best_between_marginal_and_full_scores(
    bayesgmm_scores, bfgs_scores 
)
bayes_gmm_auc = get_auc_scores(data_name, best_gmm_full_scores)

Load cause-effect pairs: 100%|██████████| 100/100 [00:00<00:00, 311.85it/s]


In [93]:
# AUC when the marginals come from the Bayesian GMM instead of the GPLVM.
bayes_gmm_auc

0.515426497277677

In [94]:
# Side-by-side summary: BFGS-only AUC vs. GMM-marginal AUC.
print(f"AUC: {adam_bfgs_auc}, GMM AUC: {bayes_gmm_auc}")

AUC: 0.6986817325800376, GMM AUC: 0.515426497277677
