In [None]:
import pickle
import numpy as np
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import tensorflow as tf
from tqdm import tqdm
import pandas as pd
import typing
import scipy
from collections import OrderedDict
from sklearn.manifold import TSNE

In [None]:
%matplotlib inline

In [None]:
from bokeh.io import output_notebook
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource

In [None]:
# Configure Bokeh so that subsequent show() calls render inline in the notebook output.
output_notebook()

In [None]:
import sys
# Put the current working directory on the import path.
# NOTE(review): presumably needed so the `tda` package imported in the next cell
# resolves when the notebook is launched from the repository root -- confirm.
sys.path.append(".")

In [None]:
from tda.graph_dataset import get_dataset
from tda.embeddings.weisfeiler_lehman import get_wl_embedding
from tda.embeddings import EmbeddingType, get_embedding

In [None]:
# Registry of datasets used throughout the notebook.
# Key 0 holds the dataset generated with adv=False; the entries generated
# with adv=True are keyed by their epsilon value.
datasets = {
    0: get_dataset(num_epochs=20, epsilon=0.04, noise=0.0, adv=False),
}

In [None]:
# Generate one adv=True dataset per epsilon on a 5-point grid over
# [0.01, 0.075] and store each one in `datasets` under its epsilon.
for epsilon in np.linspace(0.01, 0.075, num=5):
    print(f"Trying epsilon={epsilon}")
    datasets[epsilon] = get_dataset(num_epochs=20, epsilon=epsilon, noise=0.0, adv=True)

In [None]:
def get_vector_from_diagram(dgm, top_k=20):
    """
    Simple attempt at turning a persistence diagram into a vector:
    the `top_k` largest lifespans (death - birth), in decreasing order.

    The first point of the diagram is always skipped before ranking.
    NOTE(review): presumably that point is the essential (infinite) component
    -- confirm against the library producing `dgm`.

    :param dgm: iterable of points exposing `birth` and `death` attributes
    :param top_k: maximum number of lifespans to keep (defaults to 20)
    :return: list of at most `top_k` lifespans, largest first. The list is
        shorter than `top_k` when the diagram has fewer than `top_k + 1`
        points, so callers needing fixed-length vectors must pad it.
    """
    # Materialise all lifespans, then drop the one of the first point.
    lifespans = [dp.death - dp.birth for dp in dgm][1:]
    return sorted(lifespans, reverse=True)[:top_k]

In [None]:
# Compute one embedding per sample for every dataset in `datasets`.
# Each record appended to `embeddings` is the tuple
#   (embedding, sample[1], sample[2], sample[3], epsilon)
# which the plotting cell later reads as (embedding, y, y_pred, y_adv, epsilon).
embeddings = list()

for epsilon in datasets:
    print(f"Trying epsilon={epsilon}")
    ds = datasets[epsilon]
    # Only the first 100 samples of each dataset are embedded.
    for idx in tqdm(range(len(ds[:100]))):
        # Look the sample up once instead of once per field.
        sample = ds[idx]

        # Active embedding: Weisfeiler-Lehman.
        # Alternatives that were tried in this spot:
        #   - EmbeddingType.AnonymousWalk with params {'steps': 5, 'threshold': 25000}
        #   - EmbeddingType.PersistentDiagram with params {'threshold': 25000},
        #     vectorised through get_vector_from_diagram
        embedding = get_embedding(
            embedding_type=EmbeddingType.WeisfeilerLehman,
            graph=sample[0],
            params={'threshold': 25000, 'height': 3, 'hash_size': 50}
        )

        embeddings.append((embedding, sample[1], sample[2], sample[3], epsilon))


In [None]:
# Project all embeddings to 2-D with t-SNE for plotting.
# random_state is pinned so that Restart & Run All reproduces the same layout;
# without it every run yields a different projection.
# NOTE(review): newer scikit-learn releases rename `n_iter` to `max_iter` --
# confirm against the installed version.
tsne = TSNE(n_components=2, verbose=1, perplexity=20, n_iter=100000, random_state=0)
# Each embedding is flattened to a 1-D vector before being handed to t-SNE.
viz_data = tsne.fit_transform([np.array(e[0]).flatten() for e in embeddings])

In [None]:
from palettable.colorbrewer.qualitative import Paired_12
from palettable.cartocolors.diverging import Temps_6

def epsilon_color(epsilon):
    """
    Return the Temps_6 hex color assigned to `epsilon`.

    The color index is the position of `epsilon` among the sorted keys of the
    global `datasets` dict; a ValueError is raised by the lookup if `epsilon`
    is not one of those keys.
    """
    rank = sorted(datasets).index(epsilon)
    return Temps_6.hex_colors[rank]



# Plotting table: one row per embedded sample.
#   x1, x2    -> 2-D t-SNE coordinates
#   y, y_pred, y_adv, epsilon -> fields 1..4 of each `embeddings` record
#   color, color_adv -> Paired_12 color indexed by y / y_adv
#   color_eps -> color chosen by epsilon_color
df = pd.DataFrame(
    {
        "x1": viz_data[:, 0],
        "x2": viz_data[:, 1],
        "y": [record[1] for record in embeddings],
        "y_pred": [record[2] for record in embeddings],
        "y_adv": [record[3] for record in embeddings],
        "epsilon": [record[4] for record in embeddings],
    }
).assign(
    color=lambda frame: frame["y"].apply(lambda label: Paired_12.hex_colors[label]),
    color_adv=lambda frame: frame["y_adv"].apply(lambda label: Paired_12.hex_colors[label]),
    color_eps=lambda frame: frame["epsilon"].apply(epsilon_color),
)

def epsilon_legend(epsilon):
    """
    Build the legend label for `epsilon`: the value truncated to three
    decimals followed by a hard-coded accuracy figure.

    The accuracy is picked by the position of `epsilon` among the sorted keys
    of the global `datasets` dict, so the table below must list one entry per
    key, in increasing key order.
    """
    # Truncate (not round) to 3 decimals.
    eps_str = str(int(epsilon * 1000) / 1000)
    accuracy_by_rank = ("92.0%", "88.8%", "80.3%", "67.0%", "47.8%", "32.2%")
    acc = accuracy_by_rank[sorted(datasets).index(epsilon)]
    return f"{eps_str} (acc {acc})"

# Add the per-row legend label derived from epsilon, then preview the table.
df["legend"] = df["epsilon"].map(epsilon_legend)

df.head()

In [None]:
# Scatter plot of the t-SNE projection, colored and labelled per epsilon.
# The title names the embedding the notebook actually computes
# (EmbeddingType.WeisfeilerLehman with height 3); the previous title still
# referred to the commented-out AnonymousWalk variant.
f = figure(title="WeisfeilerLehman height 3 for various epsilon")

# NOTE(review): newer Bokeh releases replace the `legend=` keyword with
# `legend_field=` / `legend_group=` -- confirm against the installed version.
f.scatter(source=ColumnDataSource(df),
          x="x1", y="x2", color="color_eps", legend="legend", marker="circle")

show(f)