In [1]:
import numpy as np 
import matplotlib.pyplot as plt
import sys 
import networkx as nx 
import seaborn as sns 
from constants import ROOT_DIR

# add the repository root to the import path so the repo-local modules below resolve
sys.path.append(ROOT_DIR)
# import libraries custom in this repository
import methods.DataIO as DataIO
import methods.GromovWassersteinFramework as GWL
import dev.generate_util as Gen

sns.set()
%matplotlib inline

Set experiment parameters: 

- `GEN_FUNC` is the generating function for a graph adjacency matrix
- `UNDIRECTED` is a boolean for whether the graph is undirected
- `BASE_SIZE` is the number of nodes in the smaller graph
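- `MAX_SIZE` is the upper bound on the number of nodes in the larger graph; together with `BASE_SIZE` and `INCREMENTS` it determines how many larger graphs are compared against the base graph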
- `INCREMENTS` is the number of nodes added to the larger graph from iteration (i) to (i + 1)
- `OT_DICT` is a dictionary containing the arguments to the optimal transport training process

In [17]:
# --- Graph family and size sweep --------------------------------------------
# GEN_FUNC is called as GEN_FUNC(n_nodes); its result is wrapped in nx.Graph.
GEN_FUNC = Gen.gen_star_graph
UNDIRECTED = True  # when True, each cost matrix C is replaced by C + C.T
BASE_SIZE = 10     # number of nodes in the fixed (smaller) graph
MAX_SIZE = 20      # upper bound used to derive how many larger graphs are tried
INCREMENTS = 1     # nodes added to the larger graph from one experiment to the next
OUTER_ITER = 1000
# NOTE(review): INNER_ITER is also reused by the experiment cell as the number
# of times each discrepancy computation is repeated -- confirm that is intended.
INNER_ITER = 10
# Legend label for the plot; it must describe the graph family chosen in GEN_FUNC.
GEN_FUNC_NAME = f"Star with base size = {BASE_SIZE}"

# Arguments forwarded unchanged to GWL.gromov_wasserstein_discrepancy.
OT_DICT = {'loss_type': 'L2',  # the key hyperparameters of GW distance
            'ot_method': 'proximal',
            'beta': 0.01,
            'outer_iteration': OUTER_ITER,  # outer, inner iteration, error bound of optimal transport
            'iter_bound': 1e-30,
            'inner_iteration': INNER_ITER,
            'sk_bound': 1e-30,
            'max_iter': 1,  # iteration and error bound for calculating barycenter
            'cost_bound': 1e-16,
            'update_p': False,  # optional updates of source distribution
            'lr': 0.1,
            'node_prior': None,
            'alpha': 0,
            'test_mode': True}

In [18]:
def _ot_inputs(graph):
    """Return the normalised node distribution and the cost matrix of ``graph``.

    The distribution returned by ``DataIO.extract_graph_info`` is rescaled in
    place so that it sums to one.  When ``UNDIRECTED`` is truthy the cost
    matrix is returned as ``cost + cost.T``; otherwise it is returned as is.
    """
    dist, cost, _ = DataIO.extract_graph_info(graph)
    dist /= np.sum(dist)
    return dist, (cost + cost.T if UNDIRECTED else cost)


# Number of slots in the result vectors; experiment i (1 <= i < N_EXPERIMENTS)
# compares the base graph against a graph with BASE_SIZE + i * INCREMENTS nodes.
N_EXPERIMENTS = (MAX_SIZE - BASE_SIZE) // INCREMENTS

# Source side of every comparison: the fixed base graph.
base_graph = nx.Graph(GEN_FUNC(BASE_SIZE))
p_s, cost_s = _ot_inputs(base_graph)

# Slot 0 is never written by the loop below and is dropped afterwards.
results_mean = np.zeros(N_EXPERIMENTS)
results_std = np.zeros(N_EXPERIMENTS)

for i in range(1, N_EXPERIMENTS):
    # Target side: a fresh graph that is i * INCREMENTS nodes larger than the base.
    new_size = BASE_SIZE + i * INCREMENTS
    new_graph = nx.Graph(GEN_FUNC(new_size))
    p_t, cost_t = _ot_inputs(new_graph)

    # Gromov-Wasserstein discrepancy between the base graph and the new graph.
    # NOTE(review): the call is repeated INNER_ITER times and every repeat
    # overwrites the previous result, so only the last d_gw is summarised
    # below -- confirm whether the repeats are needed.
    d_gw = None
    for j in range(INNER_ITER):
        trans, d_gw, _ = GWL.gromov_wasserstein_discrepancy(cost_s, cost_t, p_s, p_t, OT_DICT)

    # Summarise the last d_gw: its mean, and a quarter of its standard deviation.
    results_mean[i] = np.mean(d_gw)
    results_std[i] = 0.25 * np.std(d_gw)

# Discard the unused slot 0 so that entry k corresponds to experiment k + 1.
results_mean = results_mean[1:]
results_std = results_std[1:]

# Visualise the GW discrepancy against the actual difference in node count.
# Experiment i compares BASE_SIZE with BASE_SIZE + i * INCREMENTS nodes, so its
# x-coordinate is i * INCREMENTS; the bare index i is only correct when
# INCREMENTS == 1.
size_diffs = np.arange(1, N_EXPERIMENTS) * INCREMENTS

fig, ax = plt.subplots(figsize=(5, 5))
ax.plot(size_diffs, results_mean, label=GEN_FUNC_NAME, color='blue')
# Shaded band: results_mean +/- results_std around the mean curve.
ax.fill_between(size_diffs, results_mean - results_std, results_mean + results_std,
                color='blue', alpha=0.2)
ax.legend()
ax.set_xlabel('Difference in Size')
ax.set_ylabel('GW discrepancy')
ax.set_title('GW discrepancy vs. difference in size');

ValueError: could not broadcast input array from shape (5,) into shape (1000,)