In [1]:
import collections
import math
import os
import sys
import argparse
import random
from tempfile import gettempdir
import zipfile
import itertools

import networkx as nx

from graph_utils import rand_permute_adj_matrix, is_isomorphic_from_adj

import numpy as np
from six.moves import urllib
from six.moves import xrange  # pylint: disable=redefined-builtin
import tensorflow as tf

from tensorflow.contrib.tensorboard.plugins import projector

In [2]:
# Seed the random number generators imported by this notebook so that random
# draws are repeatable after a kernel restart.
random_seed = 0
np.random.seed(random_seed)
# The standard-library generator is imported alongside NumPy; seed it too so
# that any code drawing from it is equally repeatable.
random.seed(random_seed)

#### Graph dataset generation

In [6]:
def generate_batch(batch_size, num_nodes):
    """Build one batch of adjacency-matrix pairs labelled by isomorphism.

    Two random 0/1 adjacency matrices with a zero diagonal are drawn (they are
    not symmetrised). If ``is_isomorphic_from_adj`` reports them as isomorphic,
    one off-diagonal entry of the second matrix is flipped to break the
    isomorphism. The batch is then filled with equal numbers of the pairings
    ``[g1, g1]``, ``[g1, g2]``, ``[g2, g1]`` and ``[g2, g2]`` (in that order),
    each member being passed through ``rand_permute_adj_matrix``.

    Args:
        batch_size: Number of pairs in the batch; must be a multiple of 4.
        num_nodes: Number of nodes per graph; must be at least 2, since
            smaller graphs have no off-diagonal entry to flip.

    Returns:
        A tuple ``(batch_input_1, batch_input_2, labels)``. The two inputs are
        ``int32`` arrays of shape ``(batch_size, num_nodes, num_nodes)``;
        ``labels`` is an ``int32`` array of shape ``(batch_size, 1)`` holding 1
        when both members of a pair come from the same source graph and 0
        otherwise.

    Raises:
        AssertionError: If ``batch_size`` is not a multiple of 4.
        ValueError: If ``num_nodes`` is smaller than 2.
    """
    assert batch_size % 4 == 0
    if num_nodes < 2:
        raise ValueError(
            'num_nodes must be at least 2 to build non-isomorphic graphs, got %r'
            % (num_nodes,))

    # Every slot is overwritten in the fill loop below, so uninitialised
    # storage is sufficient here.
    batch_shape = (batch_size, num_nodes, num_nodes)
    batch_input_1 = np.empty(batch_shape, dtype=np.int32)
    batch_input_2 = np.empty(batch_shape, dtype=np.int32)
    labels = np.empty((batch_size, 1), dtype=np.int32)

    # Generate two random graphs
    adj_mat_1 = np.random.randint(0, 2, size=[num_nodes] * 2)
    adj_mat_2 = np.random.randint(0, 2, size=[num_nodes] * 2)
    # Ensure the diagonal is zero (no self-loops)
    np.fill_diagonal(adj_mat_1, 0)
    np.fill_diagonal(adj_mat_2, 0)

    # Check whether the two graphs are isomorphic
    if is_isomorphic_from_adj(adj_mat_1, adj_mat_2):
        # Flip one off-diagonal entry of the second matrix, which changes its
        # number of ones by one. The column is drawn from num_nodes - 1 values
        # and every value at or beyond the row index is shifted up by one, so
        # all off-diagonal positions are equally likely.
        row = np.random.randint(0, num_nodes)
        col = np.random.randint(0, num_nodes - 1)
        if col >= row:
            col += 1
        adj_mat_2[row, col] = 1 - adj_mat_2[row, col]

    adj_mats = (adj_mat_1, adj_mat_2)
    pairs_per_combination = batch_size // 4
    i = 0
    # For each batch make an equal number of examples with
    # [g1, g1], [g1, g2], [g2, g1], [g2, g2]
    for j, k in itertools.product(range(2), repeat=2):
        for _ in range(pairs_per_combination):
            # Each member is an independently permuted copy of its source graph
            batch_input_1[i, :, :] = rand_permute_adj_matrix(adj_mats[j])
            batch_input_2[i, :, :] = rand_permute_adj_matrix(adj_mats[k])
            # Set label to 1 if graphs isomorphic, to 0 otherwise
            labels[i] = 1 if j == k else 0
            i += 1

    return batch_input_1, batch_input_2, labels

In [None]:
# generate_batch(4, 2)

###### TensorFlow model graph

In [12]:
# Hyper-parameters of the model and of the data fed to it.
# num_nodes: the input placeholders below are [batch_size, num_nodes, num_nodes].
num_nodes = 10
# batch_size: fixed leading dimension of every placeholder; generate_batch
# additionally asserts that it is a multiple of 4.
batch_size = 100
# learning_rate: step size handed to the gradient-descent optimizer below.
learning_rate = 0.1


# Build everything in a dedicated graph so that re-running this cell starts
# from an empty graph instead of adding ops to the default one.
# NOTE(review): no TensorFlow random seed is set in this cell, so weight
# initialisation is not controlled by np.random.seed — consider seeding.
graph = tf.Graph()

with graph.as_default():
#     # Ops and variables pinned to the CPU because of missing GPU implementation
#     with tf.device('/cpu:0'):
    
    # Input data.
    # Two batches of adjacency matrices plus one label per pair; the matrices
    # are flattened to vectors of length num_nodes**2 for the dense layers.
    with tf.name_scope('inputs'):
        train_input_1 = tf.placeholder(tf.float32, shape=[batch_size, num_nodes, num_nodes])
        train_input_2 = tf.placeholder(tf.float32, shape=[batch_size, num_nodes, num_nodes])
        train_labels = tf.placeholder(tf.int32, shape=[batch_size, 1])
        train_input_1_flat = tf.reshape(train_input_1, shape=[batch_size, num_nodes**2])
        train_input_2_flat = tf.reshape(train_input_2, shape=[batch_size, num_nodes**2])
#         valid_dataset = tf.constant(valid_examples, dtype=tf.int32)
    
    
    # The embedding network
    # Both networks share the weights
    # Each input goes through the same two dense layers (ReLU hidden layer,
    # linear output). The second branch re-enters the 'embed' variable scope
    # with reuse=True and the same layer names so that it picks up the
    # variables created by the first branch.
    with tf.name_scope('joint_embedding'):
        hidden_size = 30
        embedding_size = 20
        with tf.variable_scope('embed'):
            fc_1_1 = tf.layers.dense(train_input_1_flat, hidden_size, activation=tf.nn.relu, trainable=True, name='fc_1')
            embed_1 = tf.layers.dense(fc_1_1, embedding_size, activation=None, trainable=True, name='fc_2')  # Try activation function here?

        with tf.variable_scope('embed', reuse=True):
            fc_1_2 = tf.layers.dense(train_input_2_flat, hidden_size, activation=tf.nn.relu, trainable=True, name='fc_1', reuse=True)
            embed_2 = tf.layers.dense(fc_1_2, embedding_size, activation=None, trainable=True, name='fc_2', reuse=True)
    
    # Discriminator: the two embeddings are concatenated along axis 1 and
    # mapped by a single linear layer to one logit per pair.
    with tf.name_scope('discriminate'):
        combined_embedding = tf.concat([embed_1, embed_2], 1, name='concat')  # Check whether axis is right
        out = tf.layers.dense(combined_embedding, 1, activation=None, trainable=True, name='fc_3')    


    # Compute the loss
    # Sigmoid cross-entropy between the 0/1 labels and the logits.
    # NOTE(review): tf.losses.sigmoid_cross_entropy presumably already returns
    # a reduced scalar under its default reduction, which would make the outer
    # reduce_mean a no-op — confirm.
    with tf.name_scope('loss'):
        loss = tf.reduce_mean(tf.losses.sigmoid_cross_entropy(train_labels, out))

    # Add the loss value as a scalar to summary.
    tf.summary.scalar('loss', loss)

    # Construct the SGD optimizer using a learning rate
    # Note that `optimizer` holds the op returned by minimize(), i.e. running
    # it performs one update step.
    with tf.name_scope('optimizer'):
        optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)

    # Compute the cosine similarity between minibatch examples and all embeddings.
    # (Disabled: the lines below reference `embeddings` and `valid_dataset`,
    # neither of which is defined in this cell, so `similarity` and
    # `normalized_embeddings` do not exist in this graph.)
#     norm = tf.sqrt(tf.reduce_sum(tf.square(embeddings), 1, keepdims=True))
#     normalized_embeddings = embeddings / norm
#     valid_embeddings = tf.nn.embedding_lookup(normalized_embeddings,
#                                               valid_dataset)
#     similarity = tf.matmul(
#         valid_embeddings, normalized_embeddings, transpose_b=True)

    # Merge all summaries.
    merged = tf.summary.merge_all()

    # Add variable initializer.
    init = tf.global_variables_initializer()

    # Create a saver.
    saver = tf.train.Saver()



###### Training

In [None]:
# Step 5: Begin training.
num_steps = 1000
# Number of steps between two average-loss reports. It must not exceed
# num_steps, otherwise no averaged report is ever printed.
log_every = 100
# Directory receiving the TensorBoard summaries and the model checkpoint.
log_dir = os.path.join(gettempdir(), 'graph_isomorphism_log')
if not os.path.isdir(log_dir):
    os.makedirs(log_dir)

with tf.Session(graph=graph) as session:
    # Open a writer to write summaries.
    writer = tf.summary.FileWriter(log_dir, session.graph)
    try:
        # We must initialize all variables before we use them.
        init.run()
        print('Initialised')

        average_loss = 0
        for step in range(num_steps):
            # generate_batch returns the two batches of adjacency matrices and
            # the labels; each of them feeds its own placeholder.
            batch_input_1, batch_input_2, batch_labels = generate_batch(
                batch_size, num_nodes)
            feed_dict = {
                train_input_1: batch_input_1,
                train_input_2: batch_input_2,
                train_labels: batch_labels,
            }

            # Define metadata variable.
            run_metadata = tf.RunMetadata()

            # We perform one update step by evaluating the optimizer op (including it
            # in the list of returned values for session.run()).
            # Also, evaluate the merged op to get all summaries from the returned
            # "summary" variable. Feed metadata variable to session for visualizing
            # the graph in TensorBoard.
            _, summary, loss_val = session.run(
                [optimizer, merged, loss],
                feed_dict=feed_dict,
                run_metadata=run_metadata)
            average_loss += loss_val

            # Add returned summaries to writer in each step.
            writer.add_summary(summary, step)
            # Add metadata to visualize the graph for the last run.
            if step == (num_steps - 1):
                writer.add_run_metadata(run_metadata, 'step%d' % step)

            # Report the mean loss over the last `log_every` batches.
            if (step + 1) % log_every == 0:
                print('Average loss at step ', step + 1, ': ',
                      average_loss / log_every)
                average_loss = 0

        # Save the model for checkpoints.
        saver.save(session, os.path.join(log_dir, 'model.ckpt'))
    finally:
        # Flush and release the event file even if training was interrupted.
        writer.close()


# Step 6: Visualize the embeddings.


# # pylint: disable=missing-docstring
# # Function to draw visualization of distance between embeddings.
# def plot_with_labels(low_dim_embs, labels, filename):
#     assert low_dim_embs.shape[0] >= len(labels), 'More labels than embeddings'
#     plt.figure(figsize=(18, 18))  # in inches
#     for i, label in enumerate(labels):
#         x, y = low_dim_embs[i, :]
#         plt.scatter(x, y)
#         plt.annotate(
#             label,
#             xy=(x, y),
#             xytext=(5, 2),
#             textcoords='offset points',
#             ha='right',
#             va='bottom')

#     plt.savefig(filename)


# try:
#     # pylint: disable=g-import-not-at-top
#     from sklearn.manifold import TSNE
#     import matplotlib.pyplot as plt

#     tsne = TSNE(
#         perplexity=30, n_components=2, init='pca', n_iter=5000, method='exact')
#     plot_only = 500
#     low_dim_embs = tsne.fit_transform(final_embeddings[:plot_only, :])
#     labels = [reverse_dictionary[i] for i in xrange(plot_only)]
#     plot_with_labels(low_dim_embs, labels, os.path.join(gettempdir(), 'tsne.png'))

# except ImportError as ex:
#     print('Please install sklearn, matplotlib, and scipy to show embeddings.')
#     print(ex)
