In [1]:
# Install pygsp and ndlib into the notebook environment via shell escapes.
# NOTE(review): no versions are pinned, so a fresh run may install releases
# newer than the ones this notebook was written against.
!pip install pygsp
!pip install ndlib



In [0]:
import time
import os
import argparse
import sys
import numpy as np
import json
import matplotlib
import random

# Warning-suppression snippet.
# NOTE(review): warnings.catch_warnings() restores the previous filter state
# when the `with` block exits, so the "ignore" filter below only silences the
# single DeprecationWarning raised by fxn(); warnings emitted by later code are
# not affected by this snippet.
import warnings
def fxn():
    """Emit a DeprecationWarning with the message "deprecated"."""
    warnings.warn("deprecated", DeprecationWarning)
with warnings.catch_warnings():
    # The filter is only active inside this context manager.
    warnings.simplefilter("ignore")
    fxn()



from minibatch_sources import MinibatchSource

matplotlib.use("Agg")

from pygsp import graphs
import networkx as nx

import tensorflow as tf

from models import deep_fir_tv_fc_fn, fc_fn, \
    deep_cheb_fc_fn, deep_sep_fir_fc_fn
from laplacian import initialize_laplacian_tensor
from coarsening import coarsen, perm_data, keep_pooling_laplacians
from data_epidemics import epidemics_generator, load_from_npy, save_to_npy

#from graph_utils.visualization import plot_tf_fir_filter

# Module-level placeholder; nothing in the visible code reads it.
# NOTE(review): presumably a leftover from an argparse-driven script version
# (argparse is imported above but unused here) - confirm before removing.
FLAGS = None
#FILEDIR = os.path.dirname(os.path.realpath(__file__))
#TEMPDIR = os.path.realpath(os.path.join(FILEDIR, "../experiments"))

In [0]:
# --- Run selection ---------------------------------------------------------
# Model built by run_training: "deep_fir", "deep_cheb", "deep_sep" or "fc".
model_type = "deep_fir"
# Branch taken by main(): "train" or "eval".
action = "train"
# When True, datasets are read with load_from_npy instead of being generated.
load_data = False

# --- Optimisation ----------------------------------------------------------
learning_rate = 1e-4   # passed to the Adam optimizer
num_epochs = 1
batch_size = 100

# --- Dataset ---------------------------------------------------------------
num_train = 6000       # number of training samples
num_test = 2000        # number of test samples
num_classes = 2        # depth of the one-hot label encoding
num_vertices = 500     # vertices of the generated community graph
num_frames = 128       # time steps per sample (third axis of the input)
seed = 15              # seeds the community graph and Python's `random`

# --- Model hyper-parameters (one entry per layer) ---------------------------
vertex_filter_orders = [4, 4, 4]
time_filter_orders = [3, 3, 3]
num_filters = [8, 16, 32]
time_poolings = [4, 4, 4]
vertex_poolings = [2, 2, 2]
# Forwarded unchanged to the deep_fir / deep_cheb model builders.
shot_noise = 1

In [0]:

def _fill_feed_dict(mb_source, x, y, dropout, phase, is_training):
    (data, labels), is_end = mb_source.next_batch(batch_size)
    feed_dict = {x: data, y: labels, dropout: 0.5 if is_training else 1, phase: is_training}
    still_data = not is_end
    return feed_dict, still_data


def run_training(L, train_mb_source, test_mb_source):
    """Build the selected model, train it and periodically report test accuracy.

    The model is chosen by the module-level ``model_type``. The remaining
    hyper-parameters (``num_frames``, ``num_classes``, ``learning_rate``,
    ``num_epochs``, ``num_train``, ``batch_size`` and the per-layer lists) are
    read from module-level configuration as well.

    Args:
        L: Sequence of Laplacian tensors. ``L[0].get_shape()`` must yield two
            dimensions; the first one sizes the vertex axis of the input
            placeholder. The sequence is forwarded to the graph-based model
            builders.
        train_mb_source: Minibatch source used for training; must provide
            ``next_batch(batch_size)``.
        test_mb_source: Minibatch source used for evaluation; must provide
            ``restart()`` and ``next_batch(batch_size)``.

    Raises:
        ValueError: If ``model_type`` is not one of ``"deep_fir"``,
            ``"deep_cheb"``, ``"deep_sep"`` or ``"fc"``.
    """
    # Use the TF1 graph-mode API through one alias so the whole function is
    # consistent (placeholders already used tf.compat.v1).
    tf1 = tf.compat.v1

    # How often (in steps) to print training status / run a test evaluation.
    LOG_EVERY = 10
    EVAL_EVERY = 30

    # Create data placeholders
    num_vertices, _ = L[0].get_shape()
    x = tf1.placeholder(tf.float32, [None, num_vertices, num_frames, 1], name="x")
    y_ = tf1.placeholder(tf.uint8, name="labels")
    y_hot = tf.one_hot(y_, num_classes)

    # Initialize model
    if model_type == "deep_fir":
        print("Training deep FIR-TV model...")
        logits, phase = deep_fir_tv_fc_fn(x=x,
                                          L=L,
                                          num_classes=num_classes,
                                          time_filter_orders=time_filter_orders,
                                          vertex_filter_orders=vertex_filter_orders,
                                          num_filters=num_filters,
                                          time_poolings=time_poolings,
                                          vertex_poolings=vertex_poolings,
                                          shot_noise=shot_noise)
    elif model_type == "deep_cheb":
        print("Training deep Chebyshev time invariant model...")
        # Swap the last two axes before handing the input to the Chebyshev model.
        xt = tf.transpose(x, perm=[0, 1, 3, 2])
        logits, phase = deep_cheb_fc_fn(x=xt,
                                        L=L,
                                        num_classes=num_classes,
                                        vertex_filter_orders=vertex_filter_orders,
                                        num_filters=num_filters,
                                        vertex_poolings=vertex_poolings,
                                        shot_noise=shot_noise)
    elif model_type == "deep_sep":
        print("Training deep separable FIR model...")
        logits, phase = deep_sep_fir_fc_fn(x=x,
                                           L=L,
                                           num_classes=num_classes,
                                           time_filter_orders=time_filter_orders,
                                           vertex_filter_orders=vertex_filter_orders,
                                           num_filters=num_filters,
                                           time_poolings=time_poolings,
                                           vertex_poolings=vertex_poolings)
    elif model_type == "fc":
        print("Training linear classifier model...")
        logits = fc_fn(x, num_classes)
        # The linear model does not return a phase tensor, so create one here.
        phase = tf1.placeholder(tf.bool, name="phase")
    else:
        raise ValueError("model_type not valid.")

    # Created after the model in every case, exactly as before; it was
    # previously duplicated in each branch.
    dropout = tf1.placeholder(tf.float32, name="keep_prob")

    # Define loss
    with tf.name_scope("loss"):
        cross_entropy = tf1.losses.softmax_cross_entropy(y_hot, logits=logits)
        loss = tf.reduce_mean(cross_entropy)
        tf1.summary.scalar('xentropy', loss)

    # Define metric
    with tf.name_scope("metric"):
        correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(y_hot, 1))
        correct_prediction = tf.cast(correct_prediction, tf.float32, name="correct_prediction")
        accuracy = tf.reduce_mean(correct_prediction, name="accuracy")
        tf1.summary.scalar('accuracy', accuracy)

    # Make the train op depend on the ops registered in UPDATE_OPS so they run
    # with every optimisation step.
    extra_update_ops = tf1.get_collection(tf1.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(extra_update_ops):
        # Select optimizer
        optimizer = tf1.train.AdamOptimizer(learning_rate=learning_rate)
        global_step = tf.Variable(0, name='global_step', trainable=False)
        opt_train = optimizer.minimize(loss, global_step=global_step)

    # Build the summary Tensor based on the TF collection of Summaries.
    # Only consumed by the (currently commented-out) summary writers below.
    summary = tf1.summary.merge_all()

    # Create a saver for writing training checkpoints.
    #saver = tf.train.Saver()

    print("Number of training parameters:", _number_of_trainable_params())

    # Guard against num_train < batch_size, which would otherwise make the
    # modulo operations below divide by zero.
    steps_per_epoch = max(1, num_train // batch_size)
    max_steps = num_epochs * num_train // batch_size

    # Run session
    with tf1.Session() as sess:

        # Instantiate a SummaryWriter to output summaries and the Graph.
        #train_writer = tf.compat.v1.summary.FileWriter(log_dir + "/train", sess.graph)
        #test_writer = tf.compat.v1.summary.FileWriter(log_dir + "/test", sess.graph)

        sess.run(tf1.global_variables_initializer())

        # Start training loop
        epoch_count = 0
        for step in range(max_steps):

            start_time = time.time()

            feed_dict, _ = _fill_feed_dict(train_mb_source, x, y_, dropout, phase, True)

            # Perform one training iteration
            _, loss_value = sess.run([opt_train, loss],
                                     feed_dict=feed_dict)

            duration = time.time() - start_time

            if step % steps_per_epoch == 0:
                print("Epoch %d" % epoch_count)
                print("--------------------")
                epoch_count += 1

            # Write the summaries and print an overview fairly often.
            if step % LOG_EVERY == 0:
                # Print status to stdout. The accuracy is computed on the same
                # training feed (dropout 0.5, phase True) used for the step.
                accuracy_value = sess.run(accuracy, feed_dict=feed_dict)
                print('Step %d: loss = %.2f accuracy = %.2f (%.3f sec)' % (step, loss_value, accuracy_value, duration))
                # Update the events file.
                #summary_str = sess.run(summary, feed_dict=feed_dict)
                #train_writer.add_summary(summary_str, step)
                #train_writer.flush()

            # Evaluate the model at every epoch end, on the last step, and
            # every EVAL_EVERY steps.
            completed = step + 1
            epoch_finished = completed % steps_per_epoch == 0
            if epoch_finished or completed == max_steps or completed % EVAL_EVERY == 0:
                #checkpoint_file = os.path.join(log_dir, 'model')
                #saver.save(sess, checkpoint_file, global_step=step)

                test_accuracy = _eval_metric(sess, correct_prediction, dropout, phase, x, y_, test_mb_source)

                #test_summary = tf.compat.v1.Summary(value=[tf.compat.v1.Summary.Value(tag="test_accuracy", simple_value=test_accuracy)])
                #test_writer.add_summary(test_summary, step)
                #test_writer.flush()

                print("--------------------")
                print('Test accuracy = %.2f' % test_accuracy)
                if epoch_finished:
                    print("====================")
                else:
                    print("--------------------")


def _eval_metric(sess, correct_prediction, dropout, phase, x, y, test_mb_source, ):
    still_data = True
    test_correct_predictions = []
    test_mb_source.restart()
    while still_data:
        test_feed_dict, still_data = _fill_feed_dict(test_mb_source, x, y, dropout, phase, False)
        test_correct_predictions.append(sess.run(correct_prediction, feed_dict=test_feed_dict))

    print(test_correct_predictions[1])
    print(len(test_correct_predictions))
    return np.mean(test_correct_predictions)


def run_eval(test_mb_source):
    """Evaluate the model in the default graph on ``test_mb_source``.

    The input and metric tensors are looked up by name in the default graph
    (``x:0``, ``labels:0``, ``keep_prob:0``, ``phase:0`` and
    ``metric/correct_prediction:0``), the accuracy is printed, and - when the
    plotting helper and an output directory are available - the trainable
    variables whose name contains "conv" are plotted.

    Args:
        test_mb_source: Minibatch source providing ``restart()`` and
            ``next_batch(batch_size)``.
    """
    tf1 = tf.compat.v1  # same TF1 graph-mode API the rest of the file uses

    with tf1.Session() as sess:
        # NOTE(review): restoring from a checkpoint is commented out, so this
        # session does not load any saved weights - confirm that is intended
        # before relying on the reported accuracy.
        #saver = tf.train.import_meta_graph(
          #  os.path.join(log_dir, "model-" + str(_last_checkpoint(log_dir)) + ".meta"))
        #saver.restore(sess, tf.train.latest_checkpoint(log_dir))
        graph = tf1.get_default_graph()

        # Get inputs
        x = graph.get_tensor_by_name("x:0")
        y = graph.get_tensor_by_name("labels:0")
        keep_prob = graph.get_tensor_by_name("keep_prob:0")
        phase = graph.get_tensor_by_name("phase:0")

        # Get output
        correct_prediction = graph.get_tensor_by_name("metric/correct_prediction:0")

        print("Evaluation accuracy: %.2f" % _eval_metric(sess, correct_prediction, keep_prob, phase, x, y,
                                                         test_mb_source))

        # The plotting helper's import is commented out in this notebook and
        # `log_dir` is not defined, so look both up defensively instead of
        # failing with NameError after the evaluation has already run.
        plot_filter = globals().get("plot_tf_fir_filter")
        output_dir = globals().get("log_dir")
        if plot_filter is None or output_dir is None:
            print("Skipping filter plots: plot_tf_fir_filter and/or log_dir is not defined.")
            return

        conv_variables = [v for v in tf1.trainable_variables() if "conv" in v.name]
        for idx, v in enumerate(conv_variables):
            plot_filter(sess, v, os.path.join(output_dir, "conv_%d" % idx))



def _number_of_pooling_levels(vertex_poolings):
    return np.log2(np.prod(vertex_poolings)).astype(int)


def _number_of_trainable_params():
    """Return the total number of scalar parameters in all trainable variables.

    Returns:
        Plain ``int``: the sum over ``tf.compat.v1.trainable_variables()`` of
        the product of each variable's static shape (0 when there are none).
    """
    # as_list() yields plain Python ints, so the result is an ordinary integer.
    # np.prod replaces the deprecated alias np.product.
    sizes = [np.prod(v.shape.as_list()) for v in tf.compat.v1.trainable_variables()]
    return int(np.sum(sizes))


def _last_exp(log_dir):
    exp_numbers = []
    if not os.path.exists(log_dir):
        return 0
    for file in os.listdir(log_dir):
        if "exp" not in file:
            continue
        else:
            exp_numbers.append(int(file.split("_")[1]))
    return max(exp_numbers) if len(exp_numbers) > 0 else 0


In [0]:
def _load_or_generate_split(split, num_samples, G):
    """Return ``(data, labels)`` for one dataset split.

    When the module-level ``load_data`` is true the arrays are read with
    ``load_from_npy``; otherwise they are generated with
    ``epidemics_generator`` on the graph ``G`` and stored with ``save_to_npy``.

    Args:
        split: Split name passed to the load/save helpers ("train" or "test").
        num_samples: Number of samples to load or generate.
        G: Graph whose weight matrix ``G.W`` defines the network handed to the
            generator.
    """
    if load_data:
        # num_samples can't exceed the saved array size.
        return load_from_npy(split, num_samples)

    data, labels = epidemics_generator(
        g_nx=nx.from_numpy_matrix(G.W.todense()),
        batch_size=num_samples,
        timesteps=num_frames,
        # Start from a random 10% of the vertices.
        initial_nodes=random.sample(range(num_vertices), int(num_vertices / 10)),
    )
    save_to_npy(split, data, labels)
    return data, labels


def main():
    """Prepare the graph and datasets, then train or evaluate per ``action``.

    Raises:
        ValueError: If the module-level ``action`` is neither ``"train"`` nor
            ``"eval"``. This is now checked before any work is done;
            previously an invalid action failed earlier with an unrelated
            NameError/UnboundLocalError.
        RuntimeError: If no vertex permutation is available for the datasets
            (currently always the case for ``action == "eval"``).
    """
    if action not in ("train", "eval"):
        raise ValueError("No valid action selected")

    # Vertex permutation produced by coarsening; required by perm_data below.
    perm = None

    if action == "train":
        # Initialize data
        G = graphs.Community(num_vertices, seed=seed)
        G.compute_laplacian("normalized")

        # Prepare pooling
        num_levels = _number_of_pooling_levels(vertex_poolings)
        # Retry until coarsening succeeds.
        # NOTE(review): this loops forever if coarsen() fails with IndexError
        # on every attempt - presumably coarsen is randomized; confirm.
        while True:
            try:
                adjacencies, perm = coarsen(G.A, levels=num_levels)  # Coarsens in powers of 2
                break
            except IndexError:
                continue

        L = [initialize_laplacian_tensor(A) for A in adjacencies]
        L = keep_pooling_laplacians(L, vertex_poolings)
    else:
        # NOTE(review): `log_dir` is not defined anywhere in the visible
        # notebook; it must exist at module level for this branch to run.
        W = np.load(os.path.join(log_dir, "graph_weights.npy"))
        G = graphs.Graph(W)
        G.compute_laplacian("normalized")

    if perm is None:
        # Fail before the expensive data generation instead of hitting an
        # unbound `perm` afterwards.
        raise RuntimeError(
            "No vertex permutation available: action='eval' needs the permutation "
            "computed by coarsening during training, which is not persisted.")

    if action == "train":
        #G_nx = nx.from_numpy_matrix(G.W.todense())
        if not load_data:
            # Seed once, right before the first generated split, so the train
            # and test initial nodes are reproducible.
            random.seed(seed)
        train_data, train_labels = _load_or_generate_split('train', num_train, G)
        train_data = perm_data(train_data, perm)
        train_mb_source = MinibatchSource(train_data, train_labels, repeat=True)

    test_data, test_labels = _load_or_generate_split('test', num_test, G)
    test_data = perm_data(test_data, perm)
    test_mb_source = MinibatchSource(test_data, test_labels, repeat=False)

    if action == "train":
        #params = vars(FLAGS)
        #with open(os.path.join(log_dir, "params.json"), "w") as f:
        #    json.dump(params, f)

        # Run training and evaluation loop
        print("Training model...")
        run_training(L, train_mb_source, test_mb_source)
    else:
        print("Evaluating model...")
        run_eval(test_mb_source)


In [7]:
main()

2019-10-16 22:29:55,642:[INFO](pygsp.graphs.community.__init__): Constructed using eps-NN with eps = 3.34370152488211


Layer 0: M_0 = |V| = 536 nodes (36 added),|E| = 3686 edges
Layer 1: M_1 = |V| = 268 nodes (9 added),|E| = 1890 edges
Layer 2: M_2 = |V| = 134 nodes (3 added),|E| = 863 edges
Layer 3: M_3 = |V| = 67 nodes (0 added),|E| = 473 edges
[1.6408342]
[1.549751]
[1.4593617]
[1.3968053]

Generating epidemics


6000

 
 Epidemics Data Generated. Shape: (6000, 500, 128, 1)
(6000,)

Generating epidemics


2000

 
 Epidemics Data Generated. Shape: (2000, 500, 128, 1)
(2000,)
Training model...
Training deep FIR-TV model...
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.

Instructions for updating:
`NHWC` for data_format is deprecated, use `NWC` instead

The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related 