In [1]:
import pandas as pd
import numpy as np
import scipy
import time
import sys
# Import T. Kipf's GCN implementation
# https://github.com/tkipf/gcn
sys.path.append('../gcn/gcn/')
from utils import *
import random
from models import *

  return f(*args, **kwds)


### Load data, configuration and preprocessing

In [2]:
# Settings
# Hyper-parameters are registered on TensorFlow's flags object and read back
# later as attributes of FLAGS (this notebook reads FLAGS.epochs, FLAGS.dropout
# and FLAGS.early_stopping in the training cell). The remaining flags are not
# read in the visible cells; presumably the imported GCN model code consumes
# them — verify against ../gcn/gcn/models.py.
# NOTE(review): `tf` is not imported explicitly in the import cell, so it has to
# arrive through one of the star imports — confirm, or import it explicitly.
# NOTE(review): every DEFINE_* call registers a name on a shared object, so
# re-running this cell in the same kernel will presumably raise a
# duplicate-flag error — confirm; restart the kernel before re-running.
flags = tf.app.flags
FLAGS = flags.FLAGS
flags.DEFINE_float('learning_rate', 0.01, 'Initial learning rate.')
flags.DEFINE_integer('epochs', 200, 'Number of epochs to train.')
flags.DEFINE_integer('hidden1', 16, 'Number of units in hidden layer 1.')
flags.DEFINE_float('dropout', 0.5, 'Dropout rate (1 - keep probability).')
flags.DEFINE_float('weight_decay', 5e-4, 'Weight for L2 loss on embedding matrix.')
flags.DEFINE_integer('early_stopping', 10, 'Tolerance for early stopping (# of epochs).')
flags.DEFINE_integer('max_degree', 3, 'Maximum Chebyshev polynomial degree.')

In [67]:
# Load the two inputs from files in the current working directory.
#   W - read from 'adjacency_small.pkl'; later cells treat it as the (n x n)
#       adjacency matrix.
#   H - read from 'history_small.hdf' under key 'hist'; later cells treat it as
#       the (n x m) history matrix.
# Both are subsequently used through boolean-mask assignment and `.values`.
# SECURITY: pd.read_pickle unpickles the file, and unpickling can execute
# arbitrary code. Only load pickle files that come from a trusted source.
W = pd.read_pickle('adjacency_small.pkl')
H = pd.read_hdf('history_small.hdf', key='hist')

In [68]:
# Re-code the entries of H in place so that the -100 marker ends up as 0
# (an entry a sparse matrix does not have to store), while the original
# 0 and 1 are moved out of the way to 100 and 101.
# The pairs are applied strictly in the listed order: the original zeros must
# be relocated before -100 is rewritten as 0, otherwise the two would merge.
# NOTE(review): this cell is not idempotent — running it a second time would
# send the freshly written zeros to 100.
for _old, _new in ((0, 100), (1, 101), (-100, 0)):
    H[H == _old] = _new

In [69]:
## Preprocessing History Matrix (n x m)
# Converting to float for matrix powers.
# float32 is used instead of float16: half precision represents integers
# exactly only up to 2048 and overflows above 65504, so a float16 round-trip
# can silently corrupt values, and it buys nothing once the data is handed to
# scipy.sparse, whose numeric routines work in float32/float64.
H = H.astype(np.float32)
# Thresholding: adjacency weights below 0.3 are zeroed (in place on W).
W[W < 0.3] = 0
# Convert to a LIL sparse matrix. The public name scipy.sparse.lil_matrix is
# used; the scipy.sparse.lil submodule is a private path that SciPy has
# deprecated.
# NOTE(review): after this line H is a sparse matrix and no longer has
# `.values`, so this cell cannot be re-run without reloading H.
H = scipy.sparse.lil_matrix(H.values)

In [70]:
## Preprocessing Adjacency Matrix (n x n)
# Replace the dense adjacency table by its CSR form. After this line W no
# longer exposes `.values`, so the cell cannot be re-run without reloading W.
W = scipy.sparse.csr_matrix(W.values)
# The model is given a list of support matrices; here it holds the single
# matrix returned by preprocess_adj(W).
# NOTE(review): the training cell passes `support_`, not `support` — confirm
# which name is intended.
support = [preprocess_adj(W)]
num_supports = len(support)
# Model class instantiated in the "Model" section.
model_func = GCN

In [74]:
## TODO:
## How should the placeholders be defined so that they correspond to the ones
## used by T. Kipf?
## labels = H.values.flatten(), i.e. a flattened vector of length n * m? What is the point of using both H.values.flatten() and H?
## features = H?

In [73]:
# Define placeholders
# Graph inputs keyed by name. Only the label mask, the dropout rate and the
# non-zero feature count are active; the 'support', 'features' and 'labels'
# entries are still commented out pending the TODO about how H maps onto them.
# NOTE(review): this dict is passed to the model constructor and to
# construct_feed_dict, which presumably expect those three keys as well —
# confirm and restore them before running the model cell.
# NOTE(review): in the commented-out lines, `H[2]` indexes row 2 of the sparse
# matrix rather than a shape, and `y_train_` is not defined in any visible
# cell — both need revisiting when the entries are re-enabled.
placeholders = {
    #'support': [tf.sparse_placeholder(tf.float32) for _ in range(num_supports)],
    #'features': tf.sparse_placeholder(tf.float32, shape=tf.constant(H[2], dtype=tf.int64)),
    #'labels': tf.placeholder(tf.float32, shape=(None, y_train_.shape[1])),
    'labels_mask': tf.placeholder(tf.int32),
    # Defaults to 0.; the training loop overrides it with FLAGS.dropout.
    'dropout': tf.placeholder_with_default(0., shape=()),
    'num_features_nonzero': tf.placeholder(tf.int32)  # helper variable for sparse dropout
}

### Model

In [75]:
# Create model
# Instantiates the class selected in `model_func` with the placeholder dict.
# NOTE(review): input_dim is read from y_train_[2][1], but `y_train_` is not
# defined in any visible cell — confirm where it comes from and that it is the
# intended source of the input dimension.
model = model_func(placeholders, input_dim=y_train_[2][1], logging=True)

# Initialize session
# Kept in the global `sess`, which evaluate() and the training loop use.
sess = tf.Session()

In [17]:
# Define model evaluation function
def evaluate(features, support, labels, mask, placeholders):
    """Run the model's loss and accuracy ops once and time the call.

    The five arguments are forwarded unchanged to ``construct_feed_dict``.
    The function relies on the notebook-level globals ``sess`` (the session)
    and ``model`` (providing ``loss`` and ``accuracy``).

    Returns:
        A ``(loss, accuracy, seconds)`` tuple, where ``seconds`` is the
        wall-clock time spent building the feed dict and running the ops.
    """
    started = time.time()
    feed = construct_feed_dict(features, support, labels, mask, placeholders)
    loss, accuracy = sess.run([model.loss, model.accuracy], feed_dict=feed)
    elapsed = time.time() - started
    return loss, accuracy, elapsed

### Training

In [76]:
# Initialise all TF variables before the first optimisation step.
sess.run(tf.global_variables_initializer())

# One entry per epoch: the loss returned by evaluate(), used for early stopping.
cost_val = []

# Train for at most FLAGS.epochs epochs.
for epoch in range(FLAGS.epochs):
    t = time.time()

    # Feed dict for the optimisation step, with dropout switched on.
    feed_dict = construct_feed_dict(features_, support_, y_train_, train_mask_, placeholders)
    feed_dict[placeholders['dropout']] = FLAGS.dropout

    # One optimisation step; positions 1 and 2 hold the training loss and accuracy.
    outs = sess.run([model.opt_op, model.loss, model.accuracy], feed_dict=feed_dict)
    train_loss, train_acc = outs[1], outs[2]

    # "Validation" pass.
    # NOTE(review): evaluate() receives the same training tensors and mask as
    # the step above, so the val_* figures (and early stopping) are computed
    # on the training split — confirm whether a held-out split was intended.
    cost, acc, duration = evaluate(features_, support_, y_train_, train_mask_, placeholders)
    cost_val.append(cost)

    # Per-epoch progress line.
    print("Epoch:", '%04d' % (epoch + 1),
          "train_loss=", "{:.5f}".format(train_loss),
          "train_acc=", "{:.5f}".format(train_acc),
          "val_loss=", "{:.5f}".format(cost),
          "val_acc=", "{:.5f}".format(acc),
          "time=", "{:.5f}".format(time.time() - t))

    # Early stopping is only considered once enough epochs have elapsed.
    if epoch <= FLAGS.early_stopping:
        continue
    # Stop when the latest loss exceeds the mean of the preceding
    # FLAGS.early_stopping losses.
    recent_mean = np.mean(cost_val[-(FLAGS.early_stopping + 1):-1])
    if cost_val[-1] > recent_mean:
        print("Early stopping...")
        break