In [1]:
from __future__ import division
from __future__ import print_function

import time
import tensorflow.compat.v1 as tf 

from utils import *
from models import GCN, MLP
import pickle as pkl
import sys
import scipy.sparse as sp
# Location and name of the dataset files loaded further down.
# NOTE: in the visible code the files are selected by `dataset_str`, not by
# FLAGS.dataset (whose only use is in a commented-out load_data call).
path = 'data/'
dataset_str = 'Amazon'

# Set random seed (same seed for NumPy and TensorFlow)
seed = 123
np.random.seed(seed)
tf.set_random_seed(seed)

# Settings
flags = tf.app.flags
# NOTE(review): presumably absorbs the `-f <connection file>` argument that a
# Jupyter kernel passes on its command line -- confirm before removing.
tf.app.flags.DEFINE_string('f', '', 'kernel')
# The graph below is built from placeholders and executed through tf.Session.
tf.compat.v1.disable_eager_execution()
FLAGS = flags.FLAGS
flags.DEFINE_string('dataset', 'Yelp', 'Dataset string.')  # e.g. 'cora', 'citeseer', 'pubmed', 'yelp', 'amazon'
flags.DEFINE_string('model', 'gcn', 'Model string.')  # 'gcn', 'gcn_cheby', 'dense'
flags.DEFINE_float('learning_rate', 0.01, 'Initial learning rate.')
flags.DEFINE_integer('epochs', 200, 'Number of epochs to train.')
flags.DEFINE_integer('hidden1', 16, 'Number of units in hidden layer 1.')
flags.DEFINE_float('dropout', 0.5, 'Dropout rate (1 - keep probability).')
flags.DEFINE_float('weight_decay', 5e-4, 'Weight for L2 loss on embedding matrix.')
flags.DEFINE_integer('early_stopping', 10, 'Tolerance for early stopping (# of epochs).')
flags.DEFINE_integer('max_degree', 3, 'Maximum Chebyshev polynomial degree.')

# # Load data
# adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask = load_data(FLAGS.dataset)
"""
Loads input data from gcn/data directory

ind.dataset_str.x => the feature vectors of the training instances as scipy.sparse.csr.csr_matrix object;
ind.dataset_str.tx => the feature vectors of the test instances as scipy.sparse.csr.csr_matrix object;
ind.dataset_str.allx => the feature vectors of both labeled and unlabeled training instances
    (a superset of ind.dataset_str.x) as scipy.sparse.csr.csr_matrix object;
ind.dataset_str.y => the one-hot labels of the labeled training instances as numpy.ndarray object;
ind.dataset_str.ty => the one-hot labels of the test instances as numpy.ndarray object;
ind.dataset_str.ally => the labels for instances in ind.dataset_str.allx as numpy.ndarray object;
ind.dataset_str.graph => a dict in the format {index: [index_of_neighbor_nodes]} as collections.defaultdict
    object;
ind.dataset_str.test.index => the indices of test instances in graph, for the inductive setting as list object.

All objects above must be saved using python pickle module.

:param dataset_str: Dataset name
:return: All data input files loaded (as well as the training/test data).
"""
# train.py
# Define model evaluation function
def evaluate(features, support, labels, mask, placeholders):
    """Run one evaluation pass of the global ``model`` in the global ``sess``.

    A feed dictionary is built with ``construct_feed_dict`` (dropout is not
    fed) and the model's loss, accuracy, precision and recall are fetched.

    :return: ``(loss, accuracy, precision, recall, elapsed_seconds)``.
    """
    started = time.time()
    feed = construct_feed_dict(features, support, labels, mask, placeholders)
    fetches = [model.loss, model.accuracy, model.precision, model.recall]
    loss, accuracy, precision, recall = sess.run(fetches, feed_dict=feed)
    return loss, accuracy, precision, recall, (time.time() - started)


def construct_feed_dict(features, support, labels, labels_mask, placeholders):
    """Map each placeholder in ``placeholders`` to the value it should be fed.

    ``placeholders`` is indexed by the keys ``'labels'``, ``'labels_mask'``,
    ``'features'``, ``'support'`` (a sequence, one entry per support matrix)
    and ``'num_features_nonzero'``. The last one receives ``features[1].shape``.

    :return: dict from placeholder object to fed value.
    """
    feed_dict = {
        placeholders['labels']: labels,
        placeholders['labels_mask']: labels_mask,
        placeholders['features']: features,
    }
    # One support placeholder per support matrix, matched by position.
    for position, matrix in enumerate(support):
        feed_dict[placeholders['support'][position]] = matrix
    feed_dict[placeholders['num_features_nonzero']] = features[1].shape
    return feed_dict

# utils.py
def sparse_to_tuple(sparse_mx):
    """Turn a sparse matrix (or a list of them) into ``(coords, values, shape)``.

    ``coords`` holds one ``(row, col)`` pair per stored entry, ``values`` the
    stored data and ``shape`` the matrix shape. When a list is passed, its
    elements are replaced in place and the same list object is returned.
    """
    def as_triplet(matrix):
        # Work on the COO form, converting only when necessary.
        coo = matrix if sp.isspmatrix_coo(matrix) else matrix.tocoo()
        return np.vstack((coo.row, coo.col)).transpose(), coo.data, coo.shape

    if not isinstance(sparse_mx, list):
        return as_triplet(sparse_mx)

    for position, matrix in enumerate(sparse_mx):
        sparse_mx[position] = as_triplet(matrix)
    return sparse_mx
    
#utils
def parse_index_file(filename):
    """Parse an index file containing one integer per line.

    The file is opened in a ``with`` block so the handle is closed even if
    parsing fails. Lines that are empty or whitespace-only (for example a
    trailing blank line) are skipped.

    :param filename: path of the text file to read.
    :return: list of ints in file order.
    :raises ValueError: if a non-blank line is not a valid integer.
    """
    with open(filename) as handle:
        # int() tolerates surrounding whitespace, including the newline.
        return [int(line) for line in handle if line.strip()]
#utils
def sample_mask(idx, l):
    """Create a boolean mask of length ``l`` that is True at positions ``idx``.

    Uses the builtin ``bool`` as dtype: the ``np.bool`` alias was removed from
    NumPy, so referencing it raises ``AttributeError`` on current releases.

    :param idx: indices (any NumPy-indexable sequence) to mark as True.
    :param l: length of the mask.
    :return: 1-D ``numpy.ndarray`` of dtype ``bool``.
    """
    mask = np.zeros(l)
    mask[idx] = 1
    return np.array(mask, dtype=bool)

#utils
def preprocess_features(features):
    """Column-normalize a sparse feature matrix and convert it to tuple form.

    Every column of ``features`` is scaled by the reciprocal of its column sum
    (``features.sum(0)``), i.e. the normalization is per feature column, not
    per row. Columns whose sum is zero are scaled by 0 instead of ``inf`` so
    that no infinite or NaN entries are produced.

    :param features: scipy sparse matrix of node features.
    :return: the result of ``sparse_to_tuple`` on the normalized matrix.
    """
    col_sums = np.array(features.sum(0)).flatten()
    # A float exponent also works for integer-valued matrices (NumPy rejects
    # negative integer powers of integers). Division-by-zero warnings are
    # silenced because zero sums are handled explicitly on the next line.
    with np.errstate(divide='ignore'):
        col_inv = np.power(col_sums, -1.0)
    col_inv[np.isinf(col_inv)] = 0.
    col_scale = sp.diags(col_inv)
    # Right-multiplying by a diagonal matrix scales the columns.
    features_ch = features.dot(col_scale)
    return sparse_to_tuple(features_ch)

#utils
def preprocess_adj(adj):
    """Prepare an adjacency matrix for the simple GCN model.

    The identity matrix is added to ``adj``, the sum is passed through
    ``normalize_adj`` and the result is converted to tuple representation
    with ``sparse_to_tuple``.
    """
    with_self_loops = adj + sp.eye(adj.shape[0])
    return sparse_to_tuple(normalize_adj(with_self_loops))

# Suffixes of the pickled files that together make up one dataset.
names = ['x', 'y', 'tx', 'ty', 'allx', 'ally', 'graph']

# On Python 3 the pickles are read with encoding='latin1'; Python 2 takes no
# such argument.
# NOTE(review): unpickling runs arbitrary code -- only load trusted files.
load_kwargs = {'encoding': 'latin1'} if sys.version_info > (3, 0) else {}
objects = []
for name in names:
    with open(path+"ind.{}.{}".format(dataset_str, name), 'rb') as f:
        objects.append(pkl.load(f, **load_kwargs))

x, y, tx, ty, allx, ally, graph = objects

# Test indices as stored in the file, and the same indices in sorted order.
test_idx_reorder = parse_index_file(path+"ind.{}.test.index".format(dataset_str))
test_idx_range = np.sort(test_idx_reorder)

# Stack training and test rows, then move the test rows to the positions
# listed in the index file (same permutation for features and labels).
features = sp.vstack((allx, tx)).tolil()
features[test_idx_reorder, :] = features[test_idx_range, :]
labels = np.vstack((ally, ty))
labels[test_idx_reorder, :] = labels[test_idx_range, :]
adj = nx.adjacency_matrix(nx.from_dict_of_lists(graph))

# Split: the first len(y) nodes train, the next 500 validate, and the test
# nodes are those named by the index file.
idx_train = range(len(y))
idx_val = range(len(y), len(y)+500)
idx_test = test_idx_range.tolist()

train_mask = sample_mask(idx_train, labels.shape[0])
val_mask = sample_mask(idx_val, labels.shape[0])
test_mask = sample_mask(idx_test, labels.shape[0])

# Per-split label matrices: zero everywhere except on the rows of that split.
y_train, y_val, y_test = (np.zeros(labels.shape) for _ in range(3))
for split_labels, split_mask in ((y_train, train_mask),
                                 (y_val, val_mask),
                                 (y_test, test_mask)):
    split_labels[split_mask, :] = labels[split_mask, :]


# print(features[0:10])
# Some preprocessing
features_ch = preprocess_features(features)
# print(features_ch[2])

# Reject unknown model names before doing any adjacency preprocessing.
if FLAGS.model not in ('gcn', 'gcn_cheby', 'dense'):
    raise ValueError('Invalid argument for model: ' + str(FLAGS.model))

if FLAGS.model == 'gcn_cheby':
    # One support matrix per polynomial order 0..max_degree.
    support = chebyshev_polynomials(adj, FLAGS.max_degree)
    num_supports = 1 + FLAGS.max_degree
else:
    # 'gcn' uses the preprocessed adjacency; for 'dense' it is built but not used.
    support = [preprocess_adj(adj)]
    num_supports = 1

model_func = MLP if FLAGS.model == 'dense' else GCN

# Define placeholders: the graph inputs that construct_feed_dict fills in on
# every sess.run call.
placeholders = {
    # One sparse placeholder per support matrix.
    'support': [tf.sparse_placeholder(tf.float32) for _ in range(num_supports)],
    # features_ch is the (coords, values, shape) tuple from preprocess_features;
    # element [2] is the dense shape of the feature matrix.
    'features': tf.sparse_placeholder(tf.float32, shape=tf.constant(features_ch[2], dtype=tf.int64)),
    # Any number of rows, one column per column of y_train.
    'labels': tf.placeholder(tf.float32, shape=(None, y_train.shape[1])),
    'labels_mask': tf.placeholder(tf.int32),
    # Defaults to 0 when not fed (evaluate() does not feed it).
    'dropout': tf.placeholder_with_default(0., shape=()),
    'num_features_nonzero': tf.placeholder(tf.int32)  # helper variable for sparse dropout
}
# Create model; input_dim is the second entry of the feature matrix shape.
model = model_func(placeholders, input_dim=features_ch[2][1], logging=True)

# Initialize session (used as a global by evaluate())
sess = tf.Session()

# Init variables
sess.run(tf.global_variables_initializer())

# Validation loss of every epoch so far; drives the early-stopping test below.
cost_val = []

# Train model
for epoch in range(FLAGS.epochs):

    t = time.time()
    # Construct feed dictionary; dropout is fed only for the training step.
    feed_dict = construct_feed_dict(features_ch, support, y_train, train_mask, placeholders)
    feed_dict.update({placeholders['dropout']: FLAGS.dropout})

    # Training step: outs[0] is the optimizer op result, outs[1] the loss,
    # outs[2] the accuracy.
    outs = sess.run([model.opt_op, model.loss, model.accuracy], feed_dict=feed_dict)

    # Validation
    cost, acc, pre, recall, duration = evaluate(features_ch, support, y_val, val_mask, placeholders)
    cost_val.append(cost)

    # Print results (validation recall and evaluate()'s duration are computed
    # but not printed; "time=" covers the whole epoch).
    print("Epoch:", '%04d' % (epoch + 1), "train_loss=", "{:.5f}".format(outs[1]),
          "train_acc=", "{:.5f}".format(outs[2]), "val_loss=", "{:.5f}".format(cost),
          "val_acc=", "{:.5f}".format(acc), "val_pre=", "{:.5f}".format(pre), "time=", "{:.5f}".format(time.time() - t))

    # Early stopping: once more than `early_stopping` epochs have run, stop as
    # soon as the latest validation loss exceeds the mean of the
    # `early_stopping` losses that preceded it.
    if epoch > FLAGS.early_stopping and cost_val[-1] > np.mean(cost_val[-(FLAGS.early_stopping+1):-1]):
        print("Early stopping...")
        break
print("Optimization Finished!")

# Testing: a single evaluation pass on the test mask.
test_cost, test_acc, test_pre, test_recall, test_duration = evaluate(features_ch, support, y_test, test_mask, placeholders)
print("Test set results:", "cost=", "{:.5f}".format(test_cost),
      "accuracy=", "{:.5f}".format(test_acc), "precision=", "{:.5f}".format(test_pre),"recall=","{:.5f}".format(test_recall),"time=", "{:.5f}".format(test_duration))



Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See `tf.nn.softmax_cross_entropy_with_logits_v2`.

Epoch: 0001 train_loss= 0.69464 train_acc= 0.74306 val_loss= 0.69457 val_acc= 0.78600 val_pre= 0.80042 time= 0.09273
Epoch: 0002 train_loss= 0.69436 train_acc= 0.75142 val_loss= 0.69426 val_acc= 0.78800 val_pre= 0.79234 time= 0.00807
Epoch: 0003 train_loss= 0.69416 train_acc= 0.75409 val_loss= 0.69396 val_acc= 0.78800 val_pre= 0.79234 time= 0.00695
Epoch: 0004 train_loss= 0.69375 train_acc= 0.75789 val_loss= 0.69366 val_acc= 0.79000 val_pre= 0.79158 time= 0.00733
Epoch: 0005 train_loss= 0.69332 train_acc= 0.76967 val_loss= 0.69337 val_acc= 0.79200 val_pre= 0.79200 time= 0.00700
Epoch: 0006 train_loss= 0.69316 train_acc= 0.77157 val_loss= 0.69308 val_acc= 0.79200 val_pre= 0.79200 time= 

Epoch: 0082 train_loss= 0.65872 train_acc= 0.77385 val_loss= 0.65708 val_acc= 0.79200 val_pre= 0.79200 time= 0.00727
Epoch: 0083 train_loss= 0.65700 train_acc= 0.77385 val_loss= 0.65637 val_acc= 0.79200 val_pre= 0.79200 time= 0.00748
Epoch: 0084 train_loss= 0.65866 train_acc= 0.77385 val_loss= 0.65567 val_acc= 0.79200 val_pre= 0.79200 time= 0.00756
Epoch: 0085 train_loss= 0.65210 train_acc= 0.77385 val_loss= 0.65496 val_acc= 0.79200 val_pre= 0.79200 time= 0.00744
Epoch: 0086 train_loss= 0.65660 train_acc= 0.77385 val_loss= 0.65426 val_acc= 0.79200 val_pre= 0.79200 time= 0.00696
Epoch: 0087 train_loss= 0.65285 train_acc= 0.77385 val_loss= 0.65355 val_acc= 0.79200 val_pre= 0.79200 time= 0.00702
Epoch: 0088 train_loss= 0.65192 train_acc= 0.77385 val_loss= 0.65284 val_acc= 0.79200 val_pre= 0.79200 time= 0.00710
Epoch: 0089 train_loss= 0.65054 train_acc= 0.77385 val_loss= 0.65214 val_acc= 0.79200 val_pre= 0.79200 time= 0.00727
Epoch: 0090 train_loss= 0.65172 train_acc= 0.77385 val_loss= 0.6

Epoch: 0163 train_loss= 0.61844 train_acc= 0.77385 val_loss= 0.61543 val_acc= 0.79200 val_pre= 0.79200 time= 0.00741
Epoch: 0164 train_loss= 0.62640 train_acc= 0.77385 val_loss= 0.61519 val_acc= 0.79200 val_pre= 0.79200 time= 0.00770
Epoch: 0165 train_loss= 0.62594 train_acc= 0.77385 val_loss= 0.61495 val_acc= 0.79200 val_pre= 0.79200 time= 0.00721
Epoch: 0166 train_loss= 0.61624 train_acc= 0.77385 val_loss= 0.61470 val_acc= 0.79200 val_pre= 0.79200 time= 0.00750
Epoch: 0167 train_loss= 0.62536 train_acc= 0.77385 val_loss= 0.61447 val_acc= 0.79200 val_pre= 0.79200 time= 0.00704
Epoch: 0168 train_loss= 0.61920 train_acc= 0.77385 val_loss= 0.61424 val_acc= 0.79200 val_pre= 0.79200 time= 0.00705
Epoch: 0169 train_loss= 0.62798 train_acc= 0.77385 val_loss= 0.61401 val_acc= 0.79200 val_pre= 0.79200 time= 0.00693
Epoch: 0170 train_loss= 0.62195 train_acc= 0.77385 val_loss= 0.61379 val_acc= 0.79200 val_pre= 0.79200 time= 0.00720
Epoch: 0171 train_loss= 0.62075 train_acc= 0.77385 val_loss= 0.6

In [13]:
# Scratch cell: build a nested dictionary in one literal and print it.
temp_list = [[1, 2, 3], [4, 5, 5]]
temp_dic = {1: {'saeed': 2}}
print(temp_dic)

{1: {'saeed': 2}}
