In [1]:
import os
from os.path import join
from pathlib import Path
cur_dir = Path(os.getcwd())
par_dir = cur_dir.parent.absolute()

import sys
import time
import datetime
import numpy as np
import pickle as pkl
import tensorflow as tf
import scipy.sparse as sps

sys.path.append(str(par_dir))
from utils import *
from tensorflow.python.saved_model import tag_constants
from models import scGCN
# sys.stdout = open(str(cur_dir.joinpath('PBMC_lr=0.05_eps=50_outputs.txt')), "w")

import warnings
warnings.filterwarnings("ignore")
#' del_all_flags(FLAGS)

# Set random seed
# Seed both NumPy's global RNG and TensorFlow's graph-level RNG so repeated
# runs start from the same state. A single TensorFlow call is enough: the
# legacy `tf.set_random_seed` name is an alias of the compat.v1 function, so
# calling both only repeated the same work.
seed = 123
np.random.seed(seed)
tf.compat.v1.set_random_seed(seed)

In [2]:
# Settings
exp_id = 'HumanFetal_200k'
flags = tf.app.flags
FLAGS = flags.FLAGS


def _define_flag(definer, name, default, help_text):
    """Register a flag, replacing any definition left over from a previous run.

    Defining the same flag name twice from the notebook raises
    ``DuplicateFlagError``, which is what happens when this cell is executed a
    second time in a live kernel. Removing the stale definition first makes
    the cell safe to re-run and lets an edited default take effect.

    Args:
        definer: One of the ``flags.DEFINE_*`` functions.
        name: Name of the flag.
        default: Default value of the flag.
        help_text: Help string attached to the flag.
    """
    try:
        delattr(FLAGS, name)
    except AttributeError:
        # The flag has not been registered yet (first execution of the cell).
        pass
    definer(name, default, help_text)


_define_flag(flags.DEFINE_string, 'f', '', 'kernel')  # to run in jupyter kernels
_define_flag(flags.DEFINE_string, 'dataset',
             join(str(par_dir), f'input/{exp_id}'), 'data dir')
_define_flag(flags.DEFINE_string, 'output',
             join(str(cur_dir), f'{exp_id}_results'), 'predicted results')
_define_flag(flags.DEFINE_bool, 'graph', True, 'select the optional graph.')
_define_flag(flags.DEFINE_string, 'model', 'scGCN', 'Model string.')
_define_flag(flags.DEFINE_float, 'learning_rate', 0.05, 'Initial learning rate.')
_define_flag(flags.DEFINE_integer, 'epochs', 200, 'Number of epochs to train.')
_define_flag(flags.DEFINE_integer, 'hidden1', 32,
             'Number of units in hidden layer 1.')
#flags.DEFINE_integer('hidden2', 32, 'Number of units in hidden layer 2.')
_define_flag(flags.DEFINE_float, 'dropout', 0,
             'Dropout rate (1 - keep probability).')
_define_flag(flags.DEFINE_float, 'weight_decay', 0,
             'Weight for L2 loss on embedding matrix.')
_define_flag(flags.DEFINE_integer, 'early_stopping', 10,
             'Tolerance for early stopping (# of epochs).')
_define_flag(flags.DEFINE_integer, 'max_degree', 3,
             'Maximum Chebyshev polynomial degree.')

# output flow
# sys.stdout = open(str(cur_dir.joinpath(f'{exp_id}_outputs.txt')), "w")

<absl.flags._flagvalues.FlagHolder at 0x7f822f8fb908>

In [4]:
# Load data: graph, features, per-split label matrices and masks
(adj, features,
 labels_binary_train, labels_binary_val, labels_binary_test,
 train_mask, pred_mask, val_mask, test_mask,
 new_label, true_label, index_guide) = load_data(FLAGS.dataset,
                                                 rgraph=FLAGS.graph)

# The preprocessed adjacency is the only graph support handed to the model
support = [preprocess_adj(adj)]
num_supports = 1
model_func = scGCN

# Define placeholders. They are created in the same order as the keys below:
# support, features, labels, labels_mask, dropout, num_features_nonzero.
support_inputs = [tf.sparse_placeholder(tf.float32)
                  for _ in range(num_supports)]
# features[2] is passed as the dense shape of the sparse feature input
features_input = tf.sparse_placeholder(
    tf.float32, shape=tf.constant(features[2], dtype=tf.int64))
# One label column per column of the training label matrix
labels_input = tf.placeholder(
    tf.float32, shape=(None, labels_binary_train.shape[1]))
labels_mask_input = tf.placeholder(tf.int32)
# Falls back to 0. whenever a feed dict does not supply a dropout value
dropout_input = tf.placeholder_with_default(0., shape=())
# helper variable for sparse dropout
nonzero_input = tf.placeholder(tf.int32)

placeholders = {
    'support': support_inputs,
    'features': features_input,
    'labels': labels_input,
    'labels_mask': labels_mask_input,
    'dropout': dropout_input,
    'num_features_nonzero': nonzero_input,
}

# Create model
model = model_func(placeholders, input_dim=features[2][1], logging=True)

# Define model evaluation function
def evaluate(features, support, labels, mask, placeholders):
    """Compute the model's loss and accuracy for one labelled subset.

    Relies on the notebook-level `sess` and `model`, so both must exist by the
    time this function is called.

    Args:
        features: Feature input forwarded to `construct_feed_dict`.
        support: Graph support(s) forwarded to `construct_feed_dict`.
        labels: Label matrix forwarded to `construct_feed_dict`.
        mask: Mask forwarded to `construct_feed_dict`.
        placeholders: Placeholder dict forwarded to `construct_feed_dict`.

    Returns:
        A `(loss, accuracy, elapsed_seconds)` tuple, where the elapsed time
        covers building the feed dict and running the session.
    """
    started_at = time.time()
    feed = construct_feed_dict(features, support, labels, mask, placeholders)
    metrics = sess.run([model.loss, model.accuracy], feed_dict=feed)
    loss_value = metrics[0]
    accuracy_value = metrics[1]
    elapsed = time.time() - started_at
    return loss_value, accuracy_value, elapsed

load data succesfully....
Constructing adjaceny graph
assign input coordinatly....






Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See `tf.nn.softmax_cross_entropy_with_logits_v2`.



In [6]:
# Initialize session
sess = tf.Session()
# Init variables
sess.run(tf.global_variables_initializer())

# Per-epoch metric histories
train_accuracy = []
train_loss = []
val_accuracy = []
val_loss = []
test_accuracy = []
test_loss = []

# Train model

# Configure the checkpoint directory that receives the model weights
saver = tf.train.Saver()
save_dir = str(cur_dir.joinpath(f'{exp_id}_checkpoints/'))
os.makedirs(save_dir, exist_ok=True)

save_path = os.path.join(save_dir, 'best_validation')

# Lowest validation loss seen so far. The checkpoint is only rewritten when
# this improves, so 'best_validation' holds the best weights rather than
# whatever the final (possibly worse) epoch produced.
best_val_loss = np.inf

# Inputs for the periodic whole-graph accuracy report. They do not change
# between epochs, so they are prepared once outside the loop.
all_mask = np.ones(len(train_mask), dtype=bool)
labels_binary_all = new_label
true_classes = np.argmax(np.asarray(labels_binary_all.astype('int32')), axis=1)

for epoch in range(FLAGS.epochs):
    # Construct feed dictionary
    feed_dict = construct_feed_dict(features, support, labels_binary_train,
                                    train_mask, placeholders)
    feed_dict.update({placeholders['dropout']: FLAGS.dropout})
    # Training step
    outs = sess.run([model.opt_op, model.loss, model.accuracy],
                    feed_dict=feed_dict)
    train_accuracy.append(outs[2])
    train_loss.append(outs[1])
    # Validation
    cost, acc, val_seconds = evaluate(features, support, labels_binary_val,
                                      val_mask, placeholders)
    val_loss.append(cost)
    val_accuracy.append(acc)
    test_cost, test_acc, test_seconds = evaluate(features, support,
                                                 labels_binary_test,
                                                 test_mask, placeholders)
    test_accuracy.append(test_acc)
    test_loss.append(test_cost)
    # Always write the first epoch so a checkpoint exists even if the
    # validation loss never becomes comparable (e.g. NaN).
    if epoch == 0 or cost < best_val_loss:
        best_val_loss = cost
        saver.save(sess=sess, save_path=save_path)
    print(f'=========== epoch={epoch+1}')
    # Stop once the newest validation loss exceeds the mean of the preceding
    # `early_stopping` epochs.
    if epoch > FLAGS.early_stopping and val_loss[-1] > np.mean(
            val_loss[-(FLAGS.early_stopping + 1):-1]):
        print("Early stopping...")
        break

    if (epoch+1) % 20 == 0:
        # The dropout placeholder is left at its default of 0. here, matching
        # `evaluate`: this report is inference, not a training step.
        feed_dict_all = construct_feed_dict(features, support, labels_binary_all,
                                            all_mask, placeholders)

        # Fetch activations and outputs in one forward pass
        activations, predict_output = sess.run(
            [model.activations, model.outputs], feed_dict=feed_dict_all)
        activation_output = activations[1]

        #' accuracy on all masks
        # Softmax does not change which class scores highest, so the argmax
        # is taken on the raw outputs in NumPy. Building tf ops here would
        # add new nodes to the graph on every report.
        predicted_classes = np.argmax(predict_output, axis=1)
        all_prediction = predicted_classes == true_classes

        #' accuracy on prediction masks 
        acc_train = np.sum(all_prediction[train_mask]) / np.sum(train_mask)
        acc_test = np.sum(all_prediction[test_mask]) / np.sum(test_mask)
        acc_val = np.sum(all_prediction[val_mask]) / np.sum(val_mask)
        acc_pred = np.sum(all_prediction[pred_mask]) / np.sum(pred_mask)
        print('Checking train/test/val set accuracy: {}, {}, {}'.format(
            acc_train, acc_test, acc_val))
        print('Checking pred set accuracy: {}'.format(acc_pred))

print("Finished Training....")

Checking train/test/val set accuracy: 0.7202557449962935, 0.72, 0.7147999446213484
Checking pred set accuracy: 0.20033333333333334
Checking train/test/val set accuracy: 0.9225969854212998, 0.9193769470404984, 0.9188702755087914
Checking pred set accuracy: 0.26145
Checking train/test/val set accuracy: 0.949916604892513, 0.9413084112149532, 0.9450366883566386
Checking pred set accuracy: 0.3326
Checking train/test/val set accuracy: 0.9613448233259204, 0.948785046728972, 0.9519590197978679
Checking pred set accuracy: 0.3559
Checking train/test/val set accuracy: 0.9686805040770942, 0.9558878504672897, 0.9556970787761317
Checking pred set accuracy: 0.37955833333333333
Checking train/test/val set accuracy: 0.9739004200642452, 0.9583800623052959, 0.9586044579814481
Checking pred set accuracy: 0.3901
Checking train/test/val set accuracy: 0.9783944897454905, 0.959626168224299, 0.9601273708985186
Checking pred set accuracy: 0.39535
Checking train/test/val set accuracy: 0.981359649122807, 0.959875

In [7]:
# Final evaluation: run the trained model over every node and report the
# accuracy of each split.
all_mask = np.ones(len(train_mask), dtype=bool)
labels_binary_all = new_label

# The dropout placeholder is left at its default of 0.: this is inference, so
# the training dropout rate must not be applied.
feed_dict_all = construct_feed_dict(features, support, labels_binary_all,
                                    all_mask, placeholders)

# Fetch activations and outputs in a single forward pass
activations, predict_output = sess.run([model.activations, model.outputs],
                                       feed_dict=feed_dict_all)
activation_output = activations[1]

#' accuracy on all masks
# Softmax over the model outputs
ab = sess.run(tf.nn.softmax(predict_output))
# The class comparison is plain array work, so it is done in NumPy instead of
# creating extra graph ops and session round-trips.
predicted_classes = np.argmax(ab, axis=1)
true_classes = np.argmax(np.asarray(labels_binary_all.astype('int32')), axis=1)
all_prediction = predicted_classes == true_classes

#' accuracy on prediction masks 
acc_train = np.sum(all_prediction[train_mask]) / np.sum(train_mask)
acc_test = np.sum(all_prediction[test_mask]) / np.sum(test_mask)
acc_val = np.sum(all_prediction[val_mask]) / np.sum(val_mask)
acc_pred = np.sum(all_prediction[pred_mask]) / np.sum(pred_mask)
print('Checking train/test/val set accuracy: {}, {}, {}'.format(
    acc_train, acc_test, acc_val))
print('Checking pred set accuracy: {}'.format(acc_pred))

Checking train/test/val set accuracy: 0.9829503335804299, 0.9601246105919004, 0.9612349439291153
Checking pred set accuracy: 0.3989333333333333


In [8]:
# Display the prediction-set, training and validation accuracies as the cell output
acc_pred, acc_train, acc_val

(0.3989333333333333, 0.9829503335804299, 0.9612349439291153)