# XOR problem

## 1. Preliminaries
### 1.1. Imports
We begin by deleting the log directories left over from previous runs and importing the Python libraries used throughout this notebook:

In [1]:
%%bash
# Delete the logdirs folder left by earlier runs, so that new_logdir() starts numbering from ffbp_logdir_000 again
rm -rf logdirs

In [2]:
import os
import pickle
import tensorflow as tf
import numpy as np
from collections import OrderedDict, namedtuple
# Report the library versions and the working directory; the relative data and
# checkpoint paths used further down resolve against that directory.
print('tensorflow version: {}'.format(tf.__version__))
print('numpy version: {}'.format(np.__version__))
print('current working directory: {}'.format(os.getcwd()))

tensorflow version: 1.3.0
numpy version: 1.12.1
current working directory: /Users/alexten/Projects/pdpyflow/xor


### 1.2. Functions and Classes

In [3]:
def snap2pickle(logdir, snap):
    '''
    Append ``snap`` to the list of snapshots pickled in ``<logdir>/snap.pkl``.

    The file holds a single pickled list. If it does not exist yet, it is
    created with ``[snap]`` as its content.

    Args:
        logdir: path of an existing directory that holds (or will hold) snap.pkl.
        snap: any picklable object (the model code stores dicts here).

    Raises:
        FileNotFoundError: if ``logdir`` itself does not exist.
        pickle.PicklingError / AttributeError / TypeError: if ``snap`` cannot
            be pickled; the existing file is left untouched in that case.
    '''
    path = os.path.join(logdir, 'snap.pkl')

    # NOTE: pickle.load executes arbitrary code from the file; only point this
    # at log directories written by this notebook.
    try:
        with open(path, 'rb') as snap_file:
            snaps = pickle.load(snap_file)
    except FileNotFoundError:
        snaps = []
    snaps.append(snap)

    # Serialise BEFORE opening for writing: open(..., 'wb') truncates the file,
    # so a failing dump would otherwise wipe all previously stored snapshots.
    payload = pickle.dumps(snaps)
    with open(path, 'wb') as snap_file:
        snap_file.write(payload)


class InputData(object):
    '''
    Queue-based input pipeline over one CSV-formatted text file.

    Up to ``data_len`` lines are read in one go (header skipping enabled) and
    decoded as one string label followed by ``sum(inp_size) + targ_size``
    float fields. The columns are regrouped into a label tensor, one tensor
    per input group and one target tensor, sliced into single examples
    (optionally shuffled) and re-batched.

    Attributes:
        path, batch_size, inp_size (always a list), targ_size, data_len:
            the constructor arguments, kept for later use.
        examples_batch: list of batch tensors ordered as
            [labels, input group 1, ..., input group k, targets].
    '''
    def __init__(self, path_to_data_file, num_epochs, batch_size, inp_size, targ_size, data_len,
                 shuffle = False, shuffle_seed = None):
        # Keep the configuration around
        self.path = path_to_data_file
        self.batch_size = batch_size
        if isinstance(inp_size, int):
            # a single input group is normalised to a one-element list
            self.inp_size = [inp_size]
        else:
            self.inp_size = inp_size
        self.targ_size = targ_size
        self.data_len = data_len

        # Queue holding the (single) file name; the file order is never shuffled
        file_queue = tf.train.string_input_producer(string_tensor = [path_to_data_file], shuffle = False)

        # Line reader that pulls up to data_len records per read
        line_reader = tf.TextLineReader(skip_header_lines=True, name='csv_reader')
        _, lines = line_reader.read_up_to(file_queue, num_records=data_len)

        # Column defaults: a string for the label column, floats for everything else
        n_inputs = sum(self.inp_size)
        column_defaults = [['']] + [[0.0] for _ in range(n_inputs + targ_size)]

        # Decode every line; the result is one tensor per CSV column
        columns = tf.decode_csv(lines, record_defaults=column_defaults)
        label_column, value_columns = columns[0], columns[1:]

        # Regroup the columns: labels first, then each input group, then the targets
        pattern_labels = tf.transpose(label_column)
        input_patterns = []
        offset = 0
        for group_size in self.inp_size:
            group_columns = value_columns[offset:offset + group_size]
            input_patterns.append(tf.transpose(tf.stack(group_columns)))
            offset += group_size
        target_columns = value_columns[n_inputs:n_inputs + targ_size]
        target_patterns = tf.transpose(tf.stack(target_columns))

        # Slice the grouped tensors into single examples (optionally shuffled)
        single_example = tf.train.slice_input_producer(
            tensor_list = [pattern_labels] + input_patterns + [target_patterns],
            num_epochs = num_epochs,
            shuffle = shuffle,
            seed = shuffle_seed,
            capacity = data_len
        )

        # Collect the single examples back into batches of batch_size
        self.examples_batch = tf.train.batch(
            tensors = single_example,
            batch_size = batch_size,
            capacity = batch_size
        )


class BasicLayer(object):
    '''
    Fully connected layer: ``output = nonlin(matmul(input, weights) + biases)``.

    Args:
        layer_name: name used for the variable scope and the name scope.
        layer_input: a 2-D tensor, or a list/tuple of 2-D tensors that are
            concatenated along axis 1. The second dimension must be static.
        size: number of units in the layer.
        wrange: (min, max) of the uniform distribution used to initialise
            both weights and biases.
        nonlin: optional callable applied to the net input; when omitted the
            output is the net input itself.
        bias: whether to create a bias variable; without it ``self.biases``
            is the plain integer 0.
        seed: seed passed to the random initialisers.
        sparse_inp: forwarded to ``tf.matmul`` as ``a_is_sparse``.

    Attributes:
        name, input_, weights, biases, has_bias, net_input, nonlin, output.
        After ``add_gradient_ops``: ``gradient`` (dict) plus ``gnet_input``,
        ``gactivation``, ``gweights`` and, with a bias, ``gbiases``.
    '''
    def __init__(self, layer_name, layer_input, size, wrange, nonlin=None, bias=True, seed=None, sparse_inp=False):
        self.name = layer_name
        # Explicit flag, so later code never has to test a TF variable for truthiness
        self.has_bias = bool(bias)
        with tf.variable_scope(layer_name):

            # Static width of the input, read through the public shape API
            if isinstance(layer_input, (list, tuple)):
                self.input_ = tf.concat(axis=1, values=list(layer_input))
                input_size = sum(inp.get_shape().as_list()[1] for inp in layer_input)
            else:
                self.input_ = layer_input
                input_size = layer_input.get_shape().as_list()[1]

            weight_init = tf.random_uniform(
                minval = wrange[0],
                maxval = wrange[1],
                seed = seed,
                shape = [input_size, size],
                dtype=tf.float32
            )
            self.weights = tf.get_variable(name='weights', initializer=weight_init)

            # Without a bias the integer 0 keeps the net-input expression valid
            self.biases = 0
            if self.has_bias:
                bias_init = tf.random_uniform(
                    minval = wrange[0],
                    maxval = wrange[1],
                    seed = seed,
                    shape = [size],
                    dtype = tf.float32
                )
                self.biases = tf.get_variable('biases', initializer=bias_init)

        with tf.name_scope(layer_name):
            with tf.name_scope('net_input'):
                self.net_input = tf.matmul(self.input_, self.weights, a_is_sparse=sparse_inp) + self.biases

            with tf.name_scope('activations'):
                self.nonlin = nonlin
                if nonlin:
                    self.output = nonlin(self.net_input)
                else:
                    self.output = self.net_input

    def add_gradient_ops(self, loss):
        '''
        Create the gradients of ``loss`` with respect to this layer's net
        input, activation, weights and (if present) biases.

        The ops are stored in ``self.gradient`` (keyed 'net_input',
        'activation', 'weights', 'biases') and as the attributes
        ``gnet_input``, ``gactivation``, ``gweights``, ``gbiases``.
        '''
        with tf.name_scope(self.name):
            item_keys = ['net_input', 'activation', 'weights']
            items = [self.net_input, self.output, self.weights]
            if self.has_bias:
                item_keys.append('biases')
                items.append(self.biases)
            grad_list = tf.gradients(loss, items)
            self.gradient = dict(zip(item_keys, grad_list))

            for str_key, grad_op in zip(item_keys, grad_list):
                setattr(self, 'g{}'.format(str_key), grad_op)

    def fetch_test_ops(self):
        '''
        Return ``(fetch_ops, layer_name)`` where ``fetch_ops`` maps snapshot
        keys to the ops that can be evaluated for this layer.

        Keys: 'weights', 'biases', 'net_input', 'activation' and their
        gradient counterparts prefixed with 'g'. Entries that do not exist
        (gradients not added yet, no bias) or that are None are left out, so
        the dict can be passed to ``session.run`` as is.
        '''
        # (snapshot key, attribute name): the activation lives in self.output
        candidates = [
            ('weights', 'weights'),
            ('biases', 'biases'),
            ('net_input', 'net_input'),
            ('activation', 'output'),
            ('gweights', 'gweights'),
            ('gbiases', 'gbiases'),
            ('gnet_input', 'gnet_input'),
            ('gactivation', 'gactivation'),
        ]
        fetch_ops = {}
        for key, attr in candidates:
            if key == 'biases' and not self.has_bias:
                # self.biases is the integer 0 here, which cannot be fetched
                continue
            op = getattr(self, attr, None)
            if op is not None:
                fetch_ops[key] = op
        return fetch_ops, self.name


class FFBPModel(object):
    '''
    Feed-forward back-propagation model: ties layers, loss, optimizer and
    data pipelines together and runs train / test epochs.

    Args:
        name: model name.
        loss: scalar loss tensor.
        optimizer: object providing ``minimize(loss=..., global_step=...)``.
        layers: layer objects providing ``add_gradient_ops(loss=...)`` and
            ``fetch_test_ops()``.
        inp: input placeholder, or a list/tuple of input placeholders.
        targ: target placeholder.
        train_data / test_data: objects exposing ``examples_batch`` (and, for
            training, ``data_len`` and ``batch_size``); either may be None.
    '''
    def __init__(self, name, loss, optimizer, layers, inp, targ, train_data=None, test_data=None):
        self.name = name
        self.loss = loss

        self.optimizer = optimizer
        self._global_step = tf.Variable(0, name='global_step', trainable=False)
        self._step_incrementer = tf.assign_add(self._global_step, 1, name='global_step_incrementer')
        # global_step=None: the counter is advanced by train_epoch (once per
        # epoch) through _step_incrementer, not by every optimizer step.
        self._train_step = self.optimizer.minimize(loss=self.loss, global_step=None)

        self.layers = layers
        for layer in self.layers:
            layer.add_gradient_ops(loss=self.loss)

        # Always a list: the epoch methods concatenate it with other lists,
        # which would raise TypeError for a tuple.
        self.inp = list(inp) if isinstance(inp, (list, tuple)) else [inp]
        self.targ = targ
        self.inp_labels = tf.placeholder(shape=(), dtype=tf.string)

        self.data = {'Test': test_data, 'Train': train_data}

        self._prev_param = {}

        if train_data is not None:
            self._train_fetches = {
                'loss': self.loss,
                'train_step': self._train_step,
            }

        if test_data is not None:
            self.test_data = test_data
            self._test_fetches = {
                'loss'  : self.loss,
                'enum'  : self._global_step,
                'labels': self.inp_labels,
                'input' : tf.concat(self.inp, axis=1) if len(self.inp) > 1 else self.inp[0],
                'target': self.targ
            }

    def test_epoch(self, session, verbose=False):
        '''
        Evaluate the model on one batch of test data, one example at a time.

        Returns:
            (loss_sum, snap): the loss summed over the examples and a dict
            with the network-level items ('enum', 'loss', 'labels', 'input',
            'target') plus one sub-dict per layer name. Apart from 'enum',
            'weights' and 'biases' (stored once), values are stacked along a
            new leading axis, one entry per example.
        '''
        assert self.data['Test'] is not None, 'Provide test data to run a test epoch'
        data = self.data['Test']
        snap = {}
        # Invariant across examples: placeholder order and per-layer fetches
        placeholders = [self.inp_labels] + self.inp + [self.targ]
        layer_fetch_specs = [layer.fetch_test_ops() for layer in self.layers]
        with tf.name_scope('Test'):
            all_examples = session.run(data.examples_batch)
            loss_sum = 0
            for example in zip(*all_examples):

                # The label is fed as is; every other vector gets a batch axis of size 1
                values = [example[0]] + [np.expand_dims(vec, 0) for vec in example[1:]]
                feed_dict = dict(zip(placeholders, values))

                # Run graph to evaluate test fetches
                test_out = session.run(
                    fetches = self._test_fetches,
                    feed_dict = feed_dict
                )

                # Store network-level snap items: enum, loss, labels, input, target
                for k, v in test_out.items():
                    if k == 'enum':
                        snap[k] = v
                    elif k not in snap:
                        snap[k] = np.expand_dims(v, axis=0)
                    else:
                        snap[k] = np.concatenate([snap[k], np.expand_dims(v, axis=0)], axis=0)

                # Store layer-level snap items: weights, biases, net_input, activations and gradients
                for layer_fetches, layer_name in layer_fetch_specs:
                    layer_snap = snap.setdefault(layer_name, {})
                    layer_out = session.run(
                        fetches = layer_fetches,
                        feed_dict = feed_dict
                    )

                    for k, v in layer_out.items():
                        if k == 'weights' or k == 'biases':
                            # parameters do not depend on the example: keep one copy
                            layer_snap[k] = v
                            # TODO: Include dweights and dbiases (weight change applied without the momentum term)
                        elif k not in layer_snap:
                            layer_snap[k] = np.expand_dims(v, axis=0)
                        else:
                            layer_snap[k] = np.concatenate([layer_snap[k], np.expand_dims(v, axis=0)], axis=0)
                loss_sum += test_out['loss']

            if verbose:
                print('epoch {}: {}'.format(tf.train.global_step(session, self._global_step), loss_sum))
            return loss_sum, snap

    def train_epoch(self, session, verbose=False):
        '''
        Run ``data_len // batch_size`` training steps and advance the epoch
        counter (``_global_step``) by one.

        Returns:
            The loss summed over the mini-batches of this epoch.
        '''
        assert self.data['Train'] is not None, 'Provide train data to run a train epoch'
        data = self.data['Train']
        epoch_loss = 0
        placeholders = self.inp + [self.targ]
        with tf.name_scope('Train'):
            for mini_batch in range(data.data_len // data.batch_size):
                examples_batch = session.run(data.examples_batch)
                # examples_batch[0] holds the labels, which training does not use
                feed_dict = dict(zip(placeholders, examples_batch[1:]))
                evaled_ops = session.run(
                    fetches = self._train_fetches,
                    feed_dict = feed_dict
                )
                epoch_loss += evaled_ops['loss']

        if verbose:
            print('epoch {}: {}'.format(tf.train.global_step(session, self._global_step), epoch_loss))

        session.run(self._step_incrementer)
        return epoch_loss
        

def use_exercise_params(use):
    '''
    When ``use`` is truthy, overwrite the hidden/output layer weights and
    biases in the default session with the values stored in the
    'exercise_params_old/exercise_params' checkpoint (saved there under the
    names w_1, b_1, w_2, b_2). Does nothing otherwise.
    '''
    if not use:
        return

    graph_vars = tf.global_variables()

    def first_named(fragment):
        # first global variable whose name contains the fragment
        return [var for var in graph_vars if fragment in var.name][0]

    hidden_W = first_named('hidden_layer/weights')
    hidden_b = first_named('hidden_layer/biases')
    output_W = first_named('output_layer/weights')
    output_b = first_named('output_layer/biases')

    # checkpoint name -> graph variable
    restore_dict = {
        'w_1': hidden_W,
        'b_1': hidden_b,
        'w_2': output_W,
        'b_2': output_b,
    }
    exercise_saver = tf.train.Saver(restore_dict, name='xor_exercise_saver')
    exercise_saver.restore(tf.get_default_session(), 'exercise_params_old/exercise_params')

        
def new_logdir():
    '''
    Create and return the first unused ``<cwd>/logdirs/ffbp_logdir_NNN``
    directory, where NNN is a zero-padded counter starting at 000.

    The counter is an integer: deriving the suffix from a float that grows
    in steps of .001 yields malformed or repeated names once rounding error
    accumulates or the value reaches 0.1 / 1.0.

    Returns:
        The path of the newly created directory.
    '''
    root = os.path.join(os.getcwd(), 'logdirs')
    index = 0
    logdir = os.path.join(root, 'ffbp_logdir_{:03d}'.format(index))
    while os.path.exists(logdir):
        index += 1
        logdir = os.path.join(root, 'ffbp_logdir_{:03d}'.format(index))
    os.makedirs(logdir)
    print('logdir path: {}'.format(logdir))
    return logdir


def save(sess, saver, logdir, model):
    '''
    Checkpoint the session with ``saver`` under
    ``<logdir>/checkpoint_files/<model.name>.ckpt`` and print the path
    reported by the saver.
    '''
    checkpoint_prefix = logdir + '/checkpoint_files/' + model.name + '.ckpt'
    written_to = saver.save(sess, checkpoint_prefix)
    print("Model saved in: {}".format(written_to))
    
    
def init_vars(session, checkpoint_dir=None):
    '''
    Initialise the graph variables, either from a checkpoint or from scratch.

    Args:
        session: session in which the variables are initialised / restored.
        checkpoint_dir: optional directory (relative to the current working
            directory) that contains a ``*.ckpt.meta`` file. The checkpoint
            prefix is that file name without the trailing ``.meta``.

    Returns:
        A tf.train.Saver that can be used to checkpoint the graph.

    If ``checkpoint_dir`` is given but holds no ``*.ckpt.meta`` file, the
    variables are initialised from scratch and a message says so.
    '''
    saver = tf.train.Saver(name='model_loader')

    # Local variables are needed on both paths: the input queues keep their
    # epoch counters there, and a default Saver does not restore them.
    session.run(tf.local_variables_initializer())

    restored = False
    if checkpoint_dir:
        checkpoint_dir = os.path.join(os.getcwd(), checkpoint_dir)
        for file_name in sorted(os.listdir(checkpoint_dir)):
            if file_name.endswith('ckpt.meta'):
                # 'model.ckpt.meta' -> prefix 'model.ckpt', the path Saver.save returned
                prefix = os.path.join(checkpoint_dir, file_name[:-len('.meta')])
                print('trying to restore from this file:')
                print(prefix)
                saver.restore(session, prefix)
                restored = True
                break  # one checkpoint is enough

    if restored:
        print('Restoring variables from {}'.format(checkpoint_dir))
    else:
        if checkpoint_dir:
            print('No *.ckpt.meta file found in {}'.format(checkpoint_dir))
        session.run(tf.global_variables_initializer())
        print('Initializing variables from scratch')
    return saver

## 2. Construction

In [4]:
# TRAIN CONFIGS
# These constants are the single source for the data pipelines and the
# placeholders below; change them here only.
num_epochs = 330
batch_size = 2
inp_size = 2
targ_size = 1
data_len = 4

lr = 0.5   # learning rate
m = 0.9    # momentum

xor_graph = tf.Graph()

with xor_graph.as_default():

    # Training examples: shuffled (fixed seed), served in mini-batches
    with tf.name_scope('train_data'):
        train_examples = InputData(
            path_to_data_file = 'train_data_B.txt',
            num_epochs = num_epochs,
            batch_size = batch_size,
            inp_size = inp_size,
            targ_size = targ_size,
            data_len = data_len,
            shuffle = True,
            shuffle_seed = 1
        )

    # Test examples: same file, unshuffled, the whole data set in one batch
    with tf.name_scope('test_data'):
        test_examples = InputData(
            path_to_data_file = 'train_data_B.txt',
            num_epochs = num_epochs,
            batch_size = data_len,
            inp_size = inp_size,
            targ_size = targ_size,
            data_len = data_len,
            shuffle = False
        )

    # NETWORK CONSTRUCTION
    model_name = 'xor_model'
    with tf.name_scope(model_name):

        model_inp  = tf.placeholder(dtype = tf.float32, shape=[None, inp_size], name='model_inp')

        hidden_layer = BasicLayer(
            layer_name = 'hidden_layer',
            layer_input = model_inp,
            size = 2,
            wrange = [-1,1],
            nonlin=tf.nn.sigmoid,
            bias=True,
            seed=1, # Use None for random seed value
            sparse_inp=False
        )

        # The output layer must have as many units as there are target values
        output_layer = BasicLayer(
            layer_name = 'output_layer',
            layer_input = hidden_layer.output,
            size = targ_size,
            wrange = [-1,1],
            nonlin=tf.nn.sigmoid,
            bias=True,
            seed=1, # Use None for random seed value
            sparse_inp=False
        )

        target = tf.placeholder(dtype = tf.float32, shape=[None, targ_size], name='targets')

        # Loss: summed squared error between targets and network output
        model = FFBPModel(
            name = model_name,
            layers = [hidden_layer, output_layer],
            train_data = train_examples,
            inp        = model_inp,
            targ       = target,
            loss       = tf.reduce_sum(tf.squared_difference(target, output_layer.output), name='loss_function'),
            optimizer  = tf.train.MomentumOptimizer(lr, m),
            test_data  = test_examples
        )

## 3. Running and saving model
This part is meant to be generic: users should rarely need to edit it.

In [5]:
# Epochs at which a test epoch is run and a snapshot is stored
test_epochs = [0,1,3,5,30,60,180,300]

with tf.Session(graph=xor_graph) as sess:
    # initialize variables
    # TODO create (init) or restore model here
    logdir = new_logdir()
    ckpt = None#'exercise_params'
    saver = init_vars(session=sess, checkpoint_dir=ckpt)
    use_exercise_params(True)

    # create coordinator and queue runners
    coordinator = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coordinator)

    try:
        # NOTE: the training loop below is disabled; `ecrit` is not defined in
        # the cells shown here and must be set before re-enabling it.
#         for i in range(num_epochs+1): #for i in range(start_epoch, num_epochs+1):
#             # Do planned tests and test when num_epochs is reached
#             if any([i==test_epoch for test_epoch in test_epochs]):
#                 loss, snap = model.test_epoch(session=sess, verbose=True)
#                 snap2pickle(logdir, snap)

#             # Run training epoch
#             loss = model.train_epoch(session=sess, verbose=False)

#             # Test and break if loss < ecrit
#             if loss < ecrit:
#                 loss, snap = model.test_epoch(session=sess, verbose=True)
#                 snap2pickle(logdir, snap)

#                 print('Stopped training due to loss < ecrit')
#                 break
        save(sess, saver, logdir, model)
    finally:
        # Always stop and join the queue-runner threads before the session
        # closes, whether or not the code above raised.
        coordinator.request_stop()
        coordinator.join(threads)

logdir path: /Users/alexten/Projects/pdpyflow/xor/logdirs/ffbp_logdir_000
Initializing variables from scratch
INFO:tensorflow:Restoring parameters from exercise_params2/exercise_params


InvalidArgumentError: Unsuccessful TensorSliceReader constructor: Failed to get matching files on exercise_params2/exercise_params: Not found: exercise_params2
	 [[Node: xor_exercise_saver/RestoreV2_2 = RestoreV2[dtypes=[DT_FLOAT], _device="/job:localhost/replica:0/task:0/cpu:0"](_arg_xor_exercise_saver/Const_0_0, xor_exercise_saver/RestoreV2_2/tensor_names, xor_exercise_saver/RestoreV2_2/shape_and_slices)]]

Caused by op 'xor_exercise_saver/RestoreV2_2', defined at:
  File "/Users/alexten/anaconda/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/Users/alexten/anaconda/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/Users/alexten/anaconda/lib/python3.6/site-packages/ipykernel/__main__.py", line 3, in <module>
    app.launch_new_instance()
  File "/Users/alexten/anaconda/lib/python3.6/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/Users/alexten/anaconda/lib/python3.6/site-packages/ipykernel/kernelapp.py", line 477, in start
    ioloop.IOLoop.instance().start()
  File "/Users/alexten/anaconda/lib/python3.6/site-packages/zmq/eventloop/ioloop.py", line 177, in start
    super(ZMQIOLoop, self).start()
  File "/Users/alexten/anaconda/lib/python3.6/site-packages/tornado/ioloop.py", line 888, in start
    handler_func(fd_obj, events)
  File "/Users/alexten/anaconda/lib/python3.6/site-packages/tornado/stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "/Users/alexten/anaconda/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 440, in _handle_events
    self._handle_recv()
  File "/Users/alexten/anaconda/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 472, in _handle_recv
    self._run_callback(callback, msg)
  File "/Users/alexten/anaconda/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 414, in _run_callback
    callback(*args, **kwargs)
  File "/Users/alexten/anaconda/lib/python3.6/site-packages/tornado/stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "/Users/alexten/anaconda/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/Users/alexten/anaconda/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 235, in dispatch_shell
    handler(stream, idents, msg)
  File "/Users/alexten/anaconda/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "/Users/alexten/anaconda/lib/python3.6/site-packages/ipykernel/ipkernel.py", line 196, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/Users/alexten/anaconda/lib/python3.6/site-packages/ipykernel/zmqshell.py", line 533, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/Users/alexten/anaconda/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2728, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/Users/alexten/anaconda/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2850, in run_ast_nodes
    if self.run_code(code, result):
  File "/Users/alexten/anaconda/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2910, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-5-59a0caa71791>", line 9, in <module>
    use_exercise_params(True)
  File "<ipython-input-3-7cfdf4711483>", line 270, in use_exercise_params
    tf.train.Saver(restore_dict, name='xor_exercise_saver').restore(tf.get_default_session(), 'exercise_params2/exercise_params')
  File "/Users/alexten/anaconda/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 1140, in __init__
    self.build()
  File "/Users/alexten/anaconda/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 1172, in build
    filename=self._filename)
  File "/Users/alexten/anaconda/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 688, in build
    restore_sequentially, reshape)
  File "/Users/alexten/anaconda/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 407, in _AddRestoreOps
    tensors = self.restore_op(filename_tensor, saveable, preferred_shard)
  File "/Users/alexten/anaconda/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 247, in restore_op
    [spec.tensor.dtype])[0])
  File "/Users/alexten/anaconda/lib/python3.6/site-packages/tensorflow/python/ops/gen_io_ops.py", line 663, in restore_v2
    dtypes=dtypes, name=name)
  File "/Users/alexten/anaconda/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 767, in apply_op
    op_def=op_def)
  File "/Users/alexten/anaconda/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 2630, in create_op
    original_op=self._default_original_op, op_def=op_def)
  File "/Users/alexten/anaconda/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1204, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

InvalidArgumentError (see above for traceback): Unsuccessful TensorSliceReader constructor: Failed to get matching files on exercise_params2/exercise_params: Not found: exercise_params2
	 [[Node: xor_exercise_saver/RestoreV2_2 = RestoreV2[dtypes=[DT_FLOAT], _device="/job:localhost/replica:0/task:0/cpu:0"](_arg_xor_exercise_saver/Const_0_0, xor_exercise_saver/RestoreV2_2/tensor_names, xor_exercise_saver/RestoreV2_2/shape_and_slices)]]
