# XOR problem

## 1. Preliminaries
### 1.1. Imports
We begin by deleting the logs left over from previous runs, importing the Python libraries we need, and creating a fresh log directory:

In [1]:
%%bash
# Delete the whole train/ directory (relative to the notebook's working directory)
# so that log directories written by earlier runs do not accumulate.
rm -rf train

In [2]:
import os
import pickle
import tensorflow as tf
import numpy as np
from collections import OrderedDict, namedtuple

# Pick the first unused run directory: train/np_log_000, train/np_log_001, ...
# An integer counter with zero padding is used for the suffix; deriving it from
# the printed form of an accumulated float depends on how that float happens to
# be rendered and starts repeating suffixes once the counter reaches 1.0.
run_index = 0
logdir = os.path.join(os.getcwd(), 'train', 'np_log_{:03d}'.format(run_index))
while os.path.exists(logdir):
    run_index += 1
    logdir = os.path.join(os.getcwd(), 'train', 'np_log_{:03d}'.format(run_index))
os.makedirs(logdir)

print('tensorflow version: {}'.format(tf.__version__))
print('numpy version: {}'.format(np.__version__))
print('current working directory: {}'.format(os.getcwd()))
print('logdir path: {}'.format(logdir))

tensorflow version: 1.3.0
numpy version: 1.12.1
current working directory: /Users/alexten/Projects/pdpyflow/xor
logdir path: /Users/alexten/Projects/pdpyflow/xor/train/np_log_000


### 1.2. Functions and Classes

In [3]:
def snap2pickle(logdir, snap):
    """Append ``snap`` to the list of snapshots pickled in ``<logdir>/snap.pkl``.

    The file holds a single pickled list. If the file does not exist yet, it is
    created containing ``[snap]``; otherwise the stored list is loaded, ``snap``
    is appended and the whole list is written back.

    Args:
        logdir: directory that holds (or will hold) ``snap.pkl``. It must exist.
        snap: any picklable object.

    Raises:
        FileNotFoundError: if ``logdir`` itself does not exist.
    """
    path = os.path.join(logdir, 'snap.pkl')
    try:
        # NOTE: unpickling can execute arbitrary code; only use this on files
        # written by this notebook.
        with open(path, 'rb') as snap_file:
            snaps = pickle.load(snap_file)
    except FileNotFoundError:
        # First snapshot of this run: start a new list.
        snaps = []

    snaps.append(snap)
    with open(path, 'wb') as snap_file:
        pickle.dump(snaps, snap_file)


class InputData(object):
    """Queue-based CSV reader that yields batches of (label, input, target) tensors.

    Each CSV record is decoded as one string column followed by
    ``inp_size + targ_size`` float columns. The resulting batch tensors are
    exposed as ``self.example_batches`` in the order
    ``[pattern_labels, input_patterns, target_patterns]``.

    Args:
        path_to_data_file: path of the CSV file to read.
        batch_size: number of examples per batch.
        inp_size: number of input columns following the label column.
        targ_size: number of target columns following the input columns.
        data_len: number of examples in the file (stored only; not checked here).
        shuffle: if true, batches are drawn with ``tf.train.shuffle_batch``.
        buffer: ``min_after_dequeue`` for the shuffling queue; must be an int
            when ``shuffle`` is true.
        batch_seed: seed forwarded to ``tf.train.shuffle_batch``.
    """
    def __init__(self, path_to_data_file, batch_size, inp_size, targ_size, data_len,
                 shuffle = False, buffer = None, batch_seed = None):
        # Keep the settings that other code reads back from this object
        self.path = path_to_data_file
        self.batch_size, self.inp_size, self.targ_size = batch_size, inp_size, targ_size
        self.data_len = data_len
        num_values = inp_size + targ_size

        # Queue of file names (a single file, never shuffled)
        file_queue = tf.train.string_input_producer(string_tensor = [path_to_data_file], shuffle = False)

        # Line reader; `True` is passed as the header-line count (i.e. acts as 1)
        line_reader = tf.TextLineReader(skip_header_lines=True, name='csv_reader')
        _, csv_line = line_reader.read(file_queue)

        # First column defaults to an empty string, all remaining columns to 0.0
        column_defaults = [['']] + [[0.0] for _ in range(num_values)]
        columns = tf.decode_csv(csv_line, record_defaults=column_defaults)

        # Split the decoded columns into label / input / target tensors
        pattern_labels = tf.transpose(columns[0])
        value_columns = columns[1:]
        input_patterns = tf.transpose(tf.stack(value_columns[:inp_size]))
        target_patterns = tf.transpose(tf.stack(value_columns[inp_size:num_values]))
        batch_tensors = [pattern_labels, input_patterns, target_patterns]

        # Batch queue: in file order by default, shuffled on request
        if not shuffle:
            self.example_batches = tf.train.batch(
                tensors = batch_tensors,
                batch_size = batch_size,
                num_threads = 1,
                capacity = 3 * batch_size,
            )
        else:
            assert type(buffer) is int, 'provide buffer size if you want to shuffle batch'
            self.example_batches = tf.train.shuffle_batch(
                tensors = batch_tensors,
                batch_size = batch_size,
                capacity = buffer + 3 * batch_size,
                min_after_dequeue = buffer,
                seed = batch_seed,
            )


class BasicLayer(object):
    """Fully connected layer: ``output = nonlin(matmul(input, weights) + biases)``.

    All ops are created inside ``tf.name_scope(layer_name)``, with the variables
    in nested ``weights`` / ``biases`` scopes.

    Args:
        layer_name: name of the layer; used as the TensorFlow name scope.
        layer_input: a ``tf.Tensor``, or an iterable of tensors that is
            concatenated along axis 1 to form the layer input.
        size: number of units in the layer.
        wrange: ``[minval, maxval]`` of the uniform distribution used to
            initialise both weights and biases.
        nonlin: callable applied to the net input, or ``None`` for a linear layer.
        bias: whether the layer has trainable biases.
        seed: seed forwarded to ``tf.random_uniform``.
        sparse_inp: forwarded to ``tf.matmul`` as ``a_is_sparse``.

    Attributes:
        input_, weights, biases, net_input, output: the layer's graph objects
            (``biases`` is the number 0 when ``bias`` is false).
        has_bias: True when ``biases`` is a trainable variable.
        gradient: dict of gradient ops, available after ``add_gradient_ops``.
    """
    def __init__(self, layer_name, layer_input, size, wrange, nonlin=None, bias=True, seed=None, sparse_inp=False):
        self.name = layer_name
        with tf.name_scope(layer_name):
            self.input_ = layer_input
            # A non-Tensor iterable of tensors is merged into one input tensor.
            # The check must look at the input itself, not at the layer name.
            if not isinstance(layer_input, tf.Tensor) and hasattr(layer_input, '__iter__'):
                self.input_ = tf.concat(axis=1, values=list(layer_input))
            # Read the width from the tensor actually fed to matmul (this also
            # works for concatenated inputs) through the public shape API.
            input_size = self.input_.get_shape().as_list()[1]
            with tf.name_scope('weights'):

                self.weights = tf.Variable(
                    tf.random_uniform(
                        minval = wrange[0],
                        maxval = wrange[1],
                        seed = seed,
                        shape = [input_size, size],
                        dtype=tf.float32
                    )
                )

            # Without biases the net input simply adds the constant 0.
            self.biases = 0
            self.has_bias = bool(bias)
            if self.has_bias:
                with tf.name_scope('biases'):
                    self.biases = tf.Variable(
                        tf.random_uniform(
                            minval = wrange[0],
                            maxval = wrange[1],
                            seed = seed,
                            shape = [size],
                            dtype = tf.float32
                        )
                    )

            with tf.name_scope('net_input'):
                self.net_input = tf.matmul(self.input_, self.weights, a_is_sparse=sparse_inp) + self.biases

            with tf.name_scope('activations'):
                self.nonlin = nonlin
                if nonlin:
                    self.output = nonlin(self.net_input)
                else:
                    self.output = self.net_input

    def add_gradient_ops(self, loss):
        """Create ops for the gradient of ``loss`` w.r.t. this layer's tensors.

        The gradients are stored in ``self.gradient`` under the keys
        ``'net_input'``, ``'activation'``, ``'weights'`` (and ``'biases'`` when
        the layer has biases), and additionally as attributes named
        ``g<key>`` (``gnet_input``, ``gactivation``, ...).
        """
        with tf.name_scope(self.name):
            item_keys = ['net_input', 'activation', 'weights']
            items = [self.net_input, self.output, self.weights]
            # Use the explicit flag: the truth value of a graph object is not
            # a reliable way to tell a variable from the constant 0.
            if self.has_bias:
                item_keys.append('biases')
                items.append(self.biases)
            grad_list = tf.gradients(loss, items)
            self.gradient = dict(zip(item_keys, grad_list))

            for str_key, grad_op in zip(item_keys, grad_list):
                setattr(self, 'g{}'.format(str_key), grad_op)

    def fetch_test_ops(self):
        """Return ``(fetches, layer_name)`` for snapshotting this layer.

        ``fetches`` maps snapshot keys to graph objects. Gradient entries are
        present only after ``add_gradient_ops`` has run; bias entries only when
        the layer has biases.
        """
        # (snapshot key, attribute holding the op). The activation lives in
        # `self.output`, so it has to be mapped explicitly.
        candidates = [
            ('weights', 'weights'),
            ('biases', 'biases'),
            ('net_input', 'net_input'),
            ('activation', 'output'),
            ('gweights', 'gweights'),
            ('gbiases', 'gbiases'),
            ('gnet_input', 'gnet_input'),
            ('gactivation', 'gactivation'),
        ]
        fetch_ops = {}
        for key, attr in candidates:
            # The literal 0 stored in `biases` for bias-free layers is not fetchable.
            if key in ('biases', 'gbiases') and not self.has_bias:
                continue
            op = getattr(self, attr, None)
            if op is not None:
                fetch_ops[key] = op
        return fetch_ops, self.name


class FFBPModel(object):
    """Feed-forward back-propagation model built from a list of layers.

    Ties together a loss tensor, an optimizer, the layers whose state should be
    snapshotted, the input/target placeholders and the train/test data readers.

    Args:
        loss: scalar loss tensor to minimise.
        optimizer: optimizer object providing ``minimize(loss, global_step)``.
        layers: layer objects providing ``add_gradient_ops(loss)`` and
            ``fetch_test_ops()``.
        inp: placeholder that receives input patterns.
        targ: placeholder that receives target patterns.
        train_data: object with ``example_batches``, ``data_len`` and
            ``batch_size`` used by ``train_epoch`` (optional).
        test_data: object with ``example_batches`` used by ``test_epoch``
            (optional).
    """
    def __init__(self, loss, optimizer, layers, inp, targ, train_data=None, test_data=None):
        self.loss = loss

        self.optimizer = optimizer
        self._global_step = tf.Variable(0, name='global_step', trainable=False)
        self._train_step = self.optimizer.minimize(loss=self.loss, global_step=self._global_step)

        self.layers = layers
        for layer in self.layers:
            layer.add_gradient_ops(loss=self.loss)

        self.inp = inp
        self.targ = targ
        self.inp_labels = tf.placeholder(shape=(), dtype=tf.string)

        self.data = {'Test': test_data, 'Train': train_data}
        # Define both attributes unconditionally so the epoch methods can fail
        # with their own assertion message instead of an AttributeError.
        self.test_data = test_data
        self.train_data = train_data

        if test_data is not None:
            self._test_fetches = {
                'loss'  : self.loss,
                'enum'  : self._global_step,
                'labels': self.inp_labels,
                'input' : self.inp,
                'target': self.targ
            }
            # Baseline parameter values, keyed by (layer name, parameter name)
            self._prev_param = {}
        if train_data is not None:
            train_loss_summary = tf.summary.scalar('train_loss_summary', self.loss)
            self._train_fetches = {
                'loss': self.loss,
                'summaries': tf.summary.merge([train_loss_summary]),
                '_train_step': self._train_step
            }

    @staticmethod
    def _stack(store, key, value):
        """Append ``value`` as a new row along a leading axis of ``store[key]``."""
        row = np.expand_dims(value, axis=0)
        if key in store:
            store[key] = np.concatenate([store[key], row], axis=0)
        else:
            store[key] = row

    def test_epoch(self, session, verbose=False):
        """Run every test pattern through the network one at a time.

        Args:
            session: session used for all evaluations.
            verbose: if true, print the global step and the summed loss.

        Returns:
            ``(loss_sum, snap)`` where ``loss_sum`` is the loss summed over the
            test patterns and ``snap`` is a dict holding:
              * ``'enum'``: the global step,
              * every other test fetch stacked over patterns (leading axis),
              * one sub-dict per layer with ``weights`` / ``biases`` as fetched,
                the remaining layer fetches stacked over patterns, and
                ``dweights`` / ``dbiases``: the difference to the values
                recorded at global step 0 (omitted if no such record exists).
        """
        assert self.test_data is not None, 'Provide test data to run a test epoch'

        data = self.data['Test']
        snap = {}
        ps, xs, ts = session.run(data.example_batches)
        loss_sum = 0
        for p, x, t in zip(ps, xs, ts):
            # Feed a single pattern as a batch of one
            feed = {
                self.inp_labels: p,
                self.inp: np.expand_dims(x, 0),
                self.targ: np.expand_dims(t, 0)
            }
            test_out = session.run(fetches=self._test_fetches, feed_dict=feed)

            for k, v in test_out.items():
                if k == 'enum':
                    snap[k] = v
                else:
                    self._stack(snap, k, v)

            for layer in self.layers:
                layer_fetches, layer_name = layer.fetch_test_ops()
                layer_snap = snap.setdefault(layer_name, {})
                layer_out = session.run(fetches=layer_fetches, feed_dict=feed)
                for k, v in layer_out.items():
                    if k == 'weights' or k == 'biases':
                        # Parameters do not depend on the pattern: store them once,
                        # not stacked.
                        layer_snap[k] = v
                        # Key the baseline by layer as well as by parameter name so
                        # that layers do not overwrite each other's records.
                        baseline_key = (layer_name, k)
                        if snap['enum'] == 0:
                            self._prev_param[baseline_key] = v
                        elif baseline_key in self._prev_param:
                            layer_snap['d{}'.format(k)] = v - self._prev_param[baseline_key]
                    else:
                        self._stack(layer_snap, k, v)
            loss_sum += test_out['loss']

        if verbose:
            print('epoch {}: {}'.format(tf.train.global_step(session, self._global_step), loss_sum))

        return loss_sum, snap

    def train_epoch(self, session, verbose=False):
        """Run ``data_len // batch_size`` training steps on the train data.

        Args:
            session: session used for all evaluations.
            verbose: if true, print the global step and the loss summed over
                the epoch's mini-batches.

        Returns:
            ``(batch_loss, summary)``: the loss and the merged summary of the
            LAST mini-batch of the epoch.

        Raises:
            ValueError: if the train data yields no mini-batch at all.
        """
        data = self.data['Train']
        assert data is not None, 'Provide train data to run a train epoch'

        num_batches = data.data_len // data.batch_size
        if num_batches < 1:
            raise ValueError('data_len must be at least batch_size to run a train epoch')

        loss_sum = 0
        for mini_batch in range(num_batches):
            p, x, t = session.run(list(data.example_batches))
            evaled_ops = session.run(
                fetches = self._train_fetches,
                feed_dict = {self.inp: x, self.targ: t}
            )
            loss_sum += evaled_ops['loss']
        batch_loss, summary = evaled_ops['loss'], evaled_ops['summaries']

        if verbose:
            print('epoch {}: {}'.format(tf.train.global_step(session, self._global_step), loss_sum))

        return batch_loss, summary

    def save(self, path):
        """Not implemented yet; does nothing."""
        pass

    def load(self, path):
        """Not implemented yet; does nothing."""
        pass


def use_exercise_params(use):
    """Optionally overwrite the layer parameters with the stored exercise values.

    When ``use`` is truthy, the checkpoint ``exercise_params/exercise_params``
    is restored into the default session: checkpoint entries ``w_1`` / ``b_1``
    go to the hidden layer's weights / biases and ``w_2`` / ``b_2`` to the
    output layer's. When ``use`` is falsy nothing happens.
    """
    if not use:
        return

    all_vars = tf.global_variables()

    def first_match(name_fragment):
        # First global variable whose name contains the fragment. Any other
        # variable whose name also contains it is ignored, so this relies on
        # the layer's own variable coming first in the list.
        return [v for v in all_vars if name_fragment in v.name][0]

    restore_dict = {
        'w_1': first_match('hidden_layer/weights'),
        'b_1': first_match('hidden_layer/biases'),
        'w_2': first_match('output_layer/weights'),
        'b_2': first_match('output_layer/biases'),
    }
    saver = tf.train.Saver(restore_dict)
    saver.restore(tf.get_default_session(), 'exercise_params/exercise_params')

## 2. Construction

In [4]:
# CONFIGS
# Training schedule and data dimensions
num_epochs = 330   # the training loop runs epochs 0..num_epochs
batch_size = 4     # patterns per training mini-batch
inp_size = 2       # input values per pattern
targ_size = 1      # target values per pattern
data_len = 4       # number of patterns in the data file

# Network and optimizer settings
hidden_size = 2    # units in the hidden layer
wrange = [-1,1]    # [min, max] of the uniform parameter initialisation
seed = None        # NOTE(review): not read by the cells below; the layers pass seed=1
lr = 0.5           # learning rate
m = 0.9            # momentum
ckpt_freq = 1      # NOTE(review): not read by the cells below
ecrit = 0.01       # training stops once the loss drops below this value

# Summary of the main hyper-parameters, derived from the values above so the
# two cannot drift apart.
configs = {
    'batch_size': batch_size,
    'lrate': lr,
    'momentum': m,
    'ecrit': ecrit,
}

# DATA PIPELINES
# Both readers use the sizes from the config cell instead of repeating the
# literal values, so changing the configuration changes the whole graph.
with tf.name_scope('train_data'):
    train_examples = InputData(
        path_to_data_file = 'train_data_B.txt',
        batch_size = batch_size,
        inp_size = inp_size,
        targ_size = targ_size,
        data_len = data_len,
        shuffle = False,
        buffer = None,
        batch_seed = None
    )

# Testing reads the same file as training, with all patterns in a single batch.
with tf.name_scope('test_data'):
    test_examples = InputData(
        path_to_data_file = 'train_data_B.txt',
        batch_size = data_len,
        inp_size = inp_size,
        targ_size = targ_size,
        data_len = data_len,
        shuffle = False,
        buffer = None,
        batch_seed = None
    )

# NETWORK CONSTRUCTION
with tf.name_scope('XOR_model'):

    model_inp  = tf.placeholder(dtype = tf.float32, shape=[None, inp_size], name='model_inp')

    hidden_layer = BasicLayer(
        layer_name = 'hidden_layer',
        layer_input = model_inp,
        size = hidden_size,
        wrange = wrange,
        nonlin=tf.nn.sigmoid,
        bias=True,
        seed=1, # Use None for random seed value
        sparse_inp=False
    )

    output_layer = BasicLayer(
        layer_name = 'output_layer',
        layer_input = hidden_layer.output,
        size = targ_size,
        wrange = wrange,
        nonlin=tf.nn.sigmoid,
        bias=True,
        seed=1, # Use None for random seed value
        sparse_inp=False
    )

    target = tf.placeholder(dtype = tf.float32, shape=[None, targ_size], name='targets')

    # Loss: squared error summed over all outputs of all patterns in the batch
    xor_model = FFBPModel(
        layers = [hidden_layer, output_layer],
        train_data = train_examples,
        inp        = model_inp,
        targ       = target,
        loss       = tf.reduce_sum(tf.squared_difference(target, output_layer.output), name='loss_function'),
        optimizer  = tf.train.MomentumOptimizer(lr, m),
        test_data  = test_examples
    )

## 3. Running graph

In [5]:
sum_freq = 30 # (num_epochs // 10); a training summary is written every sum_freq epochs
test_epochs = [0,30,60]+[180,300]

# `with tf.Session() as sess` installs the session as default (needed by
# use_exercise_params and start_queue_runners) AND closes it on exit;
# `tf.Session().as_default()` alone would leave the session open.
with tf.Session() as sess:
    summary_writer = tf.summary.FileWriter(logdir, sess.graph)
    sess.run(tf.local_variables_initializer())
    sess.run(tf.global_variables_initializer())
    use_exercise_params(True) # input False to use custom params
    coordinator = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coordinator)

    try:
        for i in range(num_epochs+1):
            # Snapshot the network before the train epoch on the scheduled epochs
            if i in test_epochs:
                loss, snap = xor_model.test_epoch(session=sess, verbose=True)
                snap2pickle(logdir, snap)

            # Run train epoch
            loss, summary = xor_model.train_epoch(session=sess, verbose=False)

            # Periodically record the training loss for TensorBoard
            if i % sum_freq == 0:
                summary_writer.add_summary(summary, global_step=i)

            # Test and break if loss < ecrit
            if loss < ecrit:
                loss, snap = xor_model.test_epoch(session=sess, verbose=True)
                snap2pickle(logdir, snap)

                print('Stopped training due to loss < ecrit')
                break
    finally:
        # Stop the input queue threads and flush the event file on every exit
        # path: early stop, epoch limit reached, or an exception.
        coordinator.request_stop()
        coordinator.join(threads)
        summary_writer.close()

INFO:tensorflow:Restoring parameters from exercise_params/exercise_params
epoch 0: 1.0506523847579956
epoch 30: 1.0019647181034088
epoch 60: 1.0000269263982773
epoch 180: 0.9998661577701569
epoch 300: 0.04396426538005471
epoch 318: 0.009249473921954632
Stopped training due to loss < ecrit


In [6]:
# Discard the current default graph; presumably so that the construction cell can
# be re-run without adding a second copy of every op to the existing graph.
# NOTE(review): the TensorFlow docs describe calling this while a session is still
# active as undefined behaviour - confirm the session above has been closed first.
tf.reset_default_graph()