# XOR problem

## 1. Preliminaries
### 1.1. Imports
We begin by importing the Python libraries used throughout the notebook and creating a fresh log directory for this run:

In [1]:
# %%bash
# rm -rf train

In [2]:
import os
import tensorflow as tf
import numpy as np
from collections import OrderedDict

# Pick the first unused run directory: train/log_000, train/log_001, ...
# An integer counter with zero-padded formatting is used here. Slicing the
# string form of an accumulating float is fragile because the result depends
# on float rounding and on the repr's length (e.g. str(0.01)[2:5] is '01').
run_index = 0
logdir = os.path.join(os.getcwd(), 'train', 'log_{:03d}'.format(run_index))
while os.path.exists(logdir):
    run_index += 1
    logdir = os.path.join(os.getcwd(), 'train', 'log_{:03d}'.format(run_index))
os.makedirs(logdir)

print('tensorflow version: {}'.format(tf.__version__))
print('numpy version: {}'.format(np.__version__))
print('current working directory: {}'.format(os.getcwd()))
print('logdir path: {}'.format(logdir))

tensorflow version: 1.3.0
numpy version: 1.12.1
current working directory: /Users/alexten/Projects/pdpyflow/xor
logdir path: /Users/alexten/Projects/pdpyflow/xor/train/log_000


### 1.2. Functions and Classes

In [3]:
def read_csv_file(filename_queue, batch_size, default_val, inp_size, targ_size, pattern_labels):
    """Build the ops that read a batch of CSV rows and split it into columns.

    Each row is expected to hold an optional leading string label, then
    ``inp_size`` input fields, then ``targ_size`` target fields.

    Args:
        filename_queue: queue of file names handed to the line reader.
        batch_size: maximum number of rows requested per read.
        default_val: record default used for every input and target field.
        inp_size: number of input fields per row.
        targ_size: number of target fields per row.
        pattern_labels: truthy when every row starts with a string label.

    Returns:
        Tuple ``(p, x, t)``: the label tensor, the stacked-and-transposed input
        columns and the stacked-and-transposed target columns. When
        ``pattern_labels`` is falsy, ``p`` holds one empty string per row read,
        so callers can still fetch all three tensors.
    """
    reader = tf.TextLineReader(skip_header_lines=1, name='csv_reader')
    _, csv_row = reader.read_up_to(filename_queue, batch_size)

    defaults = [[default_val] for _ in range(inp_size + targ_size)]
    if pattern_labels:
        # The label column is parsed as a string, hence the '' default.
        defaults.insert(0, [''])
    columns = tf.decode_csv(csv_row, record_defaults=defaults)

    if pattern_labels:
        p = tf.transpose(columns.pop(0))
    else:
        # Popping here would consume the first *input* column and shift the
        # inputs/targets, so emit placeholder labels instead.
        p = tf.fill(tf.shape(csv_row), '')

    x = tf.transpose(tf.stack(columns[0:inp_size]))
    t = tf.transpose(tf.stack(columns[inp_size:inp_size + targ_size]))
    return p, x, t


def use_exercise_params(use):
    """Optionally overwrite the layer parameters with the saved exercise values.

    When ``use`` is truthy, the hidden/output weight and bias variables are
    looked up by name among the global variables and restored, in the default
    session, from the checkpoint 'exercise_params/exercise_params' (stored
    under the keys 'w_1', 'b_1', 'w_2', 'b_2'). When ``use`` is falsy nothing
    happens.
    """
    if not use:
        return

    graph_vars = tf.global_variables()

    def first_match(name_fragment):
        # Indexing the filtered list raises IndexError if no variable matches.
        return [var for var in graph_vars if name_fragment in var.name][0]

    checkpoint_to_var = {
        'w_1': first_match('hidden_layer/weights'),
        'b_1': first_match('hidden_layer/biases'),
        'w_2': first_match('output_layer/weights'),
        'b_2': first_match('output_layer/biases'),
    }
    saver = tf.train.Saver(checkpoint_to_var)
    saver.restore(tf.get_default_session(), 'exercise_params/exercise_params')


class BasicLayer(object):
    """Fully connected layer computing ``nonlin(input x weights + biases)``.

    All ops are created under ``tf.name_scope(layer_name)`` with the nested
    scopes 'weights', 'biases', 'net_input' and 'activations'; a tensor
    summary is registered for each of them.

    Args:
        layer_name: name scope for the layer's ops.
        layer_input: a 2-D tensor, or a list/tuple of 2-D tensors that are
            concatenated along axis 1 before being used as input.
        size: number of units (columns of the weight matrix).
        wrange: ``[minval, maxval]`` of the uniform initialiser used for both
            weights and biases.
        nonlin: optional callable applied to the net input; when falsy the
            layer output is the net input itself.
        bias: when falsy no bias variable is created and ``biases`` is 0.
        seed: seed passed to the random initialisers.
        sparse_inp: forwarded to ``tf.matmul`` as ``a_is_sparse``.

    Attributes:
        input_, weights, biases, net_input, nonlin, output.
    """
    def __init__(self, layer_name, layer_input, size, wrange, nonlin=None, bias=True, seed=None, sparse_inp=False):
        with tf.name_scope(layer_name):
            # Inspect layer_input (not layer_name) to decide whether several
            # tensors have to be joined into one input matrix.
            if isinstance(layer_input, (list, tuple)):
                self.input_ = tf.concat(axis=1, values=list(layer_input))
            else:
                self.input_ = layer_input
            # Read the width from the tensor actually fed to matmul, so that
            # the concatenated case works too.
            input_size = self.input_.get_shape()[1].value

            with tf.name_scope('weights'):
                self.weights = tf.Variable(
                    tf.random_uniform(
                        minval=wrange[0],
                        maxval=wrange[1],
                        seed=seed,
                        shape=[input_size, size],
                        dtype=tf.float32
                    )
                )
                tf.summary.tensor_summary('params_summary', self.weights)

            self.biases = 0
            if bias:
                with tf.name_scope('biases'):
                    self.biases = tf.Variable(
                        tf.random_uniform(
                            minval=wrange[0],
                            maxval=wrange[1],
                            seed=seed,
                            shape=[1, size],
                            dtype=tf.float32
                        )
                    )
                    tf.summary.tensor_summary('params_summary', self.biases)

            with tf.name_scope('net_input'):
                self.net_input = tf.matmul(self.input_, self.weights, a_is_sparse=sparse_inp) + self.biases
                tf.summary.tensor_summary('data_summary', self.net_input)

            with tf.name_scope('activations'):
                self.nonlin = nonlin
                if nonlin:
                    self.output = nonlin(self.net_input)
                else:
                    self.output = self.net_input
                tf.summary.tensor_summary('data_summary', self.output)
        
class FFBP_Model(object):
    """Feed-forward back-propagation model bundling data, loss and optimizer.

    Args:
        train_data: fetchable ``(labels, inputs, targets)`` for training.
        test_data: fetchable ``(labels, inputs, targets)`` for testing.
        inp: placeholder that receives the input batch.
        targ: placeholder that receives the target batch.
        loss: scalar loss tensor; a 'loss_summary' scalar summary is
            registered for it.
        optimizer: optimizer whose ``minimize`` builds the training step.
    """
    def __init__(self, train_data, test_data, inp, targ, loss, optimizer):
        self.data = {'Train': train_data, 'Test': test_data}
        self.train_data = train_data
        self.test_data = test_data
        # Store the placeholders themselves; a trailing comma here would wrap
        # each one in a one-element tuple.
        self.inp = inp
        self.targ = targ
        self.loss = loss
        tf.summary.scalar('loss_summary', self.loss)
        self.optimizer = optimizer
        self._global_step = tf.Variable(0, name='global_step', trainable=False)
        self._train_step = self.optimizer.minimize(loss=self.loss, global_step=self._global_step)
        # Built lazily on the first epoch and then reused, so that running an
        # epoch does not add a new merge op to the graph every time.
        self._merged_summaries = None

    def run_epoch(self, session, train=True, verbose=False):
        """Fetch one batch and evaluate the model on it.

        Args:
            session: session used for every ``run`` call of this epoch.
            train: when true the 'Train' data is used and the training step is
                executed; otherwise the 'Test' data is only evaluated.
            verbose: when true, print the global step and the loss.

        Returns:
            Tuple ``(loss, summary)`` as returned by the session.
        """
        if self._merged_summaries is None:
            # Summaries registered after this point are not included.
            self._merged_summaries = tf.summary.merge_all()

        fetches = [self.loss, self._merged_summaries]
        if train:
            mode = 'Train'
            fetches.append(self._train_step)
        else:
            mode = 'Test'

        # Use the session passed in, not whatever global happens to be named
        # `sess` in the notebook.
        _, x, t = session.run(self.data[mode])
        out = session.run(fetches, feed_dict={self.inp: x, self.targ: t})
        loss, summary = out[0], out[1]
        if verbose:
            print('epoch {}: {}'.format(tf.train.global_step(session, self._global_step), loss))
        return loss, summary

    def save(self, path):
        """Placeholder: saving is not implemented yet."""
        pass

    def load(self, path):
        """Placeholder: loading is not implemented yet."""
        pass

### 1.3. Set up training and testing environments

In [4]:
# CONFIGS
num_epochs = 330
batch_size = 4
inp_size = 2
targ_size = 1


def _build_input_pipeline(scope_name, filenames):
    """Create a file-name queue and the batch-reading ops under ``scope_name``.

    Returns the queue together with the ``(labels, inputs, targets)`` tensors
    produced by ``read_csv_file``.
    """
    with tf.name_scope(scope_name):
        queue = tf.train.string_input_producer(
            filenames,
            num_epochs=num_epochs,
            shuffle=False
        )
        batch = read_csv_file(
            filename_queue=queue,
            batch_size=batch_size,
            default_val=0.0,
            inp_size=inp_size,
            targ_size=targ_size,
            pattern_labels=True
        )
    return queue, batch


# QUEUES
# Both pipelines read the same file: training and testing use the same patterns.
train_input_queue, train_examples_batch = _build_input_pipeline('Train_input', ['train_data_B.txt'])
test_input_queue, test_examples_batch = _build_input_pipeline('Test_input', ['train_data_B.txt'])


## 2. Network construction

In [5]:
# CONFIGS
hidden_size = 2
wrange = [-1,1]  # [min, max] of the uniform weight/bias initialiser
seed = 1  # Use None for random seed value
lr = 0.5  # learning rate
m = 0.9  # momentum
ckpt_freq = 1
ecrit = 0.01  # training stops once the loss falls below this value

# NETWORK CONSTRUCTION
# Both layers take `wrange` and `seed` from the config above, so editing the
# config actually changes the network (the values used to be hard-coded here).
with tf.name_scope('XOR_model'):

    model_inp = tf.placeholder(dtype=tf.float32, shape=[batch_size, inp_size], name='model_inp')
    tf.summary.tensor_summary('input_patterns/data_summary', model_inp)

    hidden_layer = BasicLayer(
        layer_name='hidden_layer',
        layer_input=model_inp,
        size=hidden_size,
        wrange=wrange,
        nonlin=tf.nn.sigmoid,
        bias=True,
        seed=seed,
        sparse_inp=False
    )

    output_layer = BasicLayer(
        layer_name='output_layer',
        layer_input=hidden_layer.output,
        size=targ_size,
        wrange=wrange,
        nonlin=tf.nn.sigmoid,
        bias=True,
        seed=seed,
        sparse_inp=False
    )

    target = tf.placeholder(dtype=tf.float32, shape=[batch_size, targ_size], name='targets')

    xor_model = FFBP_Model(
        train_data = train_examples_batch,
        test_data  = test_examples_batch,
        inp        = model_inp,
        targ       = target,
        # Summed squared error over the whole batch.
        loss       = tf.reduce_sum(tf.squared_difference(target, output_layer.output), name='loss_function'),
        optimizer  = tf.train.MomentumOptimizer(lr, m)
    )

all_summaries = tf.summary.merge_all()

## 3. Running the graph

In [6]:
sum_freq = 30 # (num_epochs // 10)

# `with tf.Session() as sess` installs the session as default AND closes it on
# exit; `tf.Session().as_default()` alone would leave the session open.
with tf.Session() as sess:
    summary_writer = tf.summary.FileWriter(logdir, sess.graph)
    sess.run(tf.local_variables_initializer())
    sess.run(tf.global_variables_initializer())
    use_exercise_params(True) # input False to use custom params
    coordinator = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coordinator)
    try:
        for i in range(num_epochs+1):
            # Periodic evaluation, written to the summary log.
            if i % sum_freq == 0 or i == num_epochs - 1:
                loss, summary = xor_model.run_epoch(session=sess, train=False, verbose=True)
                summary_writer.add_summary(summary, i)
            loss, summary = xor_model.run_epoch(session=sess, train=True, verbose=False)
            if loss < ecrit:
                loss, summary = xor_model.run_epoch(session=sess, train=False, verbose=True)
                summary_writer.add_summary(summary, i)
                print('Stopped training due to loss < ecrit')
                break
    except tf.errors.OutOfRangeError:
        # The input queues are limited to num_epochs passes over the data.
        print('Reached the end of the training set')
    finally:
        # Always stop the queue-runner threads and flush the event file,
        # whichever way the loop ended.
        coordinator.request_stop()
        coordinator.join(threads)
        summary_writer.close()

INFO:tensorflow:Restoring parameters from exercise_params/exercise_params
epoch 0: 1.0506523847579956
epoch 30: 1.001964807510376
epoch 60: 1.0000269412994385
epoch 90: 0.9999932050704956
epoch 120: 0.999982476234436
epoch 150: 0.9999575018882751
epoch 180: 0.9998661875724792
epoch 210: 0.9992285370826721
epoch 240: 0.9822090864181519
epoch 270: 0.6883606910705566
epoch 300: 0.043964266777038574
epoch 318: 0.009249473921954632
Stopped training due to loss < ecrit


In [7]:
# Resets the global default graph. Presumably here so the graph-building cells
# above can be re-run in the same kernel without piling up duplicate ops;
# run it only when no session is active.
tf.reset_default_graph()

In [8]:
# Scratch check: decoding a bytes literal as UTF-8 yields a text string.
bs = b'ddd'
ss = str(bs, "utf-8")

print(type(ss))
print(ss)

<class 'str'>
ddd
