# XOR problem

## 1. Preliminaries
### 1.1. Imports
We begin by importing several python libraries:

In [1]:
# %%bash
# rm -rf train

In [2]:
import os
import tensorflow as tf
import numpy as np

i=0
logdir = os.getcwd() + '/train/log_000'
while os.path.exists(logdir):
    i+=.001
    logdir = os.getcwd() + '/train/log_{}'.format(str(i)[2:5])
os.makedirs(logdir)
    
print('tensorflow version: {}'.format(tf.__version__))
print('numpy version: {}'.format(np.__version__))
print('current working directory: {}'.format(os.getcwd()))
print('tensorboard logdir path: {}'.format(logdir))

tensorflow version: 1.3.0
numpy version: 1.12.1
current working directory: /Users/alexten/Projects/pdpyflow/xor
tensorboard logdir path: /Users/alexten/Projects/pdpyflow/xor/train/log_003


### 1.2. Functions

In [3]:
def read_csv_file(filename_queue, batch_size, default_val, inp_size, targ_size, pattern_labels):
    reader = tf.TextLineReader(skip_header_lines=True, name='csv_reader')
    _, csv_row = reader.read_up_to(filename_queue, batch_size)
    defaults = [[default_val] for x in range(inp_size + targ_size)]
    if pattern_labels is True: 
        defaults.insert(0,[''])
    examples = tf.decode_csv(csv_row, record_defaults=defaults)
    p = tf.transpose(examples.pop(0))
#     tf.summary.text('pattern_labels', p)
    x = tf.transpose(tf.stack(examples[0:inp_size]))
    t = tf.transpose(tf.stack(examples[inp_size:inp_size + targ_size]))
    return p, x, t


def use_exercise_params(session=None):
    if session:
        all_vars = tf.global_variables()
        hidden_W = [v for v in all_vars if 'hidden_layer/weights' in v.name][0]
        hidden_b = [v for v in all_vars if 'hidden_layer/biases' in v.name][0]
        output_W = [v for v in all_vars if 'output_layer/weights' in v.name][0]
        output_b = [v for v in all_vars if 'output_layer/biases' in v.name][0]
        restore_dict = {'w_1': hidden_W,'b_1': hidden_b,'w_2': output_W,'b_2': output_b}
        tf.train.Saver(restore_dict).restore(session, 'exercise_params/exercise_params')

def make_layer(layer_name, layer_input, size, wrange, nonlin=None, bias=True, seed=None, sparse_inp=False):
    with tf.name_scope(layer_name):
        if type(layer_input) != tf.Tensor and hasattr(layer_name, '__iter__'):
            layer_input = tf.concat(axis=1, values=[i for i in layer_input])
        input_size = layer_input._shape[1]._value
        with tf.name_scope('weights'):
            
            weights = tf.Variable(
                tf.random_uniform(
                    minval = wrange[0], 
                    maxval = wrange[1],
                    seed = seed,
                    shape = [input_size, size],
                    dtype=tf.float32
                )
            )
            tf.summary.tensor_summary('params_summary', weights)
        
        biases = 0
        if bias:
            with tf.name_scope('biases'):
                biases = tf.Variable(
                    tf.random_uniform(
                        minval = wrange[0],
                        maxval = wrange[1],
                        seed = seed,
                        shape = [1, size],
                        dtype = tf.float32
                    )
                )
                tf.summary.tensor_summary('params_summary', biases)

        with tf.name_scope('net_input'):
            net_input = tf.matmul(layer_input, weights, a_is_sparse=sparse_inp) + biases
            tf.summary.tensor_summary('data_summary', net_input)
        
        with tf.name_scope('activations'):
            if nonlin:
                layer_output = nonlin(net_input)
            else:
                layer_output = net_input
            tf.summary.tensor_summary('data_summary', layer_output)
    print(weights)
    return layer_output


def run_epoch(model, train=True):
    name_scope = 'test'
    if train:
        name_scope = 'train'
    with tf.name_scope(name_scope):
        target = tf.placeholder(dtype = tf.float32, shape=[batch_size, targ_size], name='model_inp')
        squared_error = tf.reduce_sum(tf.squared_difference(target, output_acts),
                                    name='squared_error')
        tf.summary.scalar('error_summary', squared_error)
        train_step = tf.train.MomentumOptimizer(lr, m).minimize(squared_error)

### 1.3. Training environment and input pipeline

In [4]:
# CONFIGS
num_epochs = 330
batch_size = 4
inp_size = 2
targ_size = 1

# QUEUES
with tf.name_scope('Input_pipeline'):
    input_queue = tf.train.string_input_producer(
                    ['train_data_B.txt'], 
                    num_epochs = num_epochs, 
                    shuffle = False
    )

    pattern, inp_batch, targ_batch = read_csv_file(
                                        filename_queue = input_queue,
                                        batch_size = batch_size,
                                        default_val = 0.0,
                                        inp_size = inp_size,
                                        targ_size = targ_size,
                                        pattern_labels = True
    )

## 2. Network construction

In [5]:
# CONFIGS
hidden_size = 2
wrange = [-1,1]
seed = None # Use None for random seed value
lr = 0.5
m = 0.9
# use_exercise_params = True
ckpt_freq = 1
ecrit = 0.01

# NETWORK CONSTRUCTION
with tf.name_scope('XOR_model'):
    
    model_inp  = tf.placeholder(dtype = tf.float32, shape=[batch_size, inp_size], name='model_inp')
    pat_labels = tf.placeholder(dtype = tf.string, shape=[batch_size, ], name='pattern_labels')
    tf.summary.tensor_summary('input_patterns/data_summary', model_inp)
    tf.summary.tensor_summary('pattern_labels/data_summary', pat_labels)
    
    hidden_acts = make_layer(
        layer_name = 'hidden_layer', 
        layer_input = model_inp, 
        size = hidden_size, 
        wrange = [-1,1], 
        nonlin=tf.nn.sigmoid, 
        bias=True, 
        seed=1, 
        sparse_inp=False
    )
    
    output_acts = make_layer(
        layer_name = 'output_layer', 
        layer_input = hidden_acts, 
        size = targ_size, 
        wrange = [-1,1], 
        nonlin=tf.nn.sigmoid, 
        bias=True, 
        seed=1, 
        sparse_inp=False
    )

    with tf.name_scope('train'):
        target = tf.placeholder(dtype = tf.float32, shape=[batch_size, targ_size], name='targets')
        tf.summary.tensor_summary('target_patterns/data_summary', target)
        squared_error = tf.reduce_sum(tf.squared_difference(target, output_acts),
                                    name='squared_error')
        tf.summary.scalar('error_summary', squared_error)
        train_step = tf.train.MomentumOptimizer(lr, m).minimize(squared_error)

merge_summaries = tf.summary.merge_all()

<tf.Variable 'XOR_model/hidden_layer/weights/Variable:0' shape=(2, 2) dtype=float32_ref>
<tf.Variable 'XOR_model/output_layer/weights/Variable:0' shape=(2, 1) dtype=float32_ref>


## 3. Running graph

In [6]:
sum_freq = 30 # (num_epochs // 10)
with tf.Session() as sess:
    summary_writer = tf.summary.FileWriter(logdir, sess.graph)
    sess.run(tf.local_variables_initializer())
    sess.run(tf.global_variables_initializer())
    use_exercise_params(sess) # input None to use custom params, input current session to use exercise params
    coordinator = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coordinator)
    for i in range(num_epochs+1):
#             try:
                p, x, t = sess.run([pattern, inp_batch, targ_batch])
                _, ppp , loss, summary = sess.run([pat_labels, train_step, squared_error, merge_summaries], 
                                            feed_dict={model_inp: x, target: t, pat_labels: p})
                print(ppp)
                if i % sum_freq == 0 or i == num_epochs - 1:
                    summary_writer.add_summary(summary, i)
                    print('epoch {}: {}'.format(i,loss))
                if loss < ecrit:
                    summary_writer.add_summary(summary, i)
                    print('stop epoch {}: {}'.format(i,loss))
                    summary_writer.close()
                    break
#             except tf.errors.OutOfRangeError:
#                 print('Reached the end of trainining set')
#                 break

INFO:tensorflow:Restoring parameters from exercise_params/exercise_params
[b'p00' b'p01' b'p10' b'p11']
epoch 0: 1.0506523847579956
[b'p00' b'p01' b'p10' b'p11']
[b'p00' b'p01' b'p10' b'p11']
[b'p00' b'p01' b'p10' b'p11']
[b'p00' b'p01' b'p10' b'p11']
[b'p00' b'p01' b'p10' b'p11']
[b'p00' b'p01' b'p10' b'p11']
[b'p00' b'p01' b'p10' b'p11']
[b'p00' b'p01' b'p10' b'p11']
[b'p00' b'p01' b'p10' b'p11']
[b'p00' b'p01' b'p10' b'p11']
[b'p00' b'p01' b'p10' b'p11']
[b'p00' b'p01' b'p10' b'p11']
[b'p00' b'p01' b'p10' b'p11']
[b'p00' b'p01' b'p10' b'p11']
[b'p00' b'p01' b'p10' b'p11']
[b'p00' b'p01' b'p10' b'p11']
[b'p00' b'p01' b'p10' b'p11']
[b'p00' b'p01' b'p10' b'p11']
[b'p00' b'p01' b'p10' b'p11']
[b'p00' b'p01' b'p10' b'p11']
[b'p00' b'p01' b'p10' b'p11']
[b'p00' b'p01' b'p10' b'p11']
[b'p00' b'p01' b'p10' b'p11']
[b'p00' b'p01' b'p10' b'p11']
[b'p00' b'p01' b'p10' b'p11']
[b'p00' b'p01' b'p10' b'p11']
[b'p00' b'p01' b'p10' b'p11']
[b'p00' b'p01' b'p10' b'p11']
[b'p00' b'p01' b'p10' b'p11'

[b'p00' b'p01' b'p10' b'p11']
[b'p00' b'p01' b'p10' b'p11']
[b'p00' b'p01' b'p10' b'p11']
[b'p00' b'p01' b'p10' b'p11']
[b'p00' b'p01' b'p10' b'p11']
[b'p00' b'p01' b'p10' b'p11']
[b'p00' b'p01' b'p10' b'p11']
[b'p00' b'p01' b'p10' b'p11']
[b'p00' b'p01' b'p10' b'p11']
epoch 300: 0.043964266777038574
[b'p00' b'p01' b'p10' b'p11']
[b'p00' b'p01' b'p10' b'p11']
[b'p00' b'p01' b'p10' b'p11']
[b'p00' b'p01' b'p10' b'p11']
[b'p00' b'p01' b'p10' b'p11']
[b'p00' b'p01' b'p10' b'p11']
[b'p00' b'p01' b'p10' b'p11']
[b'p00' b'p01' b'p10' b'p11']
[b'p00' b'p01' b'p10' b'p11']
[b'p00' b'p01' b'p10' b'p11']
[b'p00' b'p01' b'p10' b'p11']
[b'p00' b'p01' b'p10' b'p11']
[b'p00' b'p01' b'p10' b'p11']
[b'p00' b'p01' b'p10' b'p11']
[b'p00' b'p01' b'p10' b'p11']
[b'p00' b'p01' b'p10' b'p11']
[b'p00' b'p01' b'p10' b'p11']
stop epoch 317: 0.009680919349193573


In [7]:
tf.reset_default_graph() # might be needed

In [8]:
bs = b'ddd'
type(bs)

ss = bs.decode("utf-8")
print(type(ss))
print(ss)

<class 'str'>
ddd
