In [1]:
# import tensorflow and numpy
import tensorflow as tf
import numpy as np

In [2]:
# setup the parameters
# number of input values (points sampled from each line)
vals = 2
# max answer, so basically the width of the frame
max_answer = 100
# number of different M's, biggest gradient will fit in frame
# floor division keeps this an int under both Python 2 and Python 3;
# it is later used with range() and as an array / tensor dimension,
# neither of which accepts a float
gradients = max_answer//(vals)+1
# number of training steps and the optimizer step size
iterations = 11000
learning_rate = 0.1

# I am using a GPU
# this line caps the fraction of GPU memory the process may use at 0.25
# (the options are handed to the session when it is created further down)
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.25)

In [3]:
# defining function to make training data
def training_data(num_vals=None, num_gradients=None):
    """Build the table of sampled lines y = M*x used for training.

    Row M of the table holds M*x for x = 0 .. num_vals, i.e. num_vals input
    points followed by one extra point that can serve as the label.

    Parameters
    ----------
    num_vals : int, optional
        Number of input points per line. Defaults to the notebook-level
        ``vals``.
    num_gradients : int, optional
        Number of gradients M = 0 .. num_gradients-1 to generate. Defaults to
        the notebook-level ``gradients``.

    Returns
    -------
    tuple
        ``(y, n_lines)`` where ``y`` is an int32 array with one row per
        gradient and ``num_vals + 1`` columns, and ``n_lines`` is the number
        of rows in ``y``.
    """
    # fall back to the notebook configuration when called with no arguments
    if num_vals is None:
        num_vals = vals
    if num_gradients is None:
        num_gradients = gradients
    num_vals = int(num_vals)
    num_gradients = int(num_gradients)

    # x values of the line y = Mx; one extra value is kept for the label
    x = np.arange(num_vals + 1, dtype=np.int32)
    # every gradient to learn, increasing by 1 each row
    M = np.arange(num_gradients, dtype=np.int32)

    # the outer product gives all rows M*x in one step instead of growing
    # the array with np.append + reshape once per gradient
    y = np.outer(M, x).astype(np.int32)

    # return the training data
    # and number of lines to learn
    return(y,np.size(y,0))

# print the training data: the tuple returned by training_data(),
# i.e. the table of line values and the number of lines in it
print(training_data())

(array([[  0,   0,   0],
       [  0,   1,   2],
       [  0,   2,   4],
       [  0,   3,   6],
       [  0,   4,   8],
       [  0,   5,  10],
       [  0,   6,  12],
       [  0,   7,  14],
       [  0,   8,  16],
       [  0,   9,  18],
       [  0,  10,  20],
       [  0,  11,  22],
       [  0,  12,  24],
       [  0,  13,  26],
       [  0,  14,  28],
       [  0,  15,  30],
       [  0,  16,  32],
       [  0,  17,  34],
       [  0,  18,  36],
       [  0,  19,  38],
       [  0,  20,  40],
       [  0,  21,  42],
       [  0,  22,  44],
       [  0,  23,  46],
       [  0,  24,  48],
       [  0,  25,  50],
       [  0,  26,  52],
       [  0,  27,  54],
       [  0,  28,  56],
       [  0,  29,  58],
       [  0,  30,  60],
       [  0,  31,  62],
       [  0,  32,  64],
       [  0,  33,  66],
       [  0,  34,  68],
       [  0,  35,  70],
       [  0,  36,  72],
       [  0,  37,  74],
       [  0,  38,  76],
       [  0,  39,  78],
       [  0,  40,  80],
       [  0,  4

In [4]:
# build the numeric training table once for the rest of the notebook
# training_line_data = training data in numbers
# training_lines = number of different lines
training_line_data, training_lines = training_data()

# each number is later converted into a binary array ("frame") that is
# all zeros except for a single 1 marking where the particle is at that
# point in time, like one frame of a video
# length is the size of one such frame:
# the largest M value multiplied by vals, minus 1
length = vals*(gradients-1) - 1

# full_length is the size of all vals input frames stacked end to end
# into one 1d array
full_length = vals*length

# this function turns the data into the arrays explained above
def set_data():
    """Convert the numeric training table into one-hot inputs and labels.

    Reads the notebook-level ``training_line_data``, ``training_lines``,
    ``vals``, ``length``, ``full_length`` and ``gradients``.

    Returns
    -------
    tuple
        ``(input_data, labels)``. ``input_data`` has shape
        ``(training_lines, 1, full_length)``: for every line, ``vals`` frames
        of ``length`` zeros with a single 1 at the sampled value, laid end to
        end. ``labels`` has shape ``(training_lines, 1, gradients)`` and is
        one-hot at the value found in column 1 of the line.
    """
    table = np.asarray(training_line_data)
    line_ids = np.arange(training_lines)

    # one frame per input value, all zeros to start with
    frames = np.zeros([training_lines, vals, length])
    # one one-hot row per line for the labels
    one_hot_labels = np.zeros([training_lines, 1, gradients])

    # switch on, for every line at once, the position given by each input value
    for frame_id in range(vals):
        frames[line_ids, frame_id, table[line_ids, frame_id]] = 1

    # the label position comes from column 1 of the training table
    one_hot_labels[line_ids, 0, table[line_ids, 1]] = 1

    # flatten the stacked frames of each line into a single 1d row
    flat_frames = frames.reshape(training_lines, 1, full_length)

    # return the data and labels
    return (flat_frames, one_hot_labels)

# print converted data: the (input_data, labels) tuple returned by set_data()
print(set_data())

(array([[[ 1.,  0.,  0., ...,  0.,  0.,  0.]],

       [[ 1.,  0.,  0., ...,  0.,  0.,  0.]],

       [[ 1.,  0.,  0., ...,  0.,  0.,  0.]],

       ..., 
       [[ 1.,  0.,  0., ...,  0.,  0.,  0.]],

       [[ 1.,  0.,  0., ...,  0.,  0.,  0.]],

       [[ 1.,  0.,  0., ...,  0.,  0.,  0.]]]), array([[[ 1.,  0.,  0., ...,  0.,  0.,  0.]],

       [[ 0.,  1.,  0., ...,  0.,  0.,  0.]],

       [[ 0.,  0.,  1., ...,  0.,  0.,  0.]],

       ..., 
       [[ 0.,  0.,  0., ...,  1.,  0.,  0.]],

       [[ 0.,  0.,  0., ...,  0.,  1.,  0.]],

       [[ 0.,  0.,  0., ...,  0.,  0.,  1.]]]))


In [5]:
# we define the weights, biases and inputs
# placeholder for the input training data: any number of rows,
# each a flattened array of full_length values
x = tf.placeholder(tf.float32, [None, full_length])
# weights and biases, both initialised to zeros
W = tf.Variable(tf.zeros([full_length, gradients]))
b = tf.Variable(tf.zeros([gradients]))
# linear model output, one score per gradient; these are raw logits
# (no softmax is applied here, the loss below takes them as `logits`)
y = tf.matmul(x, W) + b

# we will feed the labels in here, one row of `gradients` values per example
y_ = tf.placeholder(tf.float32, [None, gradients])

# configure the loss function, using softmax cross entropy
# averaged over the rows that are fed in
cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y))

# define the optimizer
# AdagradOptimizer works much better than GradientDescentOptimizer
# (the gradient descent alternative is kept below for comparison)
#train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(cross_entropy)
train_step = tf.train.AdagradOptimizer(learning_rate).minimize(cross_entropy)


In [7]:
# create an interactive session, passing in the gpu_options defined
# in the parameter cell above
sess = tf.InteractiveSession(config=tf.ConfigProto(gpu_options=gpu_options)) 

# initialize variables (W and b) before any training step is run
init = tf.global_variables_initializer()
sess.run(init)

# now we run the learning loop
for _ in range(iterations):

    # set training data and labels
    x_data, y_data = set_data()

    # use next line each step
    x_data = x_data[_%training_lines]
    y_data = y_data[_%training_lines]
    
    # run the training optimizer
    sess.run(train_step, feed_dict={x: x_data, y_: y_data})

    # print steps and error 10 times in total
    if _ % (iterations/20) == 0:
        print 'step', _, 'out of', iterations
        print 'error =', sess.run(cross_entropy, feed_dict={x: x_data, y_: y_data})

step 0 out of 11000
error = 3.63469
step 550 out of 11000
error = 3.391
step 1100 out of 11000
error = 3.03202
step 1650 out of 11000
error = 2.76114
step 2200 out of 11000
error = 2.53485
step 2750 out of 11000
error = 2.40853
step 3300 out of 11000
error = 2.223
step 3850 out of 11000
error = 2.05669
step 4400 out of 11000
error = 1.90606
step 4950 out of 11000
error = 1.76885
step 5500 out of 11000
error = 1.68252
step 6050 out of 11000
error = 1.56489
step 6600 out of 11000
error = 1.45711
step 7150 out of 11000
error = 1.35831
step 7700 out of 11000
error = 1.29378
step 8250 out of 11000
error = 1.20905
step 8800 out of 11000
error = 1.13131
step 9350 out of 11000
error = 1.05997
step 9900 out of 11000
error = 0.994514
step 10450 out of 11000
error = 0.950935


In [11]:
# initialize array to record test values
test_line = np.array([])

print 'input values, the max gradient is', gradients-1
# loop to ask user for input values
for i in range(vals):
    print '\nvalue',i+1,'?'
    val = int(input())
    test_line = np.append(test_line, val)
    
test_line = test_line.astype(np.int32)
# record the last value of the points
# this is needed because we have only worked out the gradient not the bias
last_val = test_line[-1]
# show line that user has inputed
print(test_line)

# function to convert data into training data format
def test_model():
    # first value of the points to take away the b later
    global test_line
    first_value = test_line[0]
    
    # take away the first value so its only left with the difference between
    # the points and predicts y = Mx , not y = Mx+ b
    test_line = np.subtract(test_line,first_value)


    input_array = np.array([])
    # for each input value, write to the input array
    # with a 1 in the position
    for a in range(vals):
        test_array = np.zeros([1,length])
        test_array[0][test_line[a]] = 1
        # we write to array and shape so new line for each test value
        input_array = np.append(input_array, test_array).reshape([1,(a+1)*length])
        
    # return array
    return(input_array)   

# print array
#test_model()

input values, the max gradient is 50

value 1 ?
32

value 2 ?
63
[32 63]


In [12]:
# run the function to get output
# using new weights and biases
input_array = test_model()
probs = sess.run(y, feed_dict={x:input_array})
# now we have found the predicted gradient,
# we add the last value to get the next value
learnt_ans = np.argmax(probs)+last_val
# "true" answer, should work with all linear lines
# this numerically works out hte gradient by findingthe difference,
# then also adds the last value
answer = (test_line[1]-test_line[0])+last_val

if answer == learnt_ans:
    print 'Correct'
    
    # print learnt answer and it probability
    print '\nLearnt answer =', learnt_ans
    print 'Probaility of Gradient', np.argmax(probs), '=', probs[0][learnt_ans-last_val]
else:
    print 'Wrong'
    
    # print learnt answer and it probability
    print '\nLearnt answer =', learnt_ans
    print 'Probaility of Gradient', np.argmax(probs), '=', probs[0][learnt_ans-last_val]
    
    # print true answer and its probabilities
    print '\nNumerical answer =', answer
    print 'Probability of Gradient', test_line[1]-test_line[0], '=', probs[0][answer-last_val]
    

# print the probabilities
print '\n\n', probs

Correct

Learnt answer = 94
Probaility of Gradient 31 = 2.30938


[[-1.23584569 -1.23580122 -1.23575568 -1.2357105  -1.23566532 -1.23561859
  -1.23557365 -1.23552716 -1.23548162 -1.23543429 -1.23538899 -1.23534155
  -1.23529565 -1.23524773 -1.23520136 -1.23515391 -1.23510587 -1.23505902
  -1.23501003 -1.23496389 -1.23491478 -1.23486626 -1.23481846 -1.23476863
  -1.23472142 -1.23467171 -1.2346226  -1.23457479 -1.23452508 -1.23447585
  -1.23442554  2.30938482 -1.22414482 -1.22409952 -1.22405672 -1.23405898
  -1.23401356 -1.2339704  -1.23392463 -1.2338798  -1.23383534 -1.23378921
  -1.23374474 -1.23370063 -1.23365271 -1.2336086  -1.23356378 -1.23351681
  -1.23347282 -1.23342597 -1.23338068]]
