In [1]:
#imports and settings:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import numpy as np

tf.logging.set_verbosity(tf.logging.INFO)

mnist = input_data.read_data_sets('./MNIST_data', one_hot=True)

Extracting ./MNIST_data/train-images-idx3-ubyte.gz
Extracting ./MNIST_data/train-labels-idx1-ubyte.gz
Extracting ./MNIST_data/t10k-images-idx3-ubyte.gz
Extracting ./MNIST_data/t10k-labels-idx1-ubyte.gz


In [17]:
#helper funcs
def weight_variable(shape):
  initial = tf.truncated_normal(shape, stddev=0.1)
  return tf.Variable(initial)

def bias_variable(shape):
  initial = tf.constant(0.1, shape=shape)
  return tf.Variable(initial)

def conv2d(x,W):
  return tf.nn.conv2d(x,W,strides=[1,1,1,1], padding='SAME')

def max_pool_2x2(x):
  return tf.nn.max_pool(x, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME')

In [18]:
#Init model weights & biases
sess = tf.InteractiveSession()

x = tf.placeholder(tf.float32, shape=[None, 784])
x_image = tf.reshape(x, [-1,28,28,1])
y_ = tf.placeholder(tf.float32, shape=[None, 2])

keep_prob = tf.placeholder(tf.float32)

#conv_layer1
W_conv1 = weight_variable([5,5,1,32])
b_conv1 = bias_variable([32])

#conv_layer2
W_conv2 = weight_variable([5,5,32,64])
b_conv2 = bias_variable([64])

#fc layer 1
W_fc1 = weight_variable([7*7*64, 1024])
b_fc1 = bias_variable([1024])

#softmax layer <5
W_softmax1 = weight_variable([1024, 5])
b_softmax1 = bias_variable([5])

#softmax layer >5
W_softmax2 = weight_variable([1024, 5])
b_softmax2 = bias_variable([5])

#(maybe) softmax combined
W_softmaxgroup = weight_variable([2,2])
b_softmaxgroup = bias_variable([2])

#final fc layer
W_fc2 = weight_variable([1024, 2])
b_fc2 = bias_variable([2])

In [19]:
#Define net
#conv 1
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)

#conv 2
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)

#fc 1
h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

#h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

#Two FC softmax layers
sm1 = tf.nn.softmax(tf.matmul(h_fc1, W_softmax1) + b_softmax1)
sm2 = tf.nn.softmax(tf.matmul(h_fc1, W_softmax2) + b_softmax2)

#Pool both softmaxes into single "softmax" node (?)
sm1_pool = tf.reduce_max(sm1,1)
sm2_pool = tf.reduce_max(sm2,1)

#pool to final classification
softmaxStacked = tf.stack([sm1_pool, sm2_pool],1)
#y_conv = softmaxStacked
y_conv=tf.nn.softmax(tf.matmul(softmaxStacked, W_softmaxgroup) + b_softmaxgroup)

In [20]:
#helper loss funcs
def zBar(x):
  return tf.maximum(tf.concat(x,1),0)
    
def bigU(x):
  return tf.matmul(tf.transpose(x),x)

def coactivity(x):
  #Select everything not in the diagonal:
  selection = np.ones(x.shape,dtype='float32') - np.eye(x.shape[1],dtype='float32')
  return tf.reduce_sum(tf.matmul(x,selection))

def bigV(x):
  smallNu=tf.reshape(tf.reduce_sum(x,axis=0),(1,-1))
  return tf.matmul(tf.transpose(smallNu),smallNu)

def specialNormalise(x):
  selection = np.ones(x.shape,dtype='float32') - np.eye(x.shape[0],dtype='float32')
  top = tf.reduce_sum(tf.matmul(x,selection))
  bottom = tf.multiply(tf.to_float(x.shape[0]-1),tf.reduce_sum(tf.matmul(x,np.eye(x.shape[0],dtype='float32'))))
  return tf.divide(top,bottom)

one = tf.constant(0.2)
zero = tf.constant(0.0)

tresh = tf.constant(0.01)
c1 = one
c2 = one
c3 = lambda affinity: tf.cond(tf.less(affinity,tresh),lambda: one,lambda: zero)
c4 = one

In [21]:
#calculate losses
#affinity
bZ = zBar([sm1,sm2])
bU = bigU(bZ)
coact = coactivity(bU)
affinity = specialNormalise(bU)

#balance
bV=bigV(bZ)
balance = specialNormalise(bV)

#cross entropy
cross_entropy = tf.reduce_max(-tf.reduce_sum(y_ * tf.log(y_conv), reduction_indices=[1]))

loss = cross_entropy #+ c1*affinity + c2*(1-balance) + c3*coact + c4*tf.square(tf.norm(tf.concat([sm1,sm2],1),ord='fro',axis=(0,1)))

In [22]:
train_step = tf.train.AdamOptimizer(1e-4).minimize(loss)
correct_prediction = tf.equal(tf.argmax(y_conv,1), tf.argmax(y_,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
sess.run(tf.global_variables_initializer())

In [23]:
y = {0:[0,1], 1:[1,0]}

totalSteps = 20000
batchSize = 100
for i in range(totalSteps):
    trainbatch = mnist.train.next_batch(batchSize)
    trainbatch = (trainbatch[0],np.array([y[np.argmax(trainbatch[1][j])>4] for j in range(len(trainbatch[1]))]))
    valbatch = mnist.validation.next_batch(batchSize)
    valbatch = (valbatch[0],np.array([y[np.argmax(valbatch[1][j])>4] for j in range(len(valbatch[1]))]))
    
    if i%100 == 0:
        train_loss,train_acc = sess.run([loss, accuracy],feed_dict={x: trainbatch[0], y_: trainbatch[1], keep_prob: 1})
        #train_loss = loss.eval(feed_dict={x:trainbatch[0], y_: trainbatch[1], keep_prob: 1.0})
        val_loss, val_acc = sess.run([loss, accuracy],feed_dict={x: valbatch[0], y_: valbatch[1], keep_prob: 1})
        #val_loss = loss.eval(feed_dict={x:valbatch[0], y_:valbatch[1], keep_prob: 1.0})
        #train_acc = np.sum(np.argmax(train_y,axis=1)==np.argmax(trainbatch[1],axis=1))/float(batchSize)
        #val_acc = np.sum(np.argmax(val_y,axis=1)==np.argmax(valbatch[1],axis=1))/float(batchSize)
        print("step %d/%d \n Train: accuracy: %g, loss: %g \n Validation: accuracy: %g loss: %g"%(i,totalSteps, train_acc, train_loss, val_acc, val_loss))
        #af = affinity.eval(feed_dict={x:trainbatch[0], y_: trainbatch[1], keep_prob: 1.0})
        #ba = balance.eval(feed_dict={x:trainbatch[0], y_: trainbatch[1], keep_prob: 1.0})
        #co = coact.eval(feed_dict={x:trainbatch[0], y_: trainbatch[1], keep_prob: 1.0})
        #print("affinity: %g, balance: %g, coact: %g"%(af,(1-ba),co))
    feed_dict = {x: trainbatch[0], y_: trainbatch[1], keep_prob: 0.5}
    _ = sess.run([train_step],feed_dict=feed_dict)

step 0/20000 
 Train: accuracy: 0.64, loss: 1.18013 
 Validation: accuracy: 0.54 loss: 1.11293
step 100/20000 
 Train: accuracy: 0.55, loss: 1.07288e-06 
 Validation: accuracy: 0.53 loss: 8.34465e-07
step 200/20000 
 Train: accuracy: 0.49, loss: 2.38419e-07 
 Validation: accuracy: 0.39 loss: 1.19209e-07
step 300/20000 
 Train: accuracy: 0.49, loss: 1.19209e-07 
 Validation: accuracy: 0.48 loss: 1.19209e-07
step 400/20000 
 Train: accuracy: 0.51, loss: 1.19209e-07 
 Validation: accuracy: 0.38 loss: -0
step 500/20000 
 Train: accuracy: 0.54, loss: -0 
 Validation: accuracy: 0.56 loss: -0
step 600/20000 
 Train: accuracy: 0.45, loss: -0 
 Validation: accuracy: 0.48 loss: -0
step 700/20000 
 Train: accuracy: 0.51, loss: -0 
 Validation: accuracy: 0.48 loss: -0
step 800/20000 
 Train: accuracy: 0.51, loss: -0 
 Validation: accuracy: 0.51 loss: -0
step 900/20000 
 Train: accuracy: 0.5, loss: -0 
 Validation: accuracy: 0.48 loss: -0
step 1000/20000 
 Train: accuracy: 0.41, loss: -0 
 Validati

KeyboardInterrupt: 

In [None]:
testSize = 1000
testbatch = mnist.test.next_batch(testSize)
testbatch = (testbatch[0],np.array([y[np.argmax(testbatch[1][j])>4] for j in range(len(testbatch[1]))]))

np.sum(np.argmax(y_conv.eval({x: testbatch[0], y_: testbatch[1], keep_prob: 0.5}),axis=1)==np.argmax(testbatch[1],axis=1))/float(testSize)