In [1]:
#imports and settings:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import numpy as np

# Show INFO-level TensorFlow log messages in the notebook output.
tf.logging.set_verbosity(tf.logging.INFO)

# Load the MNIST splits from the local data directory with one-hot digit
# labels; later cells recover the digit with np.argmax over those rows.
MNIST_DATA_DIR = './MNIST_data'
mnist = input_data.read_data_sets(MNIST_DATA_DIR, one_hot=True)

Extracting ./MNIST_data/train-images-idx3-ubyte.gz
Extracting ./MNIST_data/train-labels-idx1-ubyte.gz
Extracting ./MNIST_data/t10k-images-idx3-ubyte.gz
Extracting ./MNIST_data/t10k-labels-idx1-ubyte.gz


In [2]:
#helper funcs
def weight_variable(shape):
    """Return a ``tf.Variable`` of the given shape.

    Initial values are drawn from a truncated normal distribution with a
    standard deviation of 0.1.
    """
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

def bias_variable(shape):
    """Return a ``tf.Variable`` of the given shape with every entry set to 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))

def conv2d(x, W):
    """Convolve ``x`` with the filters ``W`` using stride 1 and 'SAME' padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')

def max_pool_2x2(x):
    """Max-pool ``x`` over 2x2 windows with stride 2 and 'SAME' padding."""
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')

def matrix_softmax(x):
    """Apply a single softmax across all flattened entries of each row of ``x``.

    ``x`` is flattened with ``tf.contrib.layers.flatten``, passed through
    ``tf.nn.softmax``, and reshaped back to the static shape of ``x`` with
    the leading dimension left as -1.
    """
    out_shape = [-1] + x.get_shape().as_list()[1:]
    flat_probs = tf.nn.softmax(tf.contrib.layers.flatten(x))
    return tf.reshape(flat_probs, out_shape)

def avg_softmax(x):
    """Softmax over all flattened entries of ``x``, then sum along axis 2.

    The softmax is taken over the flattened tensor and reshaped back to the
    static shape of ``x`` (leading dimension -1) before the reduction.

    NOTE(review): despite the "avg" in the name the reduction is a sum, not
    a mean -- confirm which one is intended before relying on this helper.
    """
    out_shape = [-1] + x.get_shape().as_list()[1:]
    flat_probs = tf.nn.softmax(tf.contrib.layers.flatten(x))
    probs = tf.reshape(flat_probs, out_shape)
    return tf.reduce_sum(probs, 2)

def max_softmax(x):
    """Softmax over all flattened entries of ``x``, then max along axis 2.

    The softmax is taken over the flattened tensor and reshaped back to the
    static shape of ``x`` (leading dimension -1) before the reduction.
    """
    out_shape = [-1] + x.get_shape().as_list()[1:]
    flat_probs = tf.nn.softmax(tf.contrib.layers.flatten(x))
    probs = tf.reshape(flat_probs, out_shape)
    return tf.reduce_max(probs, 2)

In [3]:
#Init model weights & biases
sess = tf.InteractiveSession()

# Inputs: flattened 784-pixel images (viewed as 28x28x1 for the conv layers)
# and two-column target rows.
x = tf.placeholder(tf.float32, shape=[None, 784])
x_image = tf.reshape(x, [-1,28,28,1])
y_ = tf.placeholder(tf.float32, shape=[None, 2])

#keep_prob = tf.placeholder(tf.float32)

def _conv_params(n_in, n_out):
    """Return (weights, bias) for a 3x3 convolution from n_in to n_out channels."""
    return weight_variable([3, 3, n_in, n_out]), bias_variable([n_out])

def _dense_params(n_in, n_out):
    """Return (weights, bias) for a dense layer from n_in to n_out units."""
    return weight_variable([n_in, n_out]), bias_variable([n_out])

# Variables are created in the same order as before: each weight first,
# followed by its bias, layer by layer.

#conv_layer1
W_conv1, b_conv1 = _conv_params(1, 32)

#conv_layer2
W_conv2, b_conv2 = _conv_params(32, 32)

#conv_layer3
W_conv3, b_conv3 = _conv_params(32, 64)

#conv_layer4
W_conv4, b_conv4 = _conv_params(64, 64)

#fc layer 1
W_fc1, b_fc1 = _dense_params(7*7*64, 2048)

#softmax layer <5
W_softmax1, b_softmax1 = _dense_params(2048, 5)

#softmax layer >5
W_softmax2, b_softmax2 = _dense_params(2048, 5)

In [4]:
#Define net
#Dropout keep probability shared by both dropout layers.
#tf.nn.dropout's second argument is the probability of KEEPING a unit (not of
#dropping it), so a value of 0.25 would discard 75% of the activations. The
#default of 0.75 drops 25%. Because this is a placeholder with a default it
#can be overridden per run: feed {keep_prob: 1.0} to switch dropout off when
#evaluating.
keep_prob = tf.placeholder_with_default(tf.constant(0.75), shape=[])

#conv 1
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
#conv 2
h_conv2 = tf.nn.relu(conv2d(h_conv1, W_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)

h_drop1 = tf.nn.dropout(h_pool2, keep_prob)

#conv 3
h_conv3 = tf.nn.relu(conv2d(h_drop1, W_conv3) + b_conv3)
#conv 4
h_conv4 = tf.nn.relu(conv2d(h_conv3, W_conv4) + b_conv4)
h_pool4 = max_pool_2x2(h_conv4)

h_drop2 = tf.nn.dropout(h_pool4, keep_prob)

#fc 1
#(despite its name, h_pool2_flat is the flattened output of the second
#dropout layer, i.e. of pool 4)
h_pool2_flat = tf.reshape(h_drop2, [-1, 7*7*64])

h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

#Cluster layers: two 5-unit heads on top of the shared dense features
clust1 = tf.matmul(h_fc1, W_softmax1) + b_softmax1
clust2 = tf.matmul(h_fc1, W_softmax2) + b_softmax2

#Classification layer
#Stack the two heads along axis 1, take one softmax jointly over all of their
#units, and score each head by its largest entry (max over axis 2).
stackedClusts = tf.stack([clust1,clust2],1)
softmaxMat = matrix_softmax(stackedClusts)
smStacked = tf.reduce_max(softmaxMat,2)

y_conv = smStacked
#y_conv=tf.nn.softmax(tf.matmul(softmaxStacked, W_softmaxgroup) + b_softmaxgroup)

In [5]:
#helper loss funcs
def zBar(x):
    """Concatenate ``x`` along axis 1 and clamp the result at zero from below."""
    joined = tf.concat(x, 1)
    return tf.maximum(joined, 0)
    
def bigU(zb):
    """Multiply the transpose of ``zb`` (last two axes swapped) by ``zb`` itself."""
    zb_swapped = tf.transpose(zb, perm=[0, 2, 1])
    return tf.matmul(zb_swapped, zb)

def coactivity(x):
    """Sum every off-diagonal entry of the square matrices held in ``x``.

    ``x`` must be square in its last two dimensions (the notebook passes the
    output of ``bigU``). The diagonal of each matrix is masked out and all
    remaining entries are added up into one scalar.
    """
    size = int(x.shape[-1])
    # Explicit square float32 mask: 1 everywhere, 0 on the diagonal. The mask
    # used to be assembled from a 1-D ones vector sized by a different axis
    # and with a different dtype than the identity, and only became square
    # through implicit broadcasting.
    off_diagonal = np.ones((size, size), dtype='float32') - np.eye(size, dtype='float32')
    return tf.reduce_sum(tf.multiply(x, off_diagonal))

def bigV(x):
    """Sum ``x`` over axis 2, then multiply the transposed sums by the sums.

    ``tf.transpose`` is called without ``perm``, so it reverses the axes of
    the summed tensor before the matrix product.
    """
    per_row_sums = tf.reduce_sum(x, axis=2)
    return tf.matmul(tf.transpose(per_row_sums), per_row_sums)

def specialNormalise(x):
    """Return sum(off-diagonal of x) / ((n - 1) * sum(diagonal of x)).

    ``n`` is ``x.shape[1]``; the diagonal and off-diagonal parts are selected
    with float32 n-by-n masks that are multiplied element-wise into ``x``.
    """
    n = x.shape[1]
    diag_mask = np.eye(n, dtype='float32')
    off_diag_mask = np.ones_like(diag_mask) - diag_mask

    off_diag_sum = tf.reduce_sum(tf.multiply(x, off_diag_mask))
    scale = tf.to_float(n - 1)
    diag_sum = tf.reduce_sum(tf.multiply(x, diag_mask))
    return tf.divide(off_diag_sum, tf.multiply(scale, diag_sum))

def frobNorm(x):
    """Square root of the sum of squares of every entry in ``x``."""
    sum_of_squares = tf.reduce_sum(tf.square(x))
    return tf.sqrt(sum_of_squares)

one = tf.constant(1.0)
zero = tf.constant(0.0)

# Affinity level below which the coactivity penalty is switched on (see c3).
tresh = tf.constant(0.01)

# Weights of the loss terms: c1 scales affinity, c2 scales (1 - balance),
# c3 scales coact and c4 scales the Frobenius-norm term.
c1 = tf.constant(1.0)
c2 = tf.constant(1.0)

def c3(affinity):
    """Return 0.0003 while ``affinity`` is below ``tresh`` and 0.0 otherwise."""
    return tf.cond(
        tf.less(affinity, tresh),
        lambda: tf.constant(0.0003),
        lambda: tf.constant(0.0),
    )

c4 = tf.constant(0.000001)

In [6]:
#calculate losses
#affinity
bZ = zBar(softmaxMat)
bU = bigU(bZ)
coact = coactivity(bU)
affinity = specialNormalise(bU)

#balance
bV=bigV(bZ)
balance = specialNormalise(bV)

#cross entropy
#y_conv is built from softmax outputs that can underflow to exactly 0, and
#0 * log(0) evaluates to NaN, so the probabilities are clipped before the log.
#NOTE(review): reduce_max keeps only the worst example of the batch rather than
#the batch mean -- confirm that this is intended.
safe_y_conv = tf.clip_by_value(y_conv, 1e-10, 1.0)
cross_entropy = tf.reduce_max(-tf.reduce_sum(y_ * tf.log(safe_y_conv), axis=1))

frob = frobNorm(softmaxMat)#tf.square(tf.norm(stackedClusts,ord='fro'),axis=(0,1))

#Label-free clustering objective. c3(affinity) is built once and reused.
coact_weight = c3(affinity)
cluster_loss = c1*affinity + c2*(1-balance) + coact_weight*coact + c4*frob

#Variant that adds the label term; kept under its own name for experiments
#instead of being assigned to `loss` and immediately overwritten.
supervised_loss = cross_entropy + cluster_loss

#Objective that is actually minimised: the clustering terms only.
loss = cluster_loss

In [7]:
# Adam with a learning rate of 1e-5 on the loss defined above.
optimizer = tf.train.AdamOptimizer(1e-5)
train_step = optimizer.minimize(loss)

# Accuracy: fraction of rows whose arg-max column in y_conv equals the
# arg-max column of the target row.
predicted_class = tf.argmax(y_conv,1)
target_class = tf.argmax(y_,1)
correct_prediction = tf.equal(predicted_class, target_class)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Run the initialiser only after the optimiser has been added to the graph.
sess.run(tf.global_variables_initializer())

In [8]:
# Two-class targets: y[1] for digits above 4, y[0] for digits 0-4.
y = {0:[0,1], 1:[1,0]}

def to_binary_labels(one_hot_digits):
    """Convert one-hot digit rows into the two-class target rows stored in ``y``.

    A row whose arg-max digit is greater than 4 maps to ``y[1]``; every other
    row maps to ``y[0]``.
    """
    is_high = np.argmax(one_hot_digits, axis=1) > 4
    return np.array([y[int(flag)] for flag in is_high])

totalSteps = 7000
batchSize = 50
for i in range(totalSteps):
    train_images, train_digits = mnist.train.next_batch(batchSize)
    trainbatch = (train_images, to_binary_labels(train_digits))
    feed_dict = {x: trainbatch[0], y_: trainbatch[1]}

    if i%100 == 0:
        # Only draw a validation batch on the steps that actually report it.
        val_images, val_digits = mnist.validation.next_batch(batchSize)
        valbatch = (val_images, to_binary_labels(val_digits))

        # Fetch every training diagnostic in ONE run so that the loss and its
        # components come from the same forward pass (and the same dropout
        # mask) instead of four independent evaluations.
        train_loss, train_acc, af, ba, co = sess.run(
            [loss, accuracy, affinity, balance, coact], feed_dict=feed_dict)
        val_loss, val_acc = sess.run([loss, accuracy],feed_dict={x: valbatch[0], y_: valbatch[1]})

        print("step %d/%d \n Train: accuracy: %g, loss: %g \n Validation: accuracy: %g loss: %g"%(i,totalSteps, train_acc, train_loss, val_acc, val_loss))
        # The "balance" column shows the penalty that enters the loss, 1 - balance.
        print("affinity: %g, balance: %g, coact: %g"%(af,(1-ba),co))
    sess.run(train_step, feed_dict=feed_dict)

step 0/7000 
 Train: accuracy: 0.42, loss: 0.976476 
 Validation: accuracy: 0.44 loss: 0.977142
affinity: 0.00409955, balance: 0.989876, coact: 0.166799
step 100/7000 
 Train: accuracy: 0.44, loss: 0.989706 
 Validation: accuracy: 0.6 loss: 0.987113
affinity: 0.00569435, balance: 0.980017, coact: 0.181679
step 200/7000 
 Train: accuracy: 0.6, loss: 0.978128 
 Validation: accuracy: 0.52 loss: 0.997485
affinity: 0.000448428, balance: 0.986469, coact: 0.317149
step 300/7000 
 Train: accuracy: 0.42, loss: 0.985651 
 Validation: accuracy: 0.52 loss: 0.968303
affinity: 0.00402778, balance: 0.979355, coact: 0.86243
step 400/7000 
 Train: accuracy: 0.4, loss: 0.972731 
 Validation: accuracy: 0.48 loss: 0.989247
affinity: 0.000962384, balance: 0.993747, coact: 0.734215
step 500/7000 
 Train: accuracy: 0.44, loss: 0.972487 
 Validation: accuracy: 0.4 loss: 0.952536
affinity: 0.00254576, balance: 0.990294, coact: 0.462958
step 600/7000 
 Train: accuracy: 0.42, loss: 0.990959 
 Validation: accurac

In [9]:
testSize = 1000
test_images, test_digits = mnist.test.next_batch(testSize)
# Same two-class relabelling as in training: y[True] for digits above 4.
testbatch = (test_images, np.array([y[np.argmax(row)>4] for row in test_digits]))

# Fraction of the batch whose predicted column matches the target column.
predicted_column = np.argmax(y_conv.eval({x: testbatch[0], y_: testbatch[1]}), axis=1)
target_column = np.argmax(testbatch[1], axis=1)
np.sum(predicted_column == target_column)/float(testSize)

0.52400000000000002

In [38]:
#sess.run(tf.global_variables_initializer())
# Inspect the network on one test image.
y = {0:[0,1], 1:[1,0]}
tb = mnist.test.next_batch(1)
testbatch = (tb[0],np.array([y[np.argmax(tb[1][j])>4] for j in range(len(tb[1]))]))
# print(...) with a single argument behaves the same on Python 2 and 3 and
# matches the call style used in the training cell.
print(tb[1])
smMat, acc = sess.run([softmaxMat,accuracy],feed_dict={x: testbatch[0], y_: testbatch[1]})
print(acc)
# Show the softmax matrix fetched in the same run as `acc`; evaluating
# softmaxMat again would be a second forward pass with a fresh dropout mask
# and would not correspond to the accuracy printed above.
smMat

[[ 0.  1.  0.  0.  0.  0.  0.  0.  0.  0.]]
0.0


array([[[  0.00000000e+00,   4.51118439e-23,   2.34795500e-15,
           1.95343559e-28,   2.35504170e-15],
        [  0.00000000e+00,   5.16833643e-06,   6.27332169e-23,
           9.99994874e-01,   4.54980701e-32]]], dtype=float32)

In [11]:
# Scratch check: build the bigV graph for an all-ones (1, 2, 5) input and
# display the resulting tensor (static shape and dtype).
t = np.ones(shape=(1, 2, 5))
bigV(zBar(t))

<tf.Tensor 'MatMul_5:0' shape=(2, 2) dtype=float64>