In [1]:
from __future__ import print_function
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K
import tensorflow as tf
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
import numpy as np

# Hyper-parameters shared by the cells below.
batch_size = 128   # mini-batch size passed to every fit() call
num_classes = 10   # number of label classes (MNIST digits)
# NOTE(review): the visible fit() calls pass literal epoch counts, so this
# value does not appear to be read anywhere in the notebook.
epochs = 12

# Height and width of one input image, in pixels.
img_rows = img_cols = 28


Using TensorFlow backend.


In [2]:
import os
# Select CUDA devices by PCI bus id and make only ids 0 and 1 visible
# to this process.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "0,1",
})

def build_session(gpu_fraction=0.2, allow_growth=True):
    """Create a TensorFlow session with the given GPU memory options.

    Args:
        gpu_fraction: value for ``per_process_gpu_memory_fraction``.
        allow_growth: value for the ``allow_growth`` GPU option.

    Returns:
        A new ``tf.Session`` configured with those GPU options.
    """
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = gpu_fraction
    config.gpu_options.allow_growth = allow_growth
    return tf.Session(config=config)


# Create the session and register it with Keras so that the models built
# below run inside it.
sess = build_session()
K.set_session(sess)

# Run the global and local variable initializers once in that session.
init_op = tf.group(
    tf.global_variables_initializer(),
    tf.local_variables_initializer(),
)
sess.run(init_op)

In [3]:
# Load MNIST, already split into train and test sets.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Insert the single-channel axis where the backend's image data format
# expects it.
if K.image_data_format() == 'channels_first':
    input_shape = (1, img_rows, img_cols)
else:
    input_shape = (img_rows, img_cols, 1)
x_train = x_train.reshape((x_train.shape[0],) + input_shape)
x_test = x_test.reshape((x_test.shape[0],) + input_shape)

# Convert to float32 and divide by 255.
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# Labels are deliberately kept as integer class ids (no one-hot encoding):
# the pairwise loss compares labels for equality and the classifiers below
# use sparse_categorical_crossentropy.

x_train shape: (60000, 28, 28, 1)
60000 train samples
10000 test samples


In [4]:
# Embedding network: two 3x3 conv layers, max-pooling and dropout, then a
# Dense layer with no activation that outputs a 128-dimensional vector.
# (An earlier variant with a ReLU Dense(128) -> Dropout -> Dense(64) head
# was tried and disabled.)
model = Sequential([
    Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(rate=0.25),
    Flatten(),
    Dense(128),
])

In [5]:
def mat_dist(input_labels, inputs, flag_l2norm=True):
    """Split the pairwise similarities of a batch into positive and negative pairs.

    Every unordered pair of distinct rows (i < j) of ``inputs`` is considered
    once. A pair is "positive" when both rows carry the same label and
    "negative" otherwise.

    Args:
        input_labels: labels with one entry per row of ``inputs``; reshaped
            internally to a column of length ``shape(input_labels)[0]``.
        inputs: embeddings; assumed to be 2-D ``(batch, dim)`` since the code
            normalises along axis 1 and multiplies by its own transpose.
        flag_l2norm: if true, L2-normalise each row first so that the dot
            product is a cosine similarity.

    Returns:
        ``(pos_sim, neg_sim)``: two 1-D tensors holding the similarities of
        the positive and of the negative pairs.
    """
    batch = tf.shape(input_labels)[0]
    labels = tf.reshape(input_labels, [batch, 1])

    # same_label[i, j] == 1 iff rows i and j share a label.
    same_label = tf.cast(tf.equal(labels, tf.transpose(labels)), tf.int32)
    # Upper triangle minus the diagonal: each same-label pair once, no self-pairs.
    positive_mask = (tf.matrix_band_part(same_label, 0, -1)
                     - tf.matrix_band_part(same_label, 0, 0))
    # Upper triangle of the complement: each different-label pair once.
    negative_mask = tf.matrix_band_part(1 - same_label, 0, -1)

    if flag_l2norm:
        inputs = K.l2_normalize(inputs, axis=1)

    sim = tf.matmul(inputs, tf.transpose(inputs))
    # Disabled: rescale similarities to the [0, 1] range so that the target
    # for sn is 0 and the target for sp is 1.
    #     sim = (sim + 1.) / 2.

    # tf.boolean_mask is documented to take a boolean mask, so cast the
    # 0/1 integer masks explicitly instead of relying on implicit handling.
    pos_sim = tf.boolean_mask(sim, tf.cast(positive_mask, tf.bool))
    neg_sim = tf.boolean_mask(sim, tf.cast(negative_mask, tf.bool))

    return pos_sim, neg_sim


def circle_loss(input_labels, inputs, margin = 0.25, gamma=256):
    """Pair-wise circle loss over one batch of embeddings.

    Args:
        input_labels: labels, one per row of ``inputs``.
        inputs: embeddings produced by the model for the batch.
        margin: relaxation margin; positives are pushed towards ``1 - margin``
            and negatives towards ``margin``.
        gamma: scale factor applied to the logits.

    Returns:
        A scalar tensor: ``softplus(logsumexp(logit_p) + logsumexp(logit_n))``.
    """
    sp, sn = mat_dist(input_labels, inputs)

    # Pair weights, clipped to [0, 2]. Gradients are stopped so the weights
    # act as constants during back-propagation.
    weight_p = K.clip(- tf.stop_gradient(sp) + 1 + margin, 0., 2.)
    weight_n = K.clip(tf.stop_gradient(sn) + margin, 0., 2.)

    # Decision margins for positive and negative similarities.
    target_p = 1 - margin
    target_n = margin

    logit_p = - weight_p * (sp - target_p) * gamma
    logit_n = weight_n * (sn - target_n) * gamma

    return K.softplus(K.logsumexp(logit_p) + K.logsumexp(logit_n))

In [6]:
# Train the embedding network with the circle loss. No metrics are
# registered, so evaluate() yields only the loss value.
model.compile(optimizer=keras.optimizers.SGD(),
              loss=circle_loss,
              metrics=[])

model.fit(x=x_train,
          y=y_train,
          validation_data=(x_test, y_test),
          batch_size=batch_size,
          epochs=200,
          verbose=1)

score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score)

## Evaluation with a model: train a classifier on top of the learned embeddings

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Train on 60000 samples, validate on 10000 samples
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/2

Epoch 148/200
Epoch 149/200
Epoch 150/200
Epoch 151/200
Epoch 152/200
Epoch 153/200
Epoch 154/200
Epoch 155/200
Epoch 156/200
Epoch 157/200
Epoch 158/200
Epoch 159/200
Epoch 160/200
Epoch 161/200
Epoch 162/200
Epoch 163/200
Epoch 164/200
Epoch 165/200
Epoch 166/200
Epoch 167/200
Epoch 168/200
Epoch 169/200
Epoch 170/200
Epoch 171/200
Epoch 172/200
Epoch 173/200
Epoch 174/200
Epoch 175/200
Epoch 176/200
Epoch 177/200
Epoch 178/200
Epoch 179/200
Epoch 180/200
Epoch 181/200
Epoch 182/200
Epoch 183/200
Epoch 184/200
Epoch 185/200
Epoch 186/200
Epoch 187/200
Epoch 188/200
Epoch 189/200
Epoch 190/200
Epoch 191/200
Epoch 192/200
Epoch 193/200
Epoch 194/200
Epoch 195/200
Epoch 196/200
Epoch 197/200
Epoch 198/200
Epoch 199/200
Epoch 200/200
Test loss: 153.8179127441406


In [7]:
# Linear probe: a single softmax layer trained on the L2-normalised
# embeddings produced by `model`.
clf_model = Sequential([Dense(num_classes, activation='softmax')])
clf_model.compile(optimizer=keras.optimizers.SGD(),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

# Embed both splits and scale every row to unit L2 norm.
tmp_train = model.predict(x_train)
tmp_train = tmp_train / np.linalg.norm(tmp_train, axis=1, keepdims=True)
tmp_test = model.predict(x_test)
tmp_test = tmp_test / np.linalg.norm(tmp_test, axis=1, keepdims=True)

clf_model.fit(x=tmp_train,
              y=y_train,
              validation_data=(tmp_test, y_test),
              batch_size=batch_size,
              epochs=20,
              verbose=1)
score = clf_model.evaluate(tmp_test, y_test, verbose=0)
print(score)

Train on 60000 samples, validate on 10000 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
  128/60000 [..............................] - ETA: 1s - loss: 0.3505 - acc: 0.9922

KeyboardInterrupt: 

In [None]:
# Baseline: the same convolutional stack as the embedding network, followed
# by a ReLU Dense(128), dropout and a softmax output, trained end-to-end
# with cross-entropy.
mlp = Sequential([
    Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(rate=0.25),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(num_classes, activation='softmax'),
])
mlp.compile(optimizer=keras.optimizers.Adam(),
            loss='sparse_categorical_crossentropy',
            metrics=['accuracy'])
mlp.fit(x=x_train,
        y=y_train,
        validation_data=(x_test, y_test),
        batch_size=batch_size,
        epochs=20,
        verbose=1)
score = mlp.evaluate(x_test, y_test, verbose=0)

## Evaluation without a model: nearest class-mean classification on the embeddings

In [9]:
# One prototype per class: the mean of the unit-normalised training
# embeddings that carry that label.
prototypes = []
for digit in range(num_classes):
    emb = model.predict(x_train[y_train == digit])
    emb = emb / np.linalg.norm(emb, axis=1, keepdims=True)
    prototypes.append(emb.mean(axis=0))
stereo_type = np.array(prototypes)

# Assign each test sample to the prototype with the largest dot product.
tmp_test = model.predict(x_test)
tmp_test = tmp_test / np.linalg.norm(tmp_test, axis=1, keepdims=True)
label = np.matmul(tmp_test, stereo_type.T).argmax(axis=1)
# NOTE: the value printed under "acc" is the number of correctly classified
# test samples (a count), not a ratio.
print("acc", (y_test == label).sum())

acc 9891


In [10]:
# Mean similarity of same-label and different-label pairs within the first
# `batch_size` training samples.
first_labels = y_train[:batch_size]
first_embeddings = model.predict(x_train[:batch_size])
pos_sim, neg_sim = sess.run(mat_dist(first_labels, first_embeddings))
print(pos_sim.mean(), neg_sim.mean())

0.8886915 0.65426034
