# Convolutional neural network
Dataset and code by Lauren Hayward Sierens and Juan Carrasquilla
This code will classify the phases of the classical Ising gauge theory
using a neural network with a convolutional layer.

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

In [2]:
# Data parameters: ###
L           = 16    # lattice length L
num_labels  = 2     # Number of labels (T=0 and T=infinity here)
num_sublattices = 2 # Number of sublattices for the gauge theory lattice

In [54]:
# Hyperparameters: ###
frac_train     = 0.7    # Fraction of data used for training
patch_size     = 2      # Size of the filters
num_filters    = 64     # Number of output channels in the convolutional layer
stride_xy      = 1      # The size of the jumps to take as one slides the filters across the image (the lattice length L should be divisible by stride_xy)
nH2            = 64     # Number of hidden neurons in the fully-connected layer
keep_prob      = 0.5    # Probability of keeping neurons in the dropout layer
learning_rate  = 0.0001 # Learning rate for training algorithm
minibatch_size = 2000   # Mini-batch size (N_train needs to be divisible by minibatch_size)

In [55]:
# Other parameters: ###
N_epochs = 25           # Number of times to iterate through all of the data

seed=1
np.random.seed(seed)
tf.set_random_seed(seed)

In [5]:
# Plotting parameters: ###
#Specify font sizes for plots:
plt.rcParams['axes.labelsize']  = 10
plt.rcParams['legend.fontsize'] = 10
plt.rcParams['xtick.labelsize'] = 8
plt.rcParams['ytick.labelsize'] = 8

plt.ion() # turn on interactive plotting mode

# READ IN THE DATA SET #

In [56]:
# Read in the data: ###
x_all     = np.loadtxt( 'x_L%d.txt' %L, dtype='uint8' )
y_all     = np.loadtxt( 'y_L%d.txt' %L, dtype='uint8' )
N_configs = x_all.shape[0]
N_spins   = x_all.shape[1]
L         = int( np.sqrt(N_spins/2) ) #should be the same as the L variable above

In [50]:
x_all

array([255,   1, 255,   1,   1, 255, 255,   1, 255,   1, 255,   1,   1,
         1,   1,   1, 255, 255, 255, 255, 255, 255,   1, 255, 255, 255,
         1,   1,   1, 255,   1,   1, 255,   1,   1, 255, 255, 255, 255,
       255, 255, 255, 255, 255,   1, 255, 255, 255, 255, 255, 255,   1,
       255, 255,   1,   1,   1, 255, 255,   1, 255, 255,   1,   1,   1,
         1,   1,   1, 255,   1, 255,   1, 255, 255, 255,   1,   1,   1,
       255,   1,   1, 255,   1, 255,   1, 255, 255, 255, 255, 255,   1,
         1,   1, 255,   1, 255,   1, 255,   1,   1, 255, 255,   1, 255,
         1, 255, 255,   1,   1, 255,   1,   1,   1, 255,   1,   1,   1,
         1, 255,   1,   1, 255, 255, 255,   1, 255, 255, 255, 255, 255,
       255, 255, 255,   1,   1, 255, 255,   1,   1, 255, 255,   1, 255,
       255, 255, 255,   1,   1,   1,   1,   1,   1,   1,   1, 255, 255,
         1,   1, 255,   1, 255,   1,   1, 255,   1,   1, 255, 255,   1,
       255, 255,   1,   1,   1, 255,   1,   1, 255,   1, 255,   

In [57]:
# Shuffle the data and then divide into training and validation sets: ###
indices_shuffled = np.random.permutation(N_configs)
x_all = x_all[indices_shuffled,:]
y_all = y_all[indices_shuffled]

N_train = int(frac_train*N_configs)
x_train_orig = x_all[0:N_train,:]
y_train      = y_all[0:N_train]

x_validation_orig = x_all[N_train:,:]
y_validation = y_all[N_train:]
N_validation = x_validation_orig.shape[0]

In [38]:
x_train_orig[1]

array([-1,  1, -1,  1, -1,  1, -1, -1, -1,  1,  1, -1, -1,  1, -1,  1,  1,
        1,  1, -1,  1,  1, -1,  1,  1,  1,  1, -1,  1, -1, -1, -1, -1, -1,
       -1,  1,  1,  1,  1,  1, -1,  1,  1,  1,  1,  1,  1,  1,  1,  1, -1,
        1, -1, -1,  1,  1, -1,  1, -1,  1, -1,  1,  1,  1,  1,  1, -1,  1,
       -1, -1,  1,  1, -1, -1,  1,  1, -1, -1, -1,  1, -1, -1, -1,  1,  1,
       -1, -1,  1,  1,  1,  1,  1,  1,  1, -1,  1, -1, -1, -1,  1, -1, -1,
       -1, -1,  1, -1, -1,  1,  1,  1, -1,  1,  1, -1, -1,  1, -1, -1, -1,
       -1,  1, -1,  1,  1, -1, -1,  1,  1,  1, -1,  1,  1, -1,  1,  1,  1,
        1, -1, -1, -1, -1,  1,  1, -1, -1,  1, -1, -1, -1, -1, -1,  1, -1,
       -1, -1,  1,  1,  1, -1,  1, -1,  1, -1, -1,  1, -1, -1,  1,  1,  1,
       -1,  1, -1,  1, -1,  1, -1,  1, -1, -1,  1,  1,  1,  1, -1,  1, -1,
        1, -1, -1, -1,  1, -1, -1,  1,  1, -1, -1, -1, -1,  1, -1,  1,  1,
       -1,  1, -1,  1, -1, -1, -1, -1,  1,  1, -1, -1, -1,  1, -1, -1,  1,
       -1, -1,  1,  1,  1

In [58]:
# Enlarge the datapoints based on the patch size (because of periodic boundary conditions): ###
L_enlarged = L+patch_size-1
n0 = 2*(L_enlarged)**2
def enlarge_data(N_samples,data_orig):
    data_enlarged = np.zeros((N_samples,n0))

    for iy in range(L):
        data_enlarged[:,2*iy*L_enlarged:(2*iy*L_enlarged + 2*L)] = data_orig[:,2*iy*L:2*(iy+1)*L]
        data_enlarged[:,(2*iy*L_enlarged + 2*L):2*(iy+1)*L_enlarged] = data_orig[:,2*iy*L:(2*iy*L+2*(patch_size-1))]
    data_enlarged[:,2*L*L_enlarged:] = data_enlarged[:,0:2*L_enlarged*(patch_size-1)]
    return data_enlarged

x_train      = enlarge_data(N_train, x_train_orig)
x_validation = enlarge_data(N_validation, x_validation_orig)

print("N_train     = %d\nN_validation = %d\nL           = %d\nL_enlarged  = %d\n" %(N_train,N_validation,L,L_enlarged))

N_train     = 14000
N_validation = 6000
L           = 16
L_enlarged  = 17



# DEFINE THE NETWORK ARCHITECTURE #

In [59]:
x = tf.placeholder(tf.float32, shape=[None, n0]) # Placeholder for the spin configurations
x_reshaped = tf.reshape( x, [-1,L_enlarged,L_enlarged,num_sublattices] )
y = tf.placeholder(tf.int32, shape=[None]) # Labels

### Layer 1 (Convolutional layer): ###
W1 = tf.Variable( tf.truncated_normal([patch_size, patch_size, num_sublattices, num_filters], mean=0.0, stddev=0.01, dtype=tf.float32) )
b1 = tf.Variable( tf.constant(0.1,shape=[num_filters]) )

# Apply the convolution (note that 'VALID' means no padding):
z1 = tf.nn.conv2d(x_reshaped, W1, strides=[1, stride_xy, stride_xy, 1], padding='VALID') + b1
a1 = tf.nn.relu( z1 )
n_a1 = int( num_filters*(L/stride_xy)**2 ) # Number of outputs in the vector a1
a1_flattened = tf.reshape( a1, [-1,n_a1])

### Layer 2 (Fully-connected layer): ###
W2 = tf.Variable( tf.truncated_normal([n_a1,nH2], mean=0.0, stddev=0.01, dtype=tf.float32) )
b2 = tf.Variable( tf.constant(0.1,shape=[nH2]) )
z2 = tf.matmul(a1_flattened, W2) + b2
a2 = tf.nn.relu( z2 )

# Dropout: To reduce overfitting, we apply dropout to the neurons a2 (before the final output layer).
# We create a placeholder for the probability that a neuron's output is kept during dropout, which
# allows us to turn dropout on during training, and turn it off during validation. TensorFlow's
# tf.nn.dropout op automatically handles scaling neuron outputs in addition to masking them, so
# dropout works without any additional scaling.
keepProb_var = tf.placeholder("float")
a2_drop = tf.nn.dropout(a2, keepProb_var)

### Layer 3 (Fully-connected layer): ###
W3 = tf.Variable( tf.truncated_normal([nH2,num_labels], mean=0.0, stddev=0.01, dtype=tf.float32) )
b3 = tf.Variable( tf.constant(0.1,shape=[num_labels]) )
z3 = tf.matmul(a2_drop, W3) + b3
a3 = tf.nn.softmax( z3 )

### Network output: ###
aL = a3

### Cost function: ###
y_onehot = tf.one_hot(y,depth=num_labels) # labels are converted to one-hot representation
eps=0.0000000001 # to prevent the logs from diverging
cross_entropy = tf.reduce_mean(-tf.reduce_sum( y_onehot * tf.log(aL+eps) +  (1.0-y_onehot)*tf.log(1.0-aL+eps) , reduction_indices=[1]))
cost_func = cross_entropy

train_step = tf.train.AdamOptimizer(learning_rate).minimize(cost_func)



# TRAINING #

In [60]:
sess = tf.Session()
sess.run(tf.global_variables_initializer())

epoch_list     = []
cost_training  = []
acc_validation = []

### Train using mini-batches for several epochs: ###
permut = np.arange(N_train)
num_iterations = 0
for epoch in range(N_epochs):
    np.random.shuffle(permut) # Randomly shuffle the indices
    x_shuffled = x_train[permut,:]
    y_shuffled = y_train[permut]

    #Loop over all the mini-batches:
    for b in range(0, N_train, minibatch_size):
        x_batch = x_shuffled[b:b+minibatch_size,:]
        y_batch = y_shuffled[b:b+minibatch_size]
        sess.run(train_step, feed_dict={x: x_batch, y:y_batch, keepProb_var:keep_prob})
        num_iterations = num_iterations + 1

    #Print results every epoch of the training algorithm:
    cost_train = sess.run(cost_func,feed_dict={x:x_train, y:y_train, keepProb_var:1.0})

    validation_output = sess.run(aL,feed_dict={x:x_validation, y:y_validation, keepProb_var:1.0})
    predicted_class = np.argmax(validation_output, axis=1)
    accuracy_validation = np.mean(predicted_class == y_validation)

    print( "Epoch %d:\n  Training cost %f\n  Validation accuracy %f\n" % (epoch, cost_train, accuracy_validation) )

    epoch_list.append(epoch)
    cost_training.append(cost_train)
    acc_validation.append(accuracy_validation)

    #fig = plt.figure(1,figsize=(10,4))
    #fig.subplots_adjust(hspace=.3,wspace=.3)
    #plt.clf()
    
    ### Plot the cost function during training: ###
    #plt.subplot(121)
    #plt.plot(epoch_list,cost_training,'o-')
    #plt.xlabel('Epoch')
    #plt.ylabel('Training cost')
    
    ### Plot the validation accuracy: ###
    #plt.subplot(122)
    #plt.plot(epoch_list,acc_validation,'o-')
    #plt.xlabel('Epoch')
    #plt.ylabel('Validation accuracy')
    #plt.pause(0.1)

#plt.savefig('gaugeTheoryClassification_CNN_results.pdf') # Save the figure showing the results in the current directory


Epoch 0:
  Training cost 1.382100
  Validation accuracy 0.507500

Epoch 1:
  Training cost 1.380554
  Validation accuracy 0.517667

Epoch 2:
  Training cost 1.373304
  Validation accuracy 0.521667

Epoch 3:
  Training cost 1.358280
  Validation accuracy 0.549000

Epoch 4:
  Training cost 1.335081
  Validation accuracy 0.618833

Epoch 5:
  Training cost 1.288927
  Validation accuracy 0.699667

Epoch 6:
  Training cost 1.220197
  Validation accuracy 0.772667

Epoch 7:
  Training cost 1.097702
  Validation accuracy 0.893333

Epoch 8:
  Training cost 0.935415
  Validation accuracy 0.950333

Epoch 9:
  Training cost 0.722207
  Validation accuracy 0.994833

Epoch 10:
  Training cost 0.503873
  Validation accuracy 0.999000

Epoch 11:
  Training cost 0.315062
  Validation accuracy 1.000000

Epoch 12:
  Training cost 0.188017
  Validation accuracy 1.000000

Epoch 13:
  Training cost 0.109463
  Validation accuracy 1.000000

Epoch 14:
  Training cost 0.064850
  Validation accuracy 1.000000

Epoch