In [15]:
import numpy as np

import warnings
#Comment this to turn on warnings
warnings.filterwarnings('ignore')

np.random.seed() # shuffle random seed generator

# Ising model parameters
L=40 # linear system size
J=-1.0 # Ising interaction
T=np.linspace(0.25,4.0,16) # set of temperatures
T_c=2.26 # Onsager critical temperature in the TD limit

##### prepare training and test data sets

import pickle,os
from sklearn.model_selection import train_test_split

###### define ML parameters
num_classes=2
train_to_test_ratio=0.5 # training samples

path_to_data=os.path.expanduser('~')+'/Desktop/Uni/Oslo/MachineLearning/project2/'
file_name = "Ising2DFM_reSample_L40_T=All.pkl"
data = pickle.load(open(path_to_data+file_name,'rb'))
data = np.unpackbits(data).reshape(-1, 1600) # Decompress array and reshape for convenience
data=data.astype('int')
data[np.where(data==0)]=-1 # map 0 state to -1 (Ising variable can take values +/-1)

file_name = "Ising2DFM_reSample_L40_T=All_labels.pkl" # this file contains 16*10000 samples taken in T=np.arange(0.25,4.0001,0.25)
labels = pickle.load(open(path_to_data+file_name,'rb')) # pickle reads the file and returns the Python object (here just a 1D array with the binary labels)

# divide data into ordered, critical and disordered
X_ordered=data[:70000,:]
Y_ordered=labels[:70000]

X_critical=data[70000:100000,:]
Y_critical=labels[70000:100000]

X_disordered=data[100000:,:]
Y_disordered=labels[100000:]

del data,labels

# define training and test data sets
X=np.concatenate((X_ordered,X_disordered))
Y=np.concatenate((Y_ordered,Y_disordered))

# pick random data points from ordered and disordered states 
# to create the training and test sets
X_train,X_test,Y_train,Y_test=train_test_split(X,Y,train_size=train_to_test_ratio)

# full data set
X=np.concatenate((X_critical,X))
Y=np.concatenate((Y_critical,Y))

print('X_train shape:', X_train.shape)
print('Y_train shape:', Y_train.shape)
print()
print(X_train.shape[0], 'train samples')
print(X_critical.shape[0], 'critical samples')
print(X_test.shape[0], 'test samples')
Y_train

X_train shape: (65000, 1600)
Y_train shape: (65000,)

65000 train samples
30000 critical samples
65000 test samples


array([1, 1, 1, ..., 0, 1, 0], dtype=int64)

In [16]:
  # building our neural network

n_inputs, n_features = X_train.shape
n_hidden_neurons = 50
n_categories = 2

# we make the weights normally distributed using numpy.random.randn

# weights and bias in the hidden layer
hidden_weights = np.random.randn(n_features, n_hidden_neurons)
hidden_bias = np.zeros(n_hidden_neurons) + 0.01

# weights and bias in the output layer
output_weights = np.random.randn(n_hidden_neurons, n_categories)
output_bias = np.zeros(n_categories) + 0.01

In [17]:
# setup the feed-forward pass, subscript h = hidden layer

def sigmoid(x):
    return 1/(1 + np.exp(-x))

def feed_forward(X):
    # weighted sum of inputs to the hidden layer
    z_h = np.matmul(X, hidden_weights) + hidden_bias
    # activation in the hidden layer
    a_h = sigmoid(z_h)
    
    # weighted sum of inputs to the output layer
    z_o = np.matmul(a_h, output_weights) + output_bias
    # softmax output
    # axis 0 holds each input and axis 1 the probabilities of each category
    exp_term = np.exp(z_o)
    probabilities = exp_term / np.sum(exp_term, axis=1, keepdims=True)
    
    return probabilities

probabilities = feed_forward(X_train)
print("probabilities = (n_inputs, n_categories) = " + str(probabilities.shape))
print("probability that image 0 is in category 0,1= \n" + str(probabilities[0]))
print("probabilities sum up to: " + str(probabilities[0].sum()))
print()

# we obtain a prediction by taking the class with the highest likelihood
def predict(X):
    probabilities = feed_forward(X)
    return np.argmax(probabilities, axis=1)

predictions = predict(X_train)
print("predictions = (n_inputs) = " + str(predictions.shape))
print("prediction for image 0: " + str(predictions[0]))
print("correct label for image 0: " + str(Y_train[0]))

# to categorical turns our integer vector into a onehot representation
from sklearn.metrics import accuracy_score

# one-hot in numpy
def to_categorical_numpy(integer_vector):
    n_inputs = len(integer_vector)
    n_categories = np.max(integer_vector) + 1
    onehot_vector = np.zeros((n_inputs, n_categories))
    onehot_vector[range(n_inputs), integer_vector] = 1
    
    return onehot_vector

#Y_train_onehot, Y_test_onehot = to_categorical(Y_train), to_categorical(Y_test)
Y_train_onehot, Y_test_onehot = to_categorical_numpy(Y_train), to_categorical_numpy(Y_test)

def feed_forward_train(X):
    # weighted sum of inputs to the hidden layer
    z_h = np.matmul(X, hidden_weights) + hidden_bias
    # activation in the hidden layer
    a_h = sigmoid(z_h)
    
    # weighted sum of inputs to the output layer
    z_o = np.matmul(a_h, output_weights) + output_bias
    # softmax output
    # axis 0 holds each input and axis 1 the probabilities of each category
    exp_term = np.exp(z_o)
    probabilities = exp_term / np.sum(exp_term, axis=1, keepdims=True)
    
    # for backpropagation need activations in hidden and output layers
    return a_h, probabilities

def backpropagation(X, Y):
    a_h, probabilities = feed_forward_train(X)
    
    # error in the output layer
    error_output = probabilities - Y
    # error in the hidden layer
    error_hidden = np.matmul(error_output, output_weights.T) * a_h * (1 - a_h) #sigma L in lecture
    
    # gradients for the output layer
    output_weights_gradient = np.matmul(a_h.T, error_output)
    output_bias_gradient = np.sum(error_output, axis=0)
    
    # gradient for the hidden layer
    hidden_weights_gradient = np.matmul(X.T, error_hidden)
    hidden_bias_gradient = np.sum(error_hidden, axis=0)

    return output_weights_gradient, output_bias_gradient, hidden_weights_gradient, hidden_bias_gradient,error_output

print("Old accuracy on training data: " + str(accuracy_score(predict(X_train), Y_train)))
score_old_string = str(accuracy_score(predict(X_train), Y_train))

probabilities = (n_inputs, n_categories) = (65000, 2)
probability that image 0 is in category 0,1= 
[0.98316189 0.01683811]
probabilities sum up to: 0.9999999999999999

predictions = (n_inputs) = (65000,)
prediction for image 0: 0
correct label for image 0: 1
Old accuracy on training data: 0.3618769230769231


In [18]:
eta = 0.1          #initializing learning rate eta
lmbd = 0.01        #initializing regulation parameter lamda
iterations= 10
error_old = 300000 # we initialize with a large value to ensure the absolute error in the output layer
                   # is smaller

for i in range(iterations):
    #calculation of gradients and error in the output layer using backpropagation
    dWo, dBo, dWh, dBh,error = backpropagation(X_train, Y_train_onehot)
    
    # case absolute error increased in last iteration --> overshooting occured  
    if(np.sum(abs(error_old)<np.sum(abs(error)))):
            
            # reset weigths and biases -> reverse operation 
            output_weights = output_weights_old+(eta*dWo_old)
            output_bias = output_bias_old+(eta * dBo_old)
            hidden_weights = hidden_weights_old+(eta * dWh_old)
            hidden_bias = hidden_bias_old+(eta * dBh_old)
            
            # halving learning rate
            eta = 0.5*eta 
            
            # update weights and biases with respect to the new learning rate
            output_weights = output_weights-(eta * dWo_old)
            output_bias = output_bias-(eta * dBo_old)
            hidden_weights =hidden_weights-(eta * dWh_old)
            hidden_bias =hidden_bias-(eta * dBh_old)
            
            #store weights and biases in case we still overshoot with the halved learning rate
            output_weights_old = output_weights 
            output_bias_old = output_bias 
            hidden_weights_old = hidden_weights
            hidden_bias_old = hidden_bias
            
    # case absolute error decreased in last iteration
    # initialization must ensure that else part of if-statement is exercised in first loop
    else:
            #raising eta
            eta = 1.05*eta
            
             # regularization term gradients
            dWo  = dWo + lmbd * output_weights
            dWh  = dWh + lmbd * hidden_weights
            
            # storing weights and biases and gradients in case we overshoot
            output_weights_old = output_weights 
            output_bias_old = output_bias 
            hidden_weights_old = hidden_weights
            hidden_bias_old = hidden_bias
            dWo_old =dWo
            dBo_old =dBo
            dWh_old =dWh
            dBh_old =dBh
        
            # update weights and biases
            output_weights = output_weights-(eta * dWo)
            output_bias = output_bias-(eta * dBo)
            hidden_weights =hidden_weights-(eta * dWh)
            hidden_bias =hidden_bias-(eta * dBh)
            
            #update error
            error_old = error   
print("New accuracy on training data: " + str(accuracy_score(predict(X_train), Y_train)))

New accuracy on training data: 0.4624923076923077


In [19]:
print("Old accuracy on training data: " + score_old_string)
print("New accuracy on training data: " + str(accuracy_score(predict(X_train), Y_train)))

Old accuracy on training data: 0.3618769230769231
New accuracy on training data: 0.4624923076923077
