# Neural Networks

Neural Networks for regression and classification

### Configure notebook

In [1]:
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

import sys
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pickle
import os

from sklearn.model_selection import train_test_split

# Use the same 'large' font size for every text element in figures.
# (The original dict listed 'legend.fontsize' twice; a repeated key in a dict
# literal is silently collapsed, so it is listed once here.)
params = {
    'axes.labelsize': 'large',
    'axes.titlesize': 'large',
    'xtick.labelsize': 'large',
    'ytick.labelsize': 'large',
    'legend.fontsize': 'large',
    'legend.handlelength': 2,
}
plt.rcParams.update(params)

# NOTE(review): presumably a workaround for the "duplicate OpenMP runtime"
# error seen with some numerical libraries -- confirm it is still needed.
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'

In [2]:
# Import code from src
# Put the project's source directory first on the module search path so the
# local modules below can be imported from this notebook.
sys.path.insert(0, '../src/')
from project_tools import fig_path

# NOTE(review): the wildcard import hides where names come from. The cells
# below use NeuralNetwork, Tanh, Sigmoid, Relu, Pass, SquareLoss and
# CrossEntropy, which presumably come from this module -- consider importing
# them explicitly once the full list of required names is confirmed.
from neuralnetwork import *
from isingmodel import generate_1Ddata

### Regression

Generate data for 1D Ising-model energy:

In [3]:
# Seed NumPy's global generator so the generated data is repeatable. The split
# below gets no random_state, so it draws from this same global generator.
np.random.seed(42)

L = 40      # system size
N = 10000   # number of points
data, target = generate_1Ddata(L, N)

# Rescale the targets by their largest value.
target = target / np.max(target)

# Hold out 20 % of the samples for testing, train on the remaining 80 %.
X_train, X_test, y_train, y_test = train_test_split(
    data,
    target,
    train_size=0.8,
    test_size=0.20,
)

(10000, 1600)


Grid search for neural network:

In [4]:
def grid_search_nn(learning_rate, penalty, layers, activ_func, cost, batch_size, epochs, X_train, y_train):
    """Train one NeuralNetwork for every (learning rate, penalty) combination.

    Parameters
    ----------
    learning_rate : sequence
        Learning rates to try (outer loop).
    penalty : sequence
        Penalty values to try (inner loop); each is passed to ``train`` right
        after the learning rate.
    layers, activ_func, cost
        Forwarded unchanged to the ``NeuralNetwork`` constructor.
    batch_size, epochs
        Forwarded unchanged to ``NeuralNetwork.train``.
    X_train, y_train
        Training data, forwarded unchanged to ``NeuralNetwork.train``.

    Returns
    -------
    list
        The trained models, ordered with the learning rate as the slow index:
        the model for ``learning_rate[i]`` and ``penalty[j]`` is at position
        ``i * len(penalty) + j``. Empty if either input sequence is empty.
    """
    tot_steps = len(learning_rate) * len(penalty)

    models = []
    count = 0
    for lr in learning_rate:
        for p in penalty:
            model = NeuralNetwork(layers, activ_func, cost)
            model.train(X_train, y_train, lr, p, batch_size, epochs)
            models.append(model)

            # Report progress after every trained model. "%%" yields a literal
            # percent sign; integer arithmetic avoids the float-modulo test
            # that misfires when tot_steps is not a multiple of 100.
            count += 1
            sys.stdout.write("\r%d %%" % (100 * count // tot_steps))
            sys.stdout.flush()

    return models

def R2(y, y_pred):
    """Return the coefficient of determination of ``y_pred`` with respect to ``y``.

    Computed as one minus the ratio of the residual sum of squares to the
    total sum of squares of ``y`` about its mean.
    """
    residual_ss = np.sum((y - y_pred)**2)
    total_ss = np.sum((y - np.mean(y))**2)
    return 1 - residual_ss / total_ss

Specify neural network for regression:

In [44]:
# Activation-function and loss objects available to the regression network.
tanh = Tanh()
sig = Sigmoid()
relu = Relu()
_pass = Pass()
square_loss = SquareLoss()
np.random.seed(42)

# Four layer sizes, three activations: one activation per layer-to-layer step.
layers = [1600, 1000, 100, 1]
activ_func = [tanh, tanh, _pass]
cost = square_loss

# A single-point "grid": one learning rate and one penalty.
learning_rate = [0.00002]
penalty = [0.0001]

models = grid_search_nn(
    learning_rate,
    penalty,
    layers,
    activ_func,
    cost,
    batch_size=100,
    epochs=100,
    X_train=X_train,
    y_train=y_train,
)


99

In [42]:
# R2 score of every trained model on the training and the test set.
R2_train = []
R2_test = []
for model in models:
    # Column 0 of the prediction matrix is taken as the 1-D prediction vector.
    y_pred = model.predict(X_train)[:, 0]
    R2_train.append(R2(y_train, y_pred))

    # Spot-check a few predictions against their targets.
    print(y_pred[100:110])
    print(y_train[100:110])

    y_pred = model.predict(X_test)[:, 0]
    R2_test.append(R2(y_test, y_pred))

print(R2_train)
print(R2_test)

[-0.05155751  0.2486871  -0.14854995  0.01988391 -0.15335323  0.15933833
 -0.10666503 -0.09685036  0.09591001  0.06116119]
[-0.16666667  0.         -0.33333333  0.         -0.5         0.
 -0.5         0.          0.          0.        ]
[0.38593271981205657]
[0.3507209828418437]


In [25]:
# NOTE(review): this network is built with 1500 inputs while the other networks
# in this notebook use 1600, and `nn` is not used in the cells visible here --
# possibly leftover scratch work; confirm before relying on it.
nn = NeuralNetwork([1500, 100, 1], [sig, _pass], square_loss)

### Classification

Download data and save locally:

    # NOTE(review): `urlopen` is not imported in the notebook cells visible here;
    # presumably `from urllib.request import urlopen` is needed -- confirm.
    url_main = "https://physics.bu.edu/~pankajm/ML-Review-Datasets/isingMC/"
    data_file_name = "Ising2DFM_reSample_L40_T=All.pkl"
    label_file_name = "Ising2DFM_reSample_L40_T=All_labels.pkl"

    # Caution: unpickling can execute arbitrary code, so only load these files
    # from a source you trust.
    labels = pickle.load(urlopen(url_main + label_file_name))

    data = pickle.load(urlopen(url_main + data_file_name))
    # Unpack the bit-packed array and reshape it into rows of 1600 values.
    data = np.unpackbits(data).reshape(-1, 1600)
    data = data.astype('int')

    # np.save appends ".npy", producing labels.npy and spin_data.npy.
    np.save("labels", labels)
    np.save("spin_data", data)

    # Drop the references to the large arrays once they are saved.
    del data, labels

Load the locally saved data, drop the critical samples, and split the rest into training and test sets:

In [101]:
X = np.load("spin_data.npy")
y = np.load("labels.npy")

# NOTE(review): this ratio is not used anywhere in this cell; the split at the
# bottom passes explicit train/test fractions instead.
train_to_test_ratio = 0.5  # training samples

# Partition the samples by row range into ordered, critical and disordered.
X_ordered = X[:70000, :]
y_ordered = y[:70000]

X_critical = X[70000:100000, :]
y_critical = y[70000:100000]

X_disordered = X[100000:, :]
y_disordered = y[100000:]

# Leave out the critical samples: training uses only ordered and disordered.
X = np.concatenate([X_ordered, X_disordered])
y = np.concatenate([y_ordered, y_disordered])

# Randomly draw 10 % of the remaining samples for training and another 10 %
# for testing.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.1,
    test_size=0.1,
)

Specify the neural network classifier

In [109]:
# Activation-function and loss objects available to the classifier.
tanh = Tanh()
sig = Sigmoid()
relu = Relu()
cross_entropy = CrossEntropy()

# Three layer sizes give two layer-to-layer steps, so two activations are
# listed. The original listed three, unlike the other networks in this
# notebook, which pass exactly len(layers) - 1 activations.
layers = [1600, 400, 1]
activ_func = [sig, sig]
cost = cross_entropy

# A single-point "grid": one learning rate and one penalty.
learning_rate = [0.00003]
penalty = [0.001]

np.random.seed(42)
models = grid_search_nn(learning_rate, penalty, layers, activ_func, cost, 100, 100, X_train, y_train)

37

KeyboardInterrupt: 

In [108]:
# Evaluate the most recently trained model on the held-out test set.
classifier = models[-1]
y_pred = classifier.predict(X_test)[:, 0]

# Show the first few raw outputs next to the true labels.
print(y_pred[:10])
print(y_test[:10])

# Round the outputs to the nearest integer to obtain predicted labels.
y_pred = np.round(y_pred)

# Fraction of test samples whose predicted label equals the true label.
success = np.sum(y_pred == y_test)
print(success / len(y_test))

[0.97922923 0.97922923 0.97922923 0.06914239 0.97922923 0.04416221
 0.97919212 0.01236772 0.0153758  0.01281863]
[1 1 1 0 1 0 1 0 0 0]
0.9582307692307692
