In [None]:
import torch
import numpy as np
import matplotlib.pyplot as plt

# Create a list of sets of N random numbers
N = 3
# Prefer the GPU when one is present, but fall back to the CPU so this cell
# still runs on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
inputs = torch.randn((10000,N),dtype=torch.float32).to(device)
# The regression target is the element-wise square of every input coordinate.
outputs = inputs ** 2

In [None]:
# Show which device the target tensor lives on.
outputs.device

In [None]:
# Peek at the first training sample (one row of N random numbers).
inputs[0]

In [None]:
# Peek at the matching target: the element-wise square of inputs[0].
outputs[0]

In [None]:
# set up graph for loss curve
fig_loss, ax_loss = plt.subplots()

# create a test set
# Place it on whatever device the training data uses instead of hard-coding
# CUDA, so the test set and the training set always live side by side.
test_inputs = torch.randn((1000,N),dtype=torch.float32).to(inputs.device)
# Ground truth for the test set: element-wise square, same as for training.
test_outputs = test_inputs ** 2

In [None]:
import pprint

# Create a simple neural network: one hidden ReLU layer between two linear
# layers, with as many inputs/outputs as the data has columns.
hidden_nodes = 100
model = torch.nn.Sequential(
    torch.nn.Linear(inputs.shape[1],hidden_nodes),
    torch.nn.ReLU(),
    # torch.nn.Linear(hidden_nodes,hidden_nodes),
    # torch.nn.ReLU(),
    torch.nn.Linear(hidden_nodes,outputs.shape[1])
)
# Keep the model on the same device as the training data rather than
# hard-coding CUDA.
model.to(inputs.device)

print('Weights and bias values before training.')
# Build the state dict once and walk its (name, tensor) pairs.
for label, x in model.state_dict().items():
    # print('\n{}: '.format(label))
    pprint.pprint(x)

print('\nCheck the neural network output before training:')
with torch.no_grad():
    model.eval()
    # Fixed three-component probe; this only matches the network when N == 3.
    test = torch.tensor([0.1, 0.2, 0.3]).to(inputs.device)
    prediction = model(test)
    # Actually show the value announced by the header above.
    print('input: {}, output: {}'.format(test,prediction))

In [None]:
import math

# Get ready to train
model.train()

# Break the list up into smaller batches for more efficient training
# (roughly 100 samples per batch; never fewer than one batch).
numMiniBatch = max(1, int(math.floor(inputs.shape[0]/100.)))
inputMiniBatches = inputs.chunk(numMiniBatch)
outputMiniBatches = outputs.chunk(numMiniBatch)

# Train the neural network
lossFunc = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(),lr=1e-4)

n_epochs = 500
# Per-epoch losses are collected here and plotted once after training, so each
# curve is a single artist with a single legend entry.
train_losses = []
test_losses = []
for epoch in range(n_epochs):
    # Print something every 10 epochs of training
    if epoch%10 == 0:
        print('=>Starting {}/{} epochs.'.format(epoch+1,n_epochs))
    # Pair the chunks directly: chunk() may return fewer pieces than requested,
    # so indexing by range(numMiniBatch) is not guaranteed to be safe.
    for batch_inputs, batch_targets in zip(inputMiniBatches, outputMiniBatches):
        prediction = model(batch_inputs)
        loss = lossFunc(prediction, batch_targets)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Record the full training-set and test-set loss for this epoch.
    # No gradients are needed for monitoring, so skip building the graph.
    with torch.no_grad():
        total_prediction = model(inputs)
        total_loss = lossFunc(total_prediction, outputs).item()
        test_total_prediction = model(test_inputs)
        test_total_loss = lossFunc(test_total_prediction, test_outputs).item()
    train_losses.append(total_loss)
    test_losses.append(test_total_loss)

# Plot two points on the loss graph for each epoch, one for testing and one for training.
epochs_axis = list(range(n_epochs))
ax_loss.plot(epochs_axis, train_losses, 'bo', markersize=1, label='Training')
ax_loss.plot(epochs_axis, test_losses, 'go', markersize=1, label='Testing')
ax_loss.set_xlabel('Epochs')
ax_loss.set_ylabel('Total Loss')
# Enlarge the legend markers; the plotted points themselves are only 1pt wide.
ax_loss.legend(markerscale=5)

print ('Training done!')

In [None]:
# Display the loss-curve figure that the training cell drew into.
fig_loss

In [None]:
# fig_loss.savefig('loss.pdf')

In [None]:
# test a single input, not very useful
with torch.no_grad():
    model.eval()
    # Put the probe on the same device as the training data instead of
    # hard-coding CUDA. The three components assume N == 3.
    test = torch.tensor([0.1, 0.2, 0.3]).to(inputs.device)
    prediction = model(test)
    print('input: {}, prediction: {}'.format(test,prediction))

In [None]:
# Check out the output by running the model in evaluation
# make some residual plots (target minus prediction, per coordinate)

fig, ax = plt.subplots()
fig2, ax2 = plt.subplots()
fig3, ax3 = plt.subplots()


# this can be easily modified to plot output vs. input directly
with torch.no_grad():
    model.eval()
    # One batched forward pass over the whole test set instead of one call
    # per sample.
    test_predictions = model(test_inputs)
    residuals = (test_inputs ** 2 - test_predictions).cpu().numpy()
    test_coords = test_inputs.cpu().numpy()

# One plot call per coordinate: column index, target axes, marker style and
# the ordinal used in the axis labels.
for column, axes, marker, ordinal in (
    (0, ax, 'bo', '1st'),
    (1, ax2, 'go', '2nd'),
    (2, ax3, 'co', '3rd'),
):
    axes.plot(test_coords[:, column], residuals[:, column], marker, markersize=1)
    axes.set_xlabel('{} coordinate input'.format(ordinal))
    axes.set_ylabel('{} coordinate error'.format(ordinal))

In [None]:
# fig.savefig('1st-1st.pdf')
# fig2.savefig('2nd-2nd.pdf')
# fig3.savefig('3rd-3rd.pdf')

In [None]:
# Check what proportion of the predictions falls within 0.01 absolute accuracy or 1% relative accuracy
def test_data(prediction, actual):
    "Output 1 if prediction is accurate enough and 0 otherwise"
    abs_err = abs(prediction-actual)
    rel_err = abs_err / actual
    if (abs_err < 0.01 or rel_err < 0.01):
        return 1
    else: return 0

v_test_data = np.vectorize(test_data) # This makes a vector function and avoids a for loop to improve performance


In [None]:
# NumPy needs host memory, so both tensors are detached and copied to the CPU
# before being converted to arrays.
predicted_np = test_total_prediction.detach().cpu().numpy()
expected_np = test_outputs.detach().cpu().numpy()
# Element-wise 0/1 flags: 1 where the prediction is accurate enough.
accuracy = v_test_data(predicted_np, expected_np)

In [None]:
# Caveat: every coordinate of every test vector is counted as its own data
# point, so this is the proportion of accurate coordinates, not of accurate
# vectors. Scoring whole vectors would require comparing their norms instead.
accuracy.sum() / test_inputs.numel()

In [None]:
# NOT TESTED
# Dump the trained parameters of both linear layers as plain nested lists so
# the arithmetic can be checked by hand.
trained_params = model.state_dict()

# Keys '0.*' belong to the first Linear layer, '2.*' to the second one.
A = trained_params['0.weight'].tolist()
a = trained_params['0.bias'].tolist()
B = trained_params['2.weight'].tolist()
b = trained_params['2.bias'].tolist()

for header, weights, biases in (
    ('\nWeights and biases for first layer.', A, a),
    ('\nWeights and biases for second layer.', B, b),
):
    print(header)
    pprint.pprint(weights)
    print()
    pprint.pprint(biases)

In [None]:
# Work out the details: redo the network's forward pass by hand for the
# single sample `test` (assumes `test` has exactly three components).
x0 = test[0].item()
x1 = test[1].item()
x2 = test[2].item()

print('Input to neural network:')
print('x0={:7.4f}, x1={:7.4f},x2={:7.4f}'.format(x0,x1,x2))

# Check the math by hand
# First layer: A has one row of input weights per hidden unit, a is the bias.
A = model.state_dict()['0.weight'].tolist()
a = model.state_dict()['0.bias'].tolist()

# Pre-activation sum of EVERY hidden unit. The output layer needs all of them,
# even though only the first three are printed in detail below.
hidden_sums = [row[0]*x0 + row[1]*x1 + row[2]*x2 + bias for row, bias in zip(A, a)]
n_hidden = len(hidden_sums)
# Requires at least three hidden units, like the detailed printout itself.
sum0, sum1, sum2 = hidden_sums[:3]

print('\nSums for first layer:')
for j in range(3):
    print('{:7.4f} = {:7.4f} * {:7.4f} + {:7.4f} * {:7.4f} + {:7.4f} * {:7.4f} + {:7.4f}'.format(hidden_sums[j],A[j][0],x0,A[j][1],x1,A[j][2],x2,a[j]))
if n_hidden > 3:
    print('... ({} more hidden units not shown)'.format(n_hidden - 3))

# ReLU clamps negative sums to zero.
hidden_acts = [max(s, 0) for s in hidden_sums]
y0, y1, y2 = hidden_acts[:3]
print('\nApplying the ReLU to the sums:')
for j in range(3):
    print('y{} = {:7.4f} = ReLU({:7.4f})'.format(j, hidden_acts[j], hidden_sums[j]))
if n_hidden > 3:
    print('... ({} more hidden units not shown)'.format(n_hidden - 3))

# Second layer: B has one row per output, with one weight per hidden unit.
B = model.state_dict()['2.weight'].tolist()
b = model.state_dict()['2.bias'].tolist()

print('\nCalculating the final output:')
z_values = []
for k in range(len(B)):
    # Sum over all hidden units, not just the three that are printed.
    z_k = sum(w * y for w, y in zip(B[k], hidden_acts)) + b[k]
    z_values.append(z_k)
    shown_terms = ' + '.join('{:7.4f} * {:7.4f}'.format(B[k][j], hidden_acts[j]) for j in range(3))
    if n_hidden > 3:
        shown_terms += ' + ... ({} more terms)'.format(n_hidden - 3)
    print('z{} = {:7.4f} = {} + {:7.4f}'.format(k, z_k, shown_terms, b[k]))
z0, z1, z2 = z_values[:3]

# Let the network compute the same sample so the hand calculation can be
# compared against it directly.
with torch.no_grad():
    reference_output = model(test).tolist()
print('\nNetwork output for comparison:')
print(', '.join('{:7.4f}'.format(value) for value in reference_output))