In [1]:
import torch

# Fix the global RNG seed so the random data below is identical on every
# fresh "Restart & Run All"; without it no printed number in this notebook
# can be reproduced.
SEED = 0
torch.manual_seed(SEED)

# Build 10000 samples, each a row of N standard-normal float32 values.
N = 3
inputs = torch.randn((10000,N),dtype=torch.float32)
# The targets are the very same tensor object as the inputs, i.e. the
# network is asked to learn the identity mapping.
outputs = inputs

In [3]:
import pprint

# Assemble a small fully connected network:
#   linear (input width -> hidden_nodes) -> ReLU -> linear (hidden_nodes -> output width)
hidden_nodes = 3
n_in, n_out = inputs.shape[1], outputs.shape[1]
layers = [
    torch.nn.Linear(n_in, hidden_nodes),
    torch.nn.ReLU(),
    torch.nn.Linear(hidden_nodes, n_out),
]
model = torch.nn.Sequential(*layers)

# Dump every parameter tensor of the freshly initialised network.
print('Weights and bias values before training.')
for label, x in model.state_dict().items():
  print('\n{}: '.format(label))
  pprint.pprint(x)

# Push one fixed probe vector through the untrained network; gradients are
# not needed for this check, so the forward pass runs under no_grad.
print('\nCheck the neural network output before training:')
model.eval()
test = torch.tensor([0.1, 0.2, 0.3])
with torch.no_grad():
  prediction = model(test)
print('input: {}, output: {}'.format(test,prediction))

Weights and bias values before training.

0.weight: 
tensor([[-0.3758,  0.1270,  0.2587],
        [ 0.1623, -0.4476,  0.1158],
        [ 0.1029,  0.3594, -0.3168]])

0.bias: 
tensor([ 0.4459, -0.0161, -0.3755])

2.weight: 
tensor([[-0.2851, -0.1429,  0.1870],
        [ 0.2814, -0.4795, -0.1557],
        [ 0.0812,  0.0301,  0.0105]])

2.bias: 
tensor([-0.1628,  0.0920,  0.4408])

Check the neural network output before training:
input: tensor([0.1000, 0.2000, 0.3000]), output: tensor([-0.3086,  0.2358,  0.4823])


In [4]:
import math

# Switch the network to training mode before optimising.
model.train()

# Break the data into mini-batches of a fixed size. split() takes the batch
# size directly, so the number of batches is simply however many pieces come
# back. (Asking chunk() for a precomputed number of pieces is fragile: it may
# return fewer pieces than requested, and it raises when that number is 0,
# i.e. when there are fewer samples than one batch.)
batchSize = 100
inputMiniBatches = inputs.split(batchSize)
outputMiniBatches = outputs.split(batchSize)
numMiniBatch = len(inputMiniBatches)

# Mean-squared-error loss, optimised with Adam.
lossFunc = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(),lr=1e-4)

n_epochs = 500
for epoch in range(n_epochs):
  # Print something every 10 epochs of training
  if epoch%10 == 0:
    print('=>Starting {}/{} epochs.'.format(epoch+1,n_epochs))
  # Walk inputs and targets in lock-step; no index bookkeeping needed.
  for batchInputs, batchTargets in zip(inputMiniBatches, outputMiniBatches):
    # Clear gradients left over from the previous step before accumulating new ones.
    optimizer.zero_grad()
    prediction = model(batchInputs)
    loss = lossFunc(prediction,batchTargets)
    loss.backward()
    optimizer.step()


print ('Training done!')

=>Starting 1/500 epochs.
=>Starting 11/500 epochs.
=>Starting 21/500 epochs.
=>Starting 31/500 epochs.
=>Starting 41/500 epochs.
=>Starting 51/500 epochs.
=>Starting 61/500 epochs.
=>Starting 71/500 epochs.
=>Starting 81/500 epochs.
=>Starting 91/500 epochs.
=>Starting 101/500 epochs.
=>Starting 111/500 epochs.
=>Starting 121/500 epochs.
=>Starting 131/500 epochs.
=>Starting 141/500 epochs.
=>Starting 151/500 epochs.
=>Starting 161/500 epochs.
=>Starting 171/500 epochs.
=>Starting 181/500 epochs.
=>Starting 191/500 epochs.
=>Starting 201/500 epochs.
=>Starting 211/500 epochs.
=>Starting 221/500 epochs.
=>Starting 231/500 epochs.
=>Starting 241/500 epochs.
=>Starting 251/500 epochs.
=>Starting 261/500 epochs.
=>Starting 271/500 epochs.
=>Starting 281/500 epochs.
=>Starting 291/500 epochs.
=>Starting 301/500 epochs.
=>Starting 311/500 epochs.
=>Starting 321/500 epochs.
=>Starting 331/500 epochs.
=>Starting 341/500 epochs.
=>Starting 351/500 epochs.
=>Starting 361/500 epochs.
=>Starting 3

In [5]:
# Evaluate the trained network on a fixed probe vector. The model is put in
# evaluation mode and the forward pass runs without gradient tracking.
model.eval()
test = torch.tensor([0.1, 0.2, 0.3])
with torch.no_grad():
  prediction = model(test)
print('input: {}, prediction: {}'.format(test,prediction))

input: tensor([0.1000, 0.2000, 0.3000]), prediction: tensor([0.0999, 0.2003, 0.2997])


In [6]:
# Pull the trained parameters out of the model as plain nested Python lists
# so they can be inspected (and re-used for a by-hand calculation).
state = model.state_dict()
A, a = state['0.weight'].tolist(), state['0.bias'].tolist()
B, b = state['2.weight'].tolist(), state['2.bias'].tolist()

# Print each layer as: heading, weight matrix, blank line, bias vector.
for heading, weights, biases in (
    ('\nWeights and biases for first layer.', A, a),
    ('\nWeights and biases for second layer.', B, b),
):
  print(heading)
  pprint.pprint(weights)
  print()
  pprint.pprint(biases)


Weights and biases for first layer.
[[-0.5669860243797302, 0.17802953720092773, -0.22293226420879364],
 [-0.356993168592453, -0.30792301893234253, -0.34331372380256653],
 [-0.110781230032444, 0.1137191578745842, -0.5053673386573792]]

[2.225759744644165, 1.863700270652771, 1.8677211999893188]

Weights and biases for second layer.
[[-1.6340062618255615, -0.5428079962730408, 1.090437412261963],
 [1.1958799362182617, -2.199279308319092, 0.9663577675819397],
 [0.6275625824928284, -0.3754696547985077, -1.9991546869277954]]

[2.611955165863037, -0.3675835430622101, 3.036720037460327]


In [7]:
# Reproduce the network's forward pass with plain Python arithmetic so every
# intermediate number can be inspected. The cell is written for the 3-3-3
# network built above (three inputs, three hidden nodes, three outputs).

def _affine(weights, bias, vec):
  """Return one `row . vec + bias` value per row of `weights`.

  Products are accumulated strictly left to right and the bias is added
  last, so the floating-point result matches the written-out expression
  w0*v0 + w1*v1 + w2*v2 + b.
  """
  results = []
  for row, offset in zip(weights, bias):
    acc = row[0]*vec[0]
    for w, v in zip(row[1:], vec[1:]):
      acc = acc + w*v
    results.append(acc + offset)
  return results


def _terms(row, vec, offset):
  """Flatten to (w0, v0, w1, v1, ..., bias) in the order the print templates expect."""
  flat = []
  for w, v in zip(row, vec):
    flat.extend((w, v))
  flat.append(offset)
  return flat


# Components of the probe vector as Python floats.
x0, x1, x2 = (test[i].item() for i in range(3))
xs = [x0, x1, x2]

print('Input to neural network:')
print('x0={:7.4f}, x1={:7.4f},x2={:7.4f}'.format(x0,x1,x2))

# Template shared by every "result = w*x + w*x + w*x + bias" line.
row_fmt = '{:7.4f} = {:7.4f} * {:7.4f} + {:7.4f} * {:7.4f} + {:7.4f} * {:7.4f} + {:7.4f}'

# First linear layer.
state = model.state_dict()
A = state['0.weight'].tolist()
a = state['0.bias'].tolist()

sum0, sum1, sum2 = _affine(A, a, xs)
print('\nSums for first layer:')
for total, row, offset in zip((sum0, sum1, sum2), A, a):
  print(row_fmt.format(total, *_terms(row, xs, offset)))

# ReLU: negative sums are clamped to zero, everything else passes through.
y0, y1, y2 = (max(s, 0) for s in (sum0, sum1, sum2))
ys = [y0, y1, y2]
print('\nApplying the ReLU to the sums:')
for idx, (activated, raw) in enumerate(zip(ys, (sum0, sum1, sum2))):
  print('y{} = {:7.4f} = ReLU({:7.4f})'.format(idx, activated, raw))

# Second linear layer, fed with the ReLU outputs.
B = state['2.weight'].tolist()
b = state['2.bias'].tolist()

z0, z1, z2 = _affine(B, b, ys)
print('\nCalculating the final output:')
for idx, (total, row, offset) in enumerate(zip((z0, z1, z2), B, b)):
  print(('z{} = '.format(idx) + row_fmt).format(total, *_terms(row, ys, offset)))

Input to neural network:
x0= 0.1000, x1= 0.2000,x2= 0.3000

Sums for first layer:
 2.1378 = -0.5670 *  0.1000 +  0.1780 *  0.2000 + -0.2229 *  0.3000 +  2.2258
 1.6634 = -0.3570 *  0.1000 + -0.3079 *  0.2000 + -0.3433 *  0.3000 +  1.8637
 1.7278 = -0.1108 *  0.1000 +  0.1137 *  0.2000 + -0.5054 *  0.3000 +  1.8677

Applying the ReLU to the sums:
y0 =  2.1378 = ReLU( 2.1378)
y1 =  1.6634 = ReLU( 1.6634)
y2 =  1.7278 = ReLU( 1.7278)

Calculating the final output:
z0 =  0.0999 = -1.6340 *  2.1378 + -0.5428 *  1.6634 +  1.0904 *  1.7278 +  2.6120
z1 =  0.2003 =  1.1959 *  2.1378 + -2.1993 *  1.6634 +  0.9664 *  1.7278 + -0.3676
z2 =  0.2997 =  0.6276 *  2.1378 + -0.3755 *  1.6634 + -1.9992 *  1.7278 +  3.0367
