In [112]:
# import libraries
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader,TensorDataset
from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt
from IPython import display
display.set_matplotlib_formats('svg')

In [134]:
# make data
nums = []
for i in range(-10,11):
    for y in range(-10,11):
        nums.append((i,y))
data = np.array(nums)
labels = np.sum(nums,axis=1)

In [135]:
# convert to tensor
dataT   = torch.tensor( data ).float()
labelsT = torch.tensor( labels ).float() # long = int64

# split the data
train_data,test_data, train_labels,test_labels = train_test_split(dataT, labelsT, test_size=.1)


# convert into PyTorch Datasets
train_data = TensorDataset(train_data,train_labels)
test_data  = TensorDataset(test_data,test_labels)

# make into dataloader objects
batchsize    = 4
train_loader = DataLoader(train_data,batch_size=batchsize,shuffle=True,drop_last=True)
test_loader  = DataLoader(test_data,batch_size=test_data.tensors[0].shape[0])

In [136]:
# create a class for the model
def createCalcualtor():

    class calcualtor(nn.Module):
        def __init__(self):
            super().__init__()

            ### input layer
            self.input = nn.Linear(2,20)

            ### hidden layer
            self.fc1 = nn.Linear(20,20)
            
            ### output layer
            self.output = nn.Linear(20,1)

            # forward pass
        def forward(self,x):
            x = F.relu( self.input(x) )
            x = F.relu( self.fc1(x) )
            return self.output(x)

    # create the model instance
    net = calcualtor()

    # loss function
    criterion = nn.MSELoss()

    # optimizer
    optimizer = torch.optim.Adam(net.parameters(),lr=.01)

    return net,criterion,optimizer

In [137]:
# test the model with one batch
net,criterion,optimizer = createCalcualtor()

X,y = iter(train_loader).next()
yHat = net(X)

# values are log-probability of each number (0-9)
# print(torch.exp(yHat))
print(X,y)
# now let's compute the loss
loss = criterion(yHat,y)
print(' ')
print('Loss:')
print(loss)


tensor([[  3.,   5.],
        [  1., -10.],
        [  4.,   6.],
        [  8.,   9.]]) tensor([ 8., -9., 10., 17.])
 
Loss:
tensor(140.9523, grad_fn=<MseLossBackward0>)


In [138]:
# a function that trains the model

def funtion2trainTheModel():

    # number of epochs
    numepochs = 10

    # create a new model
    net,criterion,optimizer = createCalcualtor()

    # initialize losses
    losses    = torch.zeros(numepochs)
    trainAcc  = []
    testAcc   = []


    # loop over epochs
    for epochi in range(numepochs):

        # loop over training data batches
        batchAcc  = []
        batchLoss = []
        for X,y in train_loader:

            # forward pass and loss
            yHat = net(X)
            loss = criterion(yHat,y)
            print(yHat)
            # backprop
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # loss from this batch
            batchLoss.append(loss.item())

            # compute accuracy
            matches = torch.argmax(yHat,axis=1) == y     # booleans (false/true)
            matchesNumeric = matches.float()             # convert to numbers (0/1)
            accuracyPct = 100*torch.mean(matchesNumeric) # average and x100
            batchAcc.append( accuracyPct )               # add to list of accuracies
            # end of batch loop...

        # now that we've trained through the batches, get their average training accuracy
        trainAcc.append( np.mean(batchAcc) )

        # and get average losses across the batches
        losses[epochi] = np.mean(batchLoss)

        # test accuracy
        X,y = next(iter(test_loader)) # extract X,y from test dataloader
        yHat = net(X)

        # compare the following really long line of code to the training accuracy lines
        testAcc.append( 100*torch.mean((torch.argmax(yHat,axis=1)==y).float()) )

        # end epochs

        # function output
    return trainAcc,testAcc,losses,net


In [139]:
trainAcc,testAcc,losses,net = funtion2trainTheModel()

tensor([[-0.0161],
        [ 0.2503],
        [ 0.0224],
        [ 0.5492]], grad_fn=<AddmmBackward0>)
tensor([[0.1419],
        [0.2410],
        [0.3327],
        [0.0050]], grad_fn=<AddmmBackward0>)
tensor([[0.9879],
        [0.5523],
        [1.1707],
        [0.3139]], grad_fn=<AddmmBackward0>)
tensor([[0.6831],
        [0.8612],
        [1.0084],
        [0.7689]], grad_fn=<AddmmBackward0>)
tensor([[1.1877],
        [0.1952],
        [1.0450],
        [0.7768]], grad_fn=<AddmmBackward0>)
tensor([[0.6241],
        [1.7245],
        [0.6555],
        [0.9462]], grad_fn=<AddmmBackward0>)
tensor([[0.5712],
        [0.5544],
        [0.7030],
        [0.4645]], grad_fn=<AddmmBackward0>)
tensor([[0.7608],
        [0.5016],
        [0.3698],
        [3.1212]], grad_fn=<AddmmBackward0>)
tensor([[1.3034],
        [0.0292],
        [0.4160],
        [1.8020]], grad_fn=<AddmmBackward0>)
tensor([[4.0723],
        [2.7493],
        [2.7082],
        [3.2447]], grad_fn=<AddmmBackward0>)
tensor

tensor([[-1.7899],
        [-0.2174],
        [ 2.1554],
        [ 1.6847]], grad_fn=<AddmmBackward0>)
tensor([[ 0.0478],
        [-0.0747],
        [ 1.3732],
        [ 1.7366]], grad_fn=<AddmmBackward0>)
tensor([[ 0.2371],
        [-1.7498],
        [-0.3392],
        [ 3.4198]], grad_fn=<AddmmBackward0>)
tensor([[-1.3708],
        [ 1.2238],
        [ 3.0788],
        [ 0.1640]], grad_fn=<AddmmBackward0>)
tensor([[-0.1864],
        [ 2.1694],
        [-0.2290],
        [ 3.8159]], grad_fn=<AddmmBackward0>)
tensor([[-1.8655],
        [-0.9563],
        [ 2.6529],
        [-0.1481]], grad_fn=<AddmmBackward0>)
tensor([[-1.7336],
        [ 2.6081],
        [-0.0658],
        [ 0.8900]], grad_fn=<AddmmBackward0>)
tensor([[0.0441],
        [1.4534],
        [4.1425],
        [0.0801]], grad_fn=<AddmmBackward0>)
tensor([[ 1.4551],
        [ 0.2242],
        [ 0.6766],
        [-0.8728]], grad_fn=<AddmmBackward0>)
tensor([[ 0.4806],
        [-1.0946],
        [ 3.3190],
        [-1.7327]], 

tensor([[-3.4549],
        [-1.6016],
        [-1.9332],
        [ 0.4879]], grad_fn=<AddmmBackward0>)
tensor([[ 0.1913],
        [-0.1126],
        [-0.6229],
        [ 1.1487]], grad_fn=<AddmmBackward0>)
tensor([[-1.2977],
        [-0.7881],
        [ 1.7171],
        [-2.0999]], grad_fn=<AddmmBackward0>)
tensor([[-2.0781],
        [-0.0417],
        [ 0.7305],
        [-0.6013]], grad_fn=<AddmmBackward0>)
tensor([[-3.2124],
        [-1.4693],
        [ 0.0774],
        [-2.9114]], grad_fn=<AddmmBackward0>)
tensor([[-0.7392],
        [-1.2494],
        [-0.4548],
        [ 0.0992]], grad_fn=<AddmmBackward0>)
tensor([[ 1.8224],
        [-3.5385],
        [-2.9747],
        [-0.7878]], grad_fn=<AddmmBackward0>)
tensor([[ 0.5576],
        [-5.5437],
        [-0.0263],
        [ 0.2577]], grad_fn=<AddmmBackward0>)
tensor([[ 0.5886],
        [-4.0836],
        [-0.5780],
        [-0.0690]], grad_fn=<AddmmBackward0>)
tensor([[-4.2740],
        [ 0.1482],
        [ 2.1312],
        [-0.9905

tensor([[ 2.5009],
        [-2.9420],
        [ 0.8224],
        [-0.1636]], grad_fn=<AddmmBackward0>)
tensor([[-1.8836],
        [-0.6481],
        [-0.2560],
        [ 0.2390]], grad_fn=<AddmmBackward0>)
tensor([[ 0.4574],
        [ 0.9126],
        [-1.5951],
        [ 0.3789]], grad_fn=<AddmmBackward0>)
tensor([[ 2.3090],
        [-0.8184],
        [-0.3424],
        [ 2.1904]], grad_fn=<AddmmBackward0>)
tensor([[ 0.6803],
        [ 0.0637],
        [-0.0090],
        [ 2.6826]], grad_fn=<AddmmBackward0>)
tensor([[-1.7134],
        [-0.5620],
        [-3.3613],
        [ 0.5054]], grad_fn=<AddmmBackward0>)
tensor([[ 0.2282],
        [-1.9867],
        [ 2.6159],
        [-2.8227]], grad_fn=<AddmmBackward0>)
tensor([[ 1.1426],
        [ 2.4801],
        [-0.3441],
        [ 0.1227]], grad_fn=<AddmmBackward0>)
tensor([[ 1.0582],
        [-1.4704],
        [ 2.2266],
        [ 0.0642]], grad_fn=<AddmmBackward0>)
tensor([[-0.1672],
        [ 0.9961],
        [-3.1484],
        [ 2.1139

tensor([[ 0.0478],
        [-0.0288],
        [ 0.1224],
        [ 2.8085]], grad_fn=<AddmmBackward0>)
tensor([[ 1.0937],
        [-3.5469],
        [ 1.4983],
        [ 0.5892]], grad_fn=<AddmmBackward0>)
tensor([[ 2.2959],
        [-3.5205],
        [ 0.4146],
        [ 1.0790]], grad_fn=<AddmmBackward0>)
tensor([[-1.8369],
        [-0.4151],
        [ 1.3252],
        [-0.0940]], grad_fn=<AddmmBackward0>)
tensor([[ 0.8451],
        [-3.3952],
        [ 0.4535],
        [-1.0259]], grad_fn=<AddmmBackward0>)
tensor([[2.5497],
        [2.4179],
        [0.7679],
        [0.6030]], grad_fn=<AddmmBackward0>)
tensor([[-1.9210],
        [ 1.5680],
        [ 0.0120],
        [ 0.5230]], grad_fn=<AddmmBackward0>)
tensor([[ 0.8445],
        [-0.5909],
        [-0.0243],
        [-1.2273]], grad_fn=<AddmmBackward0>)
tensor([[-1.7985],
        [-2.1903],
        [ 0.1693],
        [-0.8610]], grad_fn=<AddmmBackward0>)
tensor([[ 0.2411],
        [-2.0006],
        [-1.7865],
        [ 2.3870]], 

In [129]:
trainAcc

[4.79798,
 4.79798,
 4.79798,
 4.79798,
 4.79798,
 4.79798,
 4.79798,
 4.79798,
 4.79798,
 4.79798]

In [141]:
# a function that builds the model

def createADDmodel():

  # model architecture
  ADDclassify = nn.Sequential(
      nn.Linear(2,30),  # input layer
      nn.ReLU(),        # activation unit
      nn.Linear(30,1),  # hidden layer
      nn.ReLU(),        # activation unit
      nn.Linear(1,1),   # output unit
        )

  # loss function
  lossfun = nn.MSELoss()

  # optimizer
  optimizer = torch.optim.Adam(ADDclassify.parameters(),lr=.01)

  # model output
  return ADDclassify,lossfun,optimizer

In [155]:
# a function that trains the model

# a fixed parameter
numepochs = 20

def trainTheModel(ADDmodel):

  # initialize losses
  losses = torch.zeros(numepochs)

  # loop over epochs
  for epochi in range(numepochs):

    numloss = []
    # loop through "minibatches" of N=1
    for num,ans in zip(dataT,labelsT):

      # forward pass
      yHat = ADDmodel(num)

      # compute loss
      loss = lossfun(yHat,ans)
      numloss.append(loss.item())

      # backprop
      optimizer.zero_grad()
      loss.backward()
      optimizer.step()
  
  losses[epochi] = np.mean(numloss)
  
  
  # final forward pass
  with torch.no_grad():
    predictions = ADDmodel(dataT)
  
  # compute the predictions and report accuracy (is this really different from loss?)
  trainacc = 100*torch.mean( (np.abs(predictions-labels)<1).float() ) # <1 for rounding


  ### now test with new data!
  # create brand new test data
  TESTdata = torch.randint(low=-10,high=11,size=(100,2)).float()
  TESTlabels = torch.sum(TESTdata,axis=1,keepdim=True)
  
  # evaluate the model and compute accuracy
  with torch.no_grad():
    predictions = ADDmodel(TESTdata)
  testacc = 100*torch.mean( (np.abs(predictions-TESTlabels)<1).float() )
  
  # EOF
  return losses,trainacc,testacc,ADDmodel

In [156]:
# test the model once to make sure it runs
AddingMachine,lossfun,optimizer = createADDmodel()
losses,trainacc,testacc,ADDmodel = trainTheModel(AddingMachine)

# print the results
trainacc,testacc
# losses

(tensor(6.4803), tensor(84.))

In [157]:
# run the model 10 times to check reproducibility

for i in range(10):

  # create a new 'machine' and train it
  AddingMachine,lossfun,optimizer = createADDmodel()
  _,trainacc,testacc,ADDmodel = trainTheModel(AddingMachine)

  # report accuracy
  print('Model instance %s, final TRAIN/TEST accuracies: %g%%, %g%%' %(i+1,trainacc,testacc))

Model instance 1, final TRAIN/TEST accuracies: 9.29705%, 8%
Model instance 2, final TRAIN/TEST accuracies: 9.29705%, 9%
Model instance 3, final TRAIN/TEST accuracies: 9.29705%, 11%
Model instance 4, final TRAIN/TEST accuracies: 6.52763%, 63%
Model instance 5, final TRAIN/TEST accuracies: 6.32401%, 62%
Model instance 6, final TRAIN/TEST accuracies: 6.69628%, 19%
Model instance 7, final TRAIN/TEST accuracies: 6.53483%, 87%
Model instance 8, final TRAIN/TEST accuracies: 6.04841%, 49%
Model instance 9, final TRAIN/TEST accuracies: 6.39291%, 58%
Model instance 10, final TRAIN/TEST accuracies: 6.59756%, 77%
