In [1]:
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn

import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from typing import List

# Project Description
This is a FFN trained to add integers from -10 to + 10

In [2]:
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)} is available.")
else:
    print("No GPU available. Training will run on CPU.")

GPU: NVIDIA GeForce GTX 1660 Ti with Max-Q Design is available.


In [3]:
# create the dataset
N = 10000
torch.manual_seed(42)

nums_1 = torch.randint(-10, 11, (N,1), dtype=torch.float32)
nums_2 = torch.randint(-10, 11, (N,1), dtype=torch.float32)

target = nums_1 + nums_2

In [4]:
# Concatenate along columns (dim=1)
features = torch.cat((nums_1, nums_2), dim=1)

train_data,test_data, train_labels,test_labels = train_test_split(features, target, test_size=.2)

# then convert them into PyTorch Datasets (note: already converted to tensors)
train_data = TensorDataset(train_data,train_labels)
test_data  = TensorDataset(test_data,test_labels)

# finally, translate into dataloader objects
batchsize    = 16
train_loader = DataLoader(train_data,batch_size=batchsize,shuffle=True,drop_last=True)
test_loader  = DataLoader(test_data,batch_size=test_data.tensors[0].shape[0])

In [5]:
test_data.tensors[0].shape[0]

2000

In [6]:
class addMachine(nn.Module):
  def __init__(self):
    super().__init__()

    ### input layer
    self.input = nn.Linear(2,16)

    ### hidden layers
    self.fc1 = nn.Linear(16,32)
    self.fc2 = nn.Linear(32,32)

    ### output layer
    self.output = nn.Linear(32,1)

  def forward(self,x):
    x = F.relu( self.input(x) )
    x = F.relu( self.fc1(x) )
    x = F.relu( self.fc2(x) )
    return self.output(x)

In [7]:
#test model on some data from the train_loader
littleData = next(iter(train_loader))

model = addMachine()

print(littleData)
model(littleData[0])

[tensor([[ 5.,  3.],
        [ 8.,  2.],
        [ 7., -8.],
        [ 3., -6.],
        [-2., -1.],
        [ 7., -5.],
        [-1.,  0.],
        [ 9.,  4.],
        [-6.,  7.],
        [ 8., -5.],
        [-3.,  4.],
        [ 0., 10.],
        [-1.,  1.],
        [-7., -9.],
        [-9., -1.],
        [ 3., -5.]]), tensor([[  8.],
        [ 10.],
        [ -1.],
        [ -3.],
        [ -3.],
        [  2.],
        [ -1.],
        [ 13.],
        [  1.],
        [  3.],
        [  1.],
        [ 10.],
        [  0.],
        [-16.],
        [-10.],
        [ -2.]])]


tensor([[ 0.1151],
        [ 0.1731],
        [ 0.0340],
        [ 0.0171],
        [-0.0501],
        [ 0.0123],
        [-0.0628],
        [ 0.2093],
        [-0.5234],
        [ 0.0298],
        [-0.2872],
        [-0.3693],
        [-0.1021],
        [ 0.1271],
        [-0.1684],
        [ 0.0047]], grad_fn=<AddmmBackward0>)

In [12]:
# train the model

# global parameter
numepochs = 1000

def trainTheModel(train_loader, test_loader, model):
  device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
  print(f"Using device: {device}") 

  # model = model.to(device)

  # loss function and optimizer
  lossfun = nn.MSELoss()
  optimizer = torch.optim.Adam(model.parameters(),lr=.001)

  #losses
  trainLoss = torch.zeros(numepochs)
  testLoss  = torch.zeros(numepochs)

  for epochi in range(numepochs):

    # switch on training mode
    model.train()

    # loop over training data batches
    batchLoss = []
    for X,y in train_loader:
      # X, y = X.to(device), y.to(device)
      # forward pass and loss
      yHat = model(X)
      loss = lossfun(yHat,y)

      # backprop
      optimizer.zero_grad()
      loss.backward()
      optimizer.step()

      # loss from this batch
      batchLoss.append(loss.item())
    # end of batch loop...

    # and get average losses across the batches
    trainLoss[epochi] = np.mean(batchLoss)

    # test accuracy
    model.eval()
    X,y = next(iter(test_loader)) # extract X,y from test dataloader
    # X, y = X.to(device), y.to(device)
    with torch.no_grad(): # deactivates autograd
      yHat = model(X)
    testLoss[epochi] = lossfun(yHat, y)
    print(f"Epoch_num: {epochi}, Curr_loss: {testLoss[epochi]}")
  # function output
  return trainLoss, testLoss

In [13]:
addition_model = addMachine()
trainTheModel(train_loader, test_loader, addition_model)

Using device: cuda:0
Epoch_num: 0, Curr_loss: 0.023433905094861984
Epoch_num: 1, Curr_loss: 0.008440238423645496
Epoch_num: 2, Curr_loss: 0.0030633111018687487
Epoch_num: 3, Curr_loss: 0.0018893880769610405
Epoch_num: 4, Curr_loss: 0.001535203424282372
Epoch_num: 5, Curr_loss: 0.0016353437677025795
Epoch_num: 6, Curr_loss: 0.001965461764484644
Epoch_num: 7, Curr_loss: 0.000908282061573118
Epoch_num: 8, Curr_loss: 0.0014230404049158096
Epoch_num: 9, Curr_loss: 0.0005467333248816431
Epoch_num: 10, Curr_loss: 0.011184841394424438
Epoch_num: 11, Curr_loss: 0.0007968527497723699
Epoch_num: 12, Curr_loss: 0.0008166541229002178
Epoch_num: 13, Curr_loss: 0.0006043299217708409
Epoch_num: 14, Curr_loss: 0.0004978130455128849
Epoch_num: 15, Curr_loss: 0.0038139973767101765
Epoch_num: 16, Curr_loss: 0.0003911943349521607
Epoch_num: 17, Curr_loss: 0.00039861383265815675
Epoch_num: 18, Curr_loss: 0.005843241233378649
Epoch_num: 19, Curr_loss: 0.0004795852000825107
Epoch_num: 20, Curr_loss: 0.0012750

KeyboardInterrupt: 

In [18]:
test_input = torch.tensor([8,-10], dtype=torch.float32)
addition_model(test_input)

tensor([-2.0046], grad_fn=<ViewBackward0>)