In [1]:
# https://pyimagesearch.com/2021/07/12/intro-to-pytorch-training-your-first-neural-network-using-pytorch/

In [1]:
# import the necessary packages
from collections import OrderedDict
import torch.nn as nn

from torch.optim import SGD
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_blobs

import cv2
import matplotlib.pyplot as plt
%matplotlib inline

import torch
import torchvision
# report the library versions in use
print("Torch:", torch.__version__)
print("Torchvision:", torchvision.__version__)
# report how many CUDA devices are visible and the name of device 0,
# or state that no GPU is available
if torch.cuda.is_available():
    print("GPU:", torch.cuda.device_count(), torch.cuda.get_device_name(0))
else:
    print("NO GPU")

Torch: 1.8.2+cu102
Torchvision: 0.9.2+cu102
GPU: 1 Quadro RTX 3000 with Max-Q Design


In [2]:
def get_training_model(inFeatures=4, hiddenDim=8, nbClasses=3):
    """Build a shallow fully connected classifier.

    The network is Linear(inFeatures -> hiddenDim), ReLU,
    Linear(hiddenDim -> nbClasses). The last layer has no activation, so
    the model outputs raw, unnormalized class scores.

    Args:
        inFeatures: size of each input feature vector.
        hiddenDim: number of units in the hidden layer.
        nbClasses: number of output classes.

    Returns:
        An ``nn.Sequential`` whose sub-modules are named
        ``hidden_layer_1``, ``activation_1`` and ``output_layer``.
    """
    # register the layers one by one, in forward-pass order
    namedLayers = OrderedDict()
    namedLayers["hidden_layer_1"] = nn.Linear(inFeatures, hiddenDim)
    namedLayers["activation_1"] = nn.ReLU()
    namedLayers["output_layer"] = nn.Linear(hiddenDim, nbClasses)

    # wrap the named layers into a single sequential module
    return nn.Sequential(namedLayers)

In [31]:
def next_batch(inputs, targets, batchSize):
    """Yield consecutive ``(inputs, targets)`` mini-batches.

    Args:
        inputs: sliceable container exposing ``shape``; ``shape[0]`` is
            taken as the number of samples.
        targets: sliceable container sliced with the same bounds as
            ``inputs``.
        batchSize: maximum number of samples per batch.

    Yields:
        Tuples ``(inputs[start:stop], targets[start:stop])`` in order. The
        last batch is shorter when the sample count is not a multiple of
        ``batchSize``.
    """
    sampleCount = inputs.shape[0]
    # walk over the start offset of every batch
    for start in range(0, sampleCount, batchSize):
        # one slice object keeps inputs and targets aligned
        window = slice(start, start + batchSize)
        yield (inputs[window], targets[window])

In [14]:
# training hyperparameters: mini-batch size, base epoch count, learning rate
BATCH_SIZE = 64
EPOCHS = 10
LR = 1e-2
# train on the GPU when CUDA is usable, otherwise fall back to the CPU
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"
print("[INFO] training using {}...".format(DEVICE))

[INFO] training using cuda...


In [15]:
# build a synthetic 3-class classification problem: 1000 data points,
# each one a 4D feature vector
print("[INFO] preparing data...")
X, y = make_blobs(
    n_samples=1000,
    n_features=4,
    centers=3,
    cluster_std=2.5,
    random_state=95,
)
# display the generated feature matrix
X

[INFO] preparing data...


array([[ -2.17167054,   9.62654054,   3.41543784,   1.64007163],
       [  1.810689  ,   7.40108036,   1.64578049,   1.6343025 ],
       [ -7.60943821, -10.52535379,   8.25352165,   2.40327365],
       ...,
       [  2.51722384,   5.19314789,   6.35146645,  -0.72698056],
       [ -4.68901597, -11.59067221,   5.73998927,   5.6966984 ],
       [  1.24403287,  13.39246788,   4.95230713,   8.10454284]])

In [18]:
# peek at the first 10 class labels returned by make_blobs
y[:10]

array([1, 1, 0, 2, 0, 0, 1, 2, 0, 1])

In [19]:
# create training and testing splits (15% of the data is held out for testing)
(trainX, testX, trainY, testY) = train_test_split(X, y, test_size=0.15, random_state=95)
# features are converted to float32 tensors
trainX = torch.from_numpy(trainX).float()
testX = torch.from_numpy(testX).float()
# labels are class indices, so store them as int64 (long) tensors: that is the
# target dtype nn.CrossEntropyLoss expects, and it avoids a float round trip
trainY = torch.from_numpy(trainY).long()
testY = torch.from_numpy(testY).long()

trainX

tensor([[ 3.5596,  4.0420,  5.9104,  0.4815],
        [-5.2407, -4.8634,  6.8165,  4.9165],
        [-9.6785, -7.1552,  6.8649,  6.1058],
        ...,
        [ 2.6384,  6.6450,  3.9682, -0.7591],
        [-5.8820, -6.9710,  6.8110,  6.8298],
        [ 3.9124,  7.9426, -0.1382, 12.2435]])

In [29]:
# confirm the size of the training feature tensor
trainX.shape

torch.Size([850, 4])

In [77]:
# seed PyTorch's random number generator so the weight initialization is the
# same every time this cell runs (95 matches the random_state used for the data)
torch.manual_seed(95)
# initialize our model on the target device and display its architecture
mlp = get_training_model().to(DEVICE)
mlp

Sequential(
  (hidden_layer_1): Linear(in_features=4, out_features=8, bias=True)
  (activation_1): ReLU()
  (output_layer): Linear(in_features=8, out_features=3, bias=True)
)

In [78]:
# loss function: cross-entropy computed on the model's raw class scores
lossFunc = nn.CrossEntropyLoss()
# optimizer: SGD over all model parameters with learning rate LR
opt = SGD(params=mlp.parameters(), lr=LR)

In [79]:
%%time
# template for the per-epoch training summary (defined once, outside the loop)
trainTemplate = "epoch: {} train loss: {:.3f} train accuracy: {:.3f}"
# train for 40 epochs beyond the configured EPOCHS value
totalEpochs = EPOCHS + 40
# loop through the epochs
for epoch in range(totalEpochs):
    # reset the running totals and put the model in training mode
    print("[INFO] epoch: {}...".format(epoch + 1))
    trainLoss = 0
    trainAcc = 0
    samples = 0
    mlp.train()
    # loop over the training data one mini-batch at a time
    for (batchX, batchY) in next_batch(trainX, trainY, BATCH_SIZE):
        # move the batch to the training device, run it through the model,
        # and compute the loss against integer class targets
        (batchX, batchY) = (batchX.to(DEVICE), batchY.to(DEVICE))
        predictions = mlp(batchX)
        loss = lossFunc(predictions, batchY.long())
        # zero the gradients accumulated from the previous step,
        # perform backpropagation, and update the model parameters
        opt.zero_grad()
        loss.backward()
        opt.step()
        # the loss is averaged over the batch, so weight it by the batch size
        # before accumulating; also count correct predictions and samples seen
        trainLoss += loss.item() * batchY.size(0)
        trainAcc += (predictions.max(1)[1] == batchY).sum().item()
        samples += batchY.size(0)

    # report the sample-weighted mean loss and the accuracy for this epoch
    print(trainTemplate.format(epoch + 1, (trainLoss / samples), (trainAcc / samples)))

[INFO] epoch: 1...
epoch: 1 train loss: 1.652 train accuracy: 0.421
[INFO] epoch: 2...
epoch: 2 train loss: 0.515 train accuracy: 0.747
[INFO] epoch: 3...
epoch: 3 train loss: 0.332 train accuracy: 0.879
[INFO] epoch: 4...
epoch: 4 train loss: 0.241 train accuracy: 0.939
[INFO] epoch: 5...
epoch: 5 train loss: 0.189 train accuracy: 0.965
[INFO] epoch: 6...
epoch: 6 train loss: 0.156 train accuracy: 0.973
[INFO] epoch: 7...
epoch: 7 train loss: 0.135 train accuracy: 0.974
[INFO] epoch: 8...
epoch: 8 train loss: 0.120 train accuracy: 0.976
[INFO] epoch: 9...
epoch: 9 train loss: 0.109 train accuracy: 0.976
[INFO] epoch: 10...
epoch: 10 train loss: 0.100 train accuracy: 0.978
[INFO] epoch: 11...
epoch: 11 train loss: 0.093 train accuracy: 0.981
[INFO] epoch: 12...
epoch: 12 train loss: 0.088 train accuracy: 0.981
[INFO] epoch: 13...
epoch: 13 train loss: 0.083 train accuracy: 0.981
[INFO] epoch: 14...
epoch: 14 train loss: 0.080 train accuracy: 0.981
[INFO] epoch: 15...
epoch: 15 train lo

In [80]:
# switch the model to evaluation mode and reset the running totals
mlp.eval()
testLoss, testAcc, samples = 0, 0, 0
# no gradients are needed while evaluating
with torch.no_grad():
    # loop over the test data one mini-batch at a time
    for (batchX, batchY) in next_batch(testX, testY, BATCH_SIZE):
        # move the batch to the current device
        batchX = batchX.to(DEVICE)
        batchY = batchY.to(DEVICE)
        # run the batch through the model and compute the loss
        predictions = mlp(batchX)
        loss = lossFunc(predictions, batchY.long())
        # accumulate the size-weighted loss, the number of correct
        # predictions, and the number of samples visited
        samples += batchY.size(0)
        testAcc += (predictions.max(dim=1).indices == batchY).sum().item()
        testLoss += loss.item() * batchY.size(0)

# summarize performance over the whole test set; `epoch` still holds the
# last value assigned by the training loop
testTemplate = "epoch: {} test loss: {:.3f} test accuracy: {:.3f}"
print(testTemplate.format(epoch + 1, (testLoss / samples), (testAcc / samples)))
print("")

epoch: 50 test loss: 0.050 test accuracy: 0.987

