## Apress - Industrialized Machine Learning Examples

Andreas Francois Vermeulen
2019

### This example is an add-on to the book and must be treated as covered by the book's copyright.

# Chapter-008-003-Torch-01

In [1]:
import datetime
from time import time

import numpy as np
import torch

In [2]:
# Show whether a CUDA device is usable; the Torch section below repeats this
# check to decide between the "cuda:0" and "cpu" devices.
torch.cuda.is_available()

False

In [3]:
# When True, each training loop prints the iteration index and loss on every step.
bdisplay = False

# Number of iterations run by both the NumPy and the Torch training loops.
tmax = 1968

In [4]:
# Problem dimensions:
#   N     - batch size
#   D_in  - input dimension
#   H     - hidden dimension
#   D_out - output dimension
N = 64
D_in = 1968
H = 100
D_out = 10

# Random input batch and random target batch, both drawn from a standard
# normal distribution (x first, then y, so the draw order is unchanged).
x = np.random.standard_normal((N, D_in))
y = np.random.standard_normal((N, D_out))

In [5]:
# Sanity check: the input batch was created with shape (N, D_in).
print(x.shape)

(64, 1968)


In [6]:
# Sanity check: the target batch was created with shape (N, D_out).
print(y.shape)

(64, 10)


In [7]:
# Banner marking the start of the NumPy benchmark section.
print('Testing NumPy Solution')

Testing NumPy Solution


In [8]:
# Wall-clock start of the NumPy benchmark. Everything executed between this
# cell and the later `end = time()` cell (weight initialisation and the
# training loop) is included in the measured time.
start = time()

In [9]:
# Step size for the hand-written gradient-descent updates.
learning_rate = 1e-6

# Start both weight matrices from standard-normal noise:
# w1 has shape (D_in, H) and w2 has shape (H, D_out).
w1 = np.random.standard_normal((D_in, H))
w2 = np.random.standard_normal((H, D_out))

In [10]:
# Train the two-layer ReLU network with hand-written forward and backward passes.
for t in range(tmax):
    # Forward pass: hidden pre-activations, ReLU, then the linear output layer.
    h = np.dot(x, w1)
    h_relu = np.maximum(h, 0)
    y_pred = np.dot(h_relu, w2)

    # Sum-of-squares loss over the whole batch; printed only when bdisplay is set.
    loss = np.sum(np.square(y_pred - y))
    if bdisplay:
        print(t, loss)

    # Backward pass: chain rule from the loss back to both weight matrices.
    grad_y_pred = 2.0 * (y_pred - y)
    grad_w2 = np.dot(h_relu.T, grad_y_pred)
    grad_h_relu = np.dot(grad_y_pred, w2.T)
    # ReLU passes no gradient where its input was negative.
    grad_h = np.where(h < 0, 0.0, grad_h_relu)
    grad_w1 = np.dot(x.T, grad_h)

    # Gradient-descent step, written back into the existing weight arrays.
    np.subtract(w1, learning_rate * grad_w1, out=w1)
    np.subtract(w2, learning_rate * grad_w2, out=w2)

In [11]:
# Stop the NumPy timer. time() returns seconds as a float, so the difference
# between two readings is a plain number of seconds.
end = time()
time_elapsed1 = end - start
# The label states the real unit (seconds); the value is not formatted as hh:mm:ss.ms.
print('Time elapsed NumPy (seconds) {}'.format(time_elapsed1))

Time elapsed NumPy (hh:mm:ss.ms) 0.21899819374084473


In [12]:
# Banner marking the start of the Torch benchmark section.
print('Testing Torch Solution')

Testing Torch Solution


In [13]:
# Wall-clock start of the Torch benchmark (overwrites the NumPy start time).
# Everything executed between this cell and the later `end = time()` cell
# (device selection, tensor creation and the training loop) is included.
start = time()

In [14]:
# Shared settings for the Torch benchmark.
dtype = torch.float
learning_rate = 1e-6

# Pick the compute device. p is the human-readable label that the timing
# comparison message prints later on.
if not torch.cuda.is_available():
    print('Running Torch on CPU')
    device = torch.device("cpu")
    p = 'CPU only'
else:
    print('Running Torch on GPU')
    device = torch.device("cuda:0")
    p = 'CPU & GPU'

# Random input and target batches, created directly on the selected device.
x = torch.randn(N, D_in, device=device, dtype=dtype)
y = torch.randn(N, D_out, device=device, dtype=dtype)

# Random starting weights for the hidden layer and the output layer.
w1 = torch.randn(D_in, H, device=device, dtype=dtype)
w2 = torch.randn(H, D_out, device=device, dtype=dtype)

Running Torch on CPU


In [15]:
# Same two-layer ReLU network as the NumPy loop, with manual gradients on Torch tensors.
for t in range(tmax):
    # Forward pass: hidden pre-activations, ReLU, then the linear output layer.
    h = torch.mm(x, w1)
    h_relu = torch.clamp(h, min=0)
    y_pred = torch.mm(h_relu, w2)

    # Sum-of-squares loss as a Python float; printed only when bdisplay is set.
    loss = torch.sum(torch.pow(y_pred - y, 2)).item()
    if bdisplay:
        print(t, loss)

    # Backward pass: chain rule from the loss back to both weight matrices.
    grad_y_pred = 2.0 * (y_pred - y)
    grad_w2 = torch.mm(h_relu.t(), grad_y_pred)
    grad_h_relu = torch.mm(grad_y_pred, w2.t())
    # ReLU passes no gradient where its input was negative.
    grad_h = grad_h_relu.masked_fill(h < 0, 0.0)
    grad_w1 = torch.mm(x.t(), grad_h)

    # Gradient-descent step, applied in place to the weight tensors.
    w1.sub_(learning_rate * grad_w1)
    w2.sub_(learning_rate * grad_w2)

In [16]:
# Stop the Torch timer. time() returns seconds as a float, so the difference
# between two readings is a plain number of seconds.
end = time()
time_elapsed2 = end - start
# The label states the real unit (seconds); the value is not formatted as hh:mm:ss.ms.
print('Time elapsed Torch (seconds) {}'.format(time_elapsed2))

Time elapsed Torch (hh:mm:ss.ms) 0.21299409866333008


In [17]:
# Compare the two benchmarks. time_elapsed1 is the NumPy run and
# time_elapsed2 is the Torch run.
if time_elapsed2 < time_elapsed1:
    verdict = 'faster'
    quicker, slower = time_elapsed2, time_elapsed1
else:
    verdict = 'slower'
    quicker, slower = time_elapsed1, time_elapsed2

# Speed ratio, always >= 1. A coarse clock can report 0.0 seconds for a very
# short run, so avoid dividing by zero.
if quicker > 0:
    ratio = slower / quicker
elif slower > 0:
    ratio = float('inf')
else:
    ratio = 1.0

# Each elapsed time is printed next to its own library in both outcomes:
# Torch always shows time_elapsed2 and NumPy always shows time_elapsed1.
# tmax is the number of training-loop iterations, so it is labelled as such.
print('Torch (%s) (%0.3f secs) is %0.2f times %s than NumPy (%0.3f secs) on %d iterations'
      % (p, time_elapsed2, ratio, verdict, time_elapsed1, tmax))

Torch (CPU only) (0.213 secs) is 1.03 times faster than NumPy (0.219 secs) on 19 records


In [18]:
# Summary statistics of the residuals (prediction minus target) left by the
# most recently executed training loop.
residual = y_pred - y
print('Minimum: %7.3f' % residual.min().item())
print('Mean   : %7.3f' % residual.mean().item())
print('Std    : %7.3f' % residual.std().item())
print('Maximum: %7.3f' % residual.max().item())

Minimum: -43.387
Mean   :  -3.498
Std    :   7.787
Maximum:  27.670


## Done

In [19]:
# Timestamp marking the end of the notebook run. datetime is imported in the
# notebook's import cell at the top rather than here.
now = datetime.datetime.now()
print('Done!',str(now))

Done! 2019-04-22 13:42:22.697139
