In [70]:
import torch
import torchviz
import numpy as np
import os
# Graphviz binaries must be discoverable on PATH for torchviz rendering.
# The folder below is a Windows install location; it is appended only when it
# exists and is not already listed, so re-running this cell (or running it on
# a machine without that folder) leaves PATH unchanged.
GRAPHVIZ_BIN = 'C:/Program Files/Graphviz/bin/'
_current_path = os.environ.get("PATH", "")
if os.path.isdir(GRAPHVIZ_BIN) and GRAPHVIZ_BIN not in _current_path.split(os.pathsep):
    os.environ["PATH"] = _current_path + os.pathsep + GRAPHVIZ_BIN

# Introduction to PyTorch

## What's a Tensor?

It's a multi-dimensional array:
* Scalar = 0D
* Vector = 1D
* Matrix = 2D
* ... = 3+D


Compared to a NumPy `ndarray`:
1. Tensors can run on GPUs (via CUDA)
2. Tensors are optimized for automatic differentiation (using Autograd)

In [5]:
# Build a tensor directly from a plain Python list

array = [1, 2, 3]

# dtype: element type stored in the tensor
# device: "cpu" here; "cuda" (or "cuda:0" for the first CUDA device) would target a GPU
# requires_grad: track operations on this tensor so gradients can be computed for it
tensor = torch.tensor(array, dtype=torch.float32, device="cpu", requires_grad=True)

(tensor, tensor.shape)

(tensor([1., 2., 3.], requires_grad=True), torch.Size([3]))

In [13]:
# Build a tensor from a NumPy array
# torch.from_numpy wraps the array's existing buffer instead of converting
# element by element; the cast to float32 below then produces a new tensor.

array = [1, 2, 3]
np_array = np.asarray(array)
tensor = torch.from_numpy(np_array).to(torch.float32)
tensor.requires_grad_()  # a trailing "_" marks methods that modify the tensor in place

(tensor, tensor.shape)

(tensor([1., 2., 3.], requires_grad=True), torch.Size([3]))

In [16]:
# Choose the device computations will run on.
# GPUs are usually faster, but their memory is limited, so tensors are
# moved between CPU and GPU explicitly as needed.

if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

tensor = tensor.to(device)  # moves to CUDA when available; otherwise stays on the CPU

In [20]:
# Now we can perform computations on the selected device

a = torch.randn(100, 100, device=device)  # allocate directly on the device (no CPU -> GPU copy)
b = a.clone()  # a clone already lives on the same device as its source
c = a + b  # runs on `device`, because both operands live there
print(c.device)
# detach() drops the autograd history (a no-op here, since nothing requires grad),
# then cpu() brings the result back to host memory
c = c.detach().cpu()
print(c)
print(c.device)

cpu
tensor([[-5.0010,  1.5383, -0.2532,  ..., -3.2335,  0.8548, -0.9914],
        [ 2.0098, -1.8367,  2.4345,  ...,  0.0708, -2.0784, -1.7096],
        [ 3.9065,  2.1775,  3.2484,  ...,  2.8445,  0.1775, -1.4359],
        ...,
        [-0.3452,  0.6202,  0.3127,  ..., -2.5344, -1.1518,  0.2549],
        [-0.2588,  1.8827, -0.8011,  ...,  0.0194, -2.0182,  0.6634],
        [-0.2366,  0.5906,  0.5667,  ..., -3.3740, -5.3911,  0.7234]])
cpu


## Reshaping Tensors

In [24]:
# The integers 0..9 as a 1-D tensor (start defaults to 0 and step to 1)
t = torch.arange(10)
t

tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [31]:
# view: reinterprets the same memory with a new shape; needs a compatible (e.g. contiguous) layout
t2a = t.view((2, -1))
# reshape: returns a view when possible and falls back to copying the data otherwise
t2b = torch.reshape(t, (2, -1))

(t2a, t2b)

(tensor([[0, 1, 2, 3, 4],
         [5, 6, 7, 8, 9]]),
 tensor([[0, 1, 2, 3, 4],
         [5, 6, 7, 8, 9]]))

In [38]:
x = torch.randn(2, 4, 8)  # shape: 2x4x8
z = x[:, ::2]  # keep dim 0 as is; take every second entry along dim 1
print(z.shape)  # z is a strided view of x and is no longer contiguous

# view() cannot flatten a non-contiguous tensor. The failure is the point of
# this cell, so it is caught and displayed instead of aborting a "Run All".
try:
    z.view(-1)
except RuntimeError as err:
    print(f"RuntimeError: {err}")

torch.Size([2, 2, 8])


RuntimeError: view size is not compatible with input tensor's size and stride (at least one dimension spans across two contiguous subspaces). Use .reshape(...) instead.

In [41]:
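# Other reshaping functions:

# squeeze    - remove dimensions of size 1
# unsqueeze  - insert a dimension of size 1 at a given position
# expand     - broadcast size-1 dimensions to a larger size without copying data
# transpose  - swap two dimensions
# permute    - reorder all dimensions
# reshape    - change the shape, copying only when a view is not possible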

## Operations on Tensors

In [45]:
# Element-wise arithmetic between two tensors of the same shape
print(t)
print(torch.add(t, t))
print(torch.sub(t, t))
print(torch.mul(t, t))
print(torch.div(t, t))  # true division: the result is float, and 0/0 yields nan

tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
tensor([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18])
tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
tensor([ 0,  1,  4,  9, 16, 25, 36, 49, 64, 81])
tensor([nan, 1., 1., 1., 1., 1., 1., 1., 1., 1.])


In [46]:
torch.log(t)  # element-wise natural logarithm; log(0) evaluates to -inf

tensor([  -inf, 0.0000, 0.6931, 1.0986, 1.3863, 1.6094, 1.7918, 1.9459, 2.0794,
        2.1972])

In [47]:
torch.exp(t)  # element-wise exponential, e**t

tensor([1.0000e+00, 2.7183e+00, 7.3891e+00, 2.0086e+01, 5.4598e+01, 1.4841e+02,
        4.0343e+02, 1.0966e+03, 2.9810e+03, 8.1031e+03])

In [48]:
torch.sqrt(t)  # element-wise square root

tensor([0.0000, 1.0000, 1.4142, 1.7321, 2.0000, 2.2361, 2.4495, 2.6458, 2.8284,
        3.0000])

In [49]:
torch.pow(t, 3)  # element-wise cube; an integer tensor stays integer

tensor([  0,   1,   8,  27,  64, 125, 216, 343, 512, 729])

In [54]:
# torch.tensor is the recommended factory for building a tensor from data; the
# legacy torch.Tensor(...) constructor is an alias of the default tensor type.
# Float literals keep the floating-point values that constructor produced.
t = torch.tensor([2.0, 4.0, 6.0])
y = torch.randn(2, 2)

y

tensor([[-0.1564,  2.5631],
        [-0.1746,  0.5348]])

In [59]:
y @ y  # matrix product of y with itself ("y squared"), not an element-wise square

tensor([[-0.4231,  0.9701],
        [-0.0661, -0.1615]])

In [60]:
t @ t  # with two 1-D operands, matmul reduces to the dot product

tensor(56.)

In [62]:
# t has 3 elements but y has only 2 rows, so the inner dimensions do not match.
# The failure is the point of this cell, so it is caught and displayed instead
# of aborting a "Run All".
try:
    torch.matmul(t, y)
except RuntimeError as err:
    print(f"RuntimeError: {err}")

RuntimeError: mat1 and mat2 shapes cannot be multiplied (1x3 and 2x2)

## Practice

1. Create a 2D tensor and add a batch dimension of size 1
2. Create a random tensor of shape 5x3 in the interval [3, 7)
3. Create a tensor with values from a normal with mean=0, std=3
4. Perform a batch product between 3D tensors
5. Return a batch matrix product between a 3D tensor and a 2D tensor

In [None]:
# TODO

# Deep Learning with PyTorch

In [72]:
# A one-neuron network: y = log(w . x + b)
# Pre-activation: the weighted sum w . x + b
# Activation: log (an unusual choice, used here for illustration)

x = torch.tensor([[1.0, 2.0]])
w = torch.tensor([[3.0, 4.0]], requires_grad=True)
b = torch.tensor(3.0, requires_grad=True)

y = (x @ w.t() + b).log()

# Backward pass: fills w.grad and b.grad with dy/dw and dy/db
y.backward()

# Show the gradients
print("dy/dw:", w.grad)
print("dy/db",  b.grad)

dy/dw: tensor([[0.0714, 0.1429]])
dy/db tensor(0.0714)


![Forward and Backward Passes](img/pytorch-01.jpeg)


In [73]:
#torchviz.make_dot(y) # Requires GraphViz (doesn't work as of now)

In [76]:
# Toy dataset: 5 samples with 2 random features each, and one binary label per sample
inputs = torch.randn((5, 2))
targets = torch.tensor([1.0, 1.0, 1.0, 0.0, 0.0], dtype=torch.float32)

In [80]:
# Build the Model
def SimpleNNModel(inputs, weight, bias):
    """Single-layer model: an affine map followed by a sigmoid.

    Computes ``sigmoid(inputs @ weight + bias)`` and returns the result.
    """
    return torch.sigmoid(torch.matmul(inputs, weight) + bias)

In [81]:
# Initialise the parameters: one random weight per input feature, and a bias starting at 1
weight = torch.randn(inputs.shape[-1], requires_grad=True)
bias   = torch.ones((1,), requires_grad=True)

In [82]:
def BinaryCrossEntropyLoss(y_pred, y_true, eps=1e-7):
    """Mean binary cross-entropy between predicted probabilities and targets.

    Args:
        y_pred: tensor of predicted probabilities, expected to lie in [0, 1].
        y_true: tensor of targets, broadcastable against ``y_pred``.
        eps: predictions are clamped to ``[eps, 1 - eps]`` before taking the
            logarithm. The default suits float32/float64 inputs.

    Returns:
        A scalar tensor holding the mean of ``-y*log(p) - (1-y)*log(1-p)``.
    """
    # A saturated prediction (exactly 0 or 1) would give log(0) = -inf, turning
    # the loss into inf, or into nan through 0 * -inf. Clamping keeps both the
    # loss and its gradient finite.
    y_pred = y_pred.clamp(min=eps, max=1 - eps)
    return (-y_true * torch.log(y_pred) - (1 - y_true) * torch.log(1 - y_pred)).mean()

In [95]:
# One gradient-descent update of the parameters

print("Wieghts before iteration:", weight)

# Forward pass
predictions = SimpleNNModel(inputs, weight, bias)

# Loss between predictions and targets
error = BinaryCrossEntropyLoss(predictions, targets)

# Backward pass: populates weight.grad and bias.grad
error.backward()

# Parameter update, performed outside of autograd tracking
lr = 0.001
with torch.no_grad():
    weight.sub_(lr * weight.grad)
    bias.sub_(lr * bias.grad)

# Clear the gradients so the next backward() does not accumulate onto them
weight.grad.zero_()
bias.grad.zero_()

print("Wieghts after iteration:", weight)

Wieghts before iteration: tensor([ 0.9433, -0.6566], requires_grad=True)
Wieghts after iteration: tensor([ 0.9430, -0.6561], requires_grad=True)
