# PyTorch Basics

## Init, helpers, utils, ...

In [147]:
%matplotlib inline

In [148]:
import torch 
import torch.nn as nn
import torch.nn.functional as F
import torchvision as optim
import torchvision

In [149]:
from pprint import pprint

import matplotlib.pyplot as plt
import numpy as np
from IPython.core.debugger import set_trace

# Tensors

tensors - the atoms of machine learning

## Tensors in numpy and pytorch

In [150]:
import numpy as np
from numpy.linalg import inv
from numpy.linalg import multi_dot as mdot

In [204]:
# numpy
np.eye(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [153]:
# torch
torch.eye(3)

tensor([[1., 0., 0.],
        [0., 1., 0.],
        [0., 0., 1.]])

In [203]:
# numpy 
X = torch.rand((5, 3))
X

tensor([[0.0134, 0.6280, 0.6851],
        [0.9599, 0.4947, 0.9580],
        [0.5945, 0.4410, 0.8043],
        [0.3570, 0.9288, 0.1820],
        [0.8503, 0.2386, 0.2215]])

In [155]:
# pytorch 
Y = torch.rand(5,3)
Y

    
print(type(Y))

<class 'torch.Tensor'>


In [156]:
X.shape

torch.Size([5, 3])

In [157]:
Y.shape

torch.Size([5, 3])

In [158]:
# numpy
X.T @ X

tensor([[1.0340, 1.0271, 1.0862],
        [1.0271, 1.3567, 1.3139],
        [1.0862, 1.3139, 2.1835]])

In [159]:
# torch 
Y.t() @ Y

tensor([[1.4583, 1.7847, 1.0369],
        [1.7847, 2.6678, 1.3518],
        [1.0369, 1.3518, 1.3196]])

In [160]:
# numpy 
inv(X.T @ X)

array([[ 4.045792  , -2.6697721 , -0.40611708],
       [-2.6697721 ,  3.5284803 , -0.7951296 ],
       [-0.40611708, -0.7951296 ,  1.1384732 ]], dtype=float32)

In [161]:
# torch
torch.inverse(Y.t() @ Y)

tensor([[ 4.2259, -2.3797, -0.8828],
        [-2.3797,  2.1194, -0.3012],
        [-0.8828, -0.3012,  1.7599]])

## More on Pytorch Tensors

Operations are also available as methods.

In [162]:
A = torch.eye(3)
A.add(1)

tensor([[2., 1., 1.],
        [1., 2., 1.],
        [1., 1., 2.]])

In [163]:
A

tensor([[1., 0., 0.],
        [0., 1., 0.],
        [0., 0., 1.]])

Any operation that mutates a tensor in-place has a _ suffix.

In [164]:
A.add_(1)
A

tensor([[2., 1., 1.],
        [1., 2., 1.],
        [1., 1., 2.]])

## Indexing and broadcasting

It works as expected/like numpy:

In [165]:
A

tensor([[2., 1., 1.],
        [1., 2., 1.],
        [1., 1., 2.]])

In [166]:
A[0,0]

tensor(2.)

In [167]:
A[0]

tensor([2., 1., 1.])

In [168]:
A[0:2]

tensor([[2., 1., 1.],
        [1., 2., 1.]])

In [169]:
A[:, 1:3]

tensor([[1., 1.],
        [2., 1.],
        [1., 2.]])

## Converting

In [170]:
A = torch.eye(3)
A

tensor([[1., 0., 0.],
        [0., 1., 0.],
        [0., 0., 1.]])

In [206]:
# torch --> numpy
B = A.numpy()
B

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]], dtype=float32)

Note: torch and numpy can share the same memory / zero-copy

In [211]:
A.add_(.5)
A

tensor([[1.5000, 0.5000, 0.5000],
        [0.5000, 1.5000, 0.5000],
        [0.5000, 0.5000, 1.5000]])

In [212]:
B

array([[1.5, 0.5, 0.5],
       [0.5, 1.5, 0.5],
       [0.5, 0.5, 1.5]], dtype=float32)

In [216]:
# numpy --> torch 
torch.from_numpy(np.eye(3))

tensor([[1., 0., 0.],
        [0., 1., 0.],
        [0., 0., 1.]], dtype=torch.float64)

## Much more

In [218]:
[o for o in dir(torch) if not o.startswith("_")]

['AVG',
 'AggregationType',
 'AnyType',
 'Argument',
 'ArgumentSpec',
 'Assert',
 'BFloat16Storage',
 'BFloat16Tensor',
 'BenchmarkConfig',
 'BenchmarkExecutionStats',
 'Block',
 'BoolStorage',
 'BoolTensor',
 'BoolType',
 'BufferDict',
 'ByteStorage',
 'ByteTensor',
 'CONV_BN_FUSION',
 'CallStack',
 'Capsule',
 'CharStorage',
 'CharTensor',
 'ClassType',
 'Code',
 'CompilationUnit',
 'CompleteArgumentSpec',
 'ComplexDoubleStorage',
 'ComplexFloatStorage',
 'ConcreteModuleType',
 'ConcreteModuleTypeBuilder',
 'CudaBFloat16StorageBase',
 'CudaBoolStorageBase',
 'CudaByteStorageBase',
 'CudaCharStorageBase',
 'CudaComplexDoubleStorageBase',
 'CudaComplexFloatStorageBase',
 'CudaDoubleStorageBase',
 'CudaFloatStorageBase',
 'CudaHalfStorageBase',
 'CudaIntStorageBase',
 'CudaLongStorageBase',
 'CudaShortStorageBase',
 'DeepCopyMemoTable',
 'DeviceObjType',
 'DictType',
 'DisableTorchFunction',
 'DoubleStorage',
 'DoubleTensor',
 'EnumType',
 'ErrorReport',
 'ExecutionPlan',
 'FUSE_ADD_REL

In [219]:
[o for o in dir(A) if not o.startswith("_")]

['T',
 'abs',
 'abs_',
 'absolute',
 'absolute_',
 'acos',
 'acos_',
 'acosh',
 'acosh_',
 'add',
 'add_',
 'addbmm',
 'addbmm_',
 'addcdiv',
 'addcdiv_',
 'addcmul',
 'addcmul_',
 'addmm',
 'addmm_',
 'addmv',
 'addmv_',
 'addr',
 'addr_',
 'align_as',
 'align_to',
 'all',
 'allclose',
 'amax',
 'amin',
 'angle',
 'any',
 'apply_',
 'arccos',
 'arccos_',
 'arccosh',
 'arccosh_',
 'arcsin',
 'arcsin_',
 'arcsinh',
 'arcsinh_',
 'arctan',
 'arctan_',
 'arctanh',
 'arctanh_',
 'argmax',
 'argmin',
 'argsort',
 'as_strided',
 'as_strided_',
 'as_subclass',
 'asin',
 'asin_',
 'asinh',
 'asinh_',
 'atan',
 'atan2',
 'atan2_',
 'atan_',
 'atanh',
 'atanh_',
 'backward',
 'baddbmm',
 'baddbmm_',
 'bernoulli',
 'bernoulli_',
 'bfloat16',
 'bincount',
 'bitwise_and',
 'bitwise_and_',
 'bitwise_not',
 'bitwise_not_',
 'bitwise_or',
 'bitwise_or_',
 'bitwise_xor',
 'bitwise_xor_',
 'bmm',
 'bool',
 'byte',
 'cauchy_',
 'ceil',
 'ceil_',
 'char',
 'cholesky',
 'cholesky_inverse',
 'cholesky_solve

## But what about the GPU?

How do I use the GPU?

If you have a GPU make sure that the right pytorch is installed (check https://pytorch.org/ for details).  

In [183]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device

device(type='cuda', index=0)

If you have a GPU you should get something like: device(type='cuda', index=0)

You can move data to the GPU by doing .to(device).

In [223]:
data = torch.eye(3)
data.to(device)

tensor([[1., 0., 0.],
        [0., 1., 0.],
        [0., 0., 1.]], device='cuda:0')

Now the computation happens on the GPU.

In [224]:
res = data + data
res

tensor([[2., 0., 0.],
        [0., 2., 0.],
        [0., 0., 2.]])

In [225]:
res.device

device(type='cpu')

Note: before v0.4 one had to use .cuda() and .cpu() to move stuff to and from the GPU. This littered the code with many:
        
    if CUDA:
        model = model.cuda()

## Automatic differentiation with autograd

Prior to v0.4 PyTorch used the class Variable to record gradients. Your had to wrap Tensors in Variables. Variables behaved exactly like Tensors.

With v0.4 Tensor can record gradients directly if you tell it to do so, e.g. torch.ones(3,requires_grad=True). There is no need for Variable anymore. Many tutorials still use Variable, be aware!

Ref

- http://pytorch.org/docs/stable/autograd.html
- http://pytorch.org/tutorials/beginner/blitz/autograd_tutorial.html

You rarely use torch.autograd directly. Pretty much everything is part of torch.Tensor now. Simply add requires_grad=True to the tensors you want to calculate the gradients for. nn.Module track gradients automatically.

In [228]:
from torch import autograd

In [231]:
x = torch.tensor(2.)
x

tensor(2.)

In [233]:
x = torch.tensor(2., requires_grad=True)
x

tensor(2., requires_grad=True)

In [235]:
print(x.requires_grad)

True


In [238]:
y = x ** 2

print("Grad of x:", x.grad)

Grad of x: None


In [240]:
y = x ** 2 
y.backward()

print("Grad of x:", x.grad)

Grad of x: tensor(8.)


In [244]:
# What is goind to happen here?
# x = torch.tensor(2.)
# x.backward()

In [247]:
# Don't record the gradient
# Useful for inference

params = torch.tensor(2., requires_grad=True)

with torch.no_grad():
    y = x * x
    print(x.grad_fn)

None


nn.Module and nn.Parameter keep track of gradients for you.

In [251]:
lin = nn.Linear(2, 1, bias=True)
lin.weight

Parameter containing:
tensor([[ 0.5101, -0.5042]], requires_grad=True)

In [253]:
type(lin.weight)

torch.nn.parameter.Parameter

In [257]:
isinstance(lin.weight, torch.FloatTensor)

True

## Exercise 

- Do you remember the analytical solution to solve for the parameters of linear regression? Implement it.