In [3]:
import torch
import numpy as np
# Legacy torch tensor types by element width:
#   8-bit : torch.ByteTensor (unsigned), torch.CharTensor (signed)
#   16-bit: torch.HalfTensor (float), torch.ShortTensor (int)
#   32-bit: torch.FloatTensor (float), torch.IntTensor (int)
#   64-bit: torch.DoubleTensor (float), torch.LongTensor (int)

In [2]:
# Method 1: allocate a 3 x 2 float32 tensor by giving its shape.
# This constructor does NOT initialize the memory; the zeros displayed below are
# just what happened to be there, so zero the tensor explicitly before using it.
a = torch.FloatTensor(3, 2)
a

tensor([[0., 0.],
        [0., 0.],
        [0., 0.]])

In [3]:
# In-place operation (trailing underscore): overwrites `a` with zeros and returns the same tensor
a.zero_()

tensor([[0., 0.],
        [0., 0.],
        [0., 0.]])

1. Tensor operations come in two kinds. In-place operations end in `_`; they overwrite the original tensor and return that same object.
2. Functional operations make a copy of the original, modify the copy, and leave the original tensor unchanged.
- In-place operations are faster and more memory-efficient than functional ones.

In [4]:
# Method 2: build a tensor directly from a nested Python list (values are stored as float32)
torch.FloatTensor([[1, 2, 3],[3, 2, 1]])

tensor([[1., 2., 3.],
        [3., 2., 1.]])

In [5]:
n = np.zeros(shape=(3, 2))
n

array([[0., 0.],
       [0., 0.],
       [0., 0.]])

In [6]:
# Method 3: convert an existing NumPy array.
# np.zeros defaults to 64-bit floats and torch.tensor() keeps the source dtype,
# so `b` ends up as torch.float64.
# float64 takes twice the memory of float32; specifying the dtype avoids the waste.
b = torch.tensor(n)
b

tensor([[0., 0.],
        [0., 0.],
        [0., 0.]], dtype=torch.float64)

In [7]:
# Option 1: create the NumPy array as float32 so the converted tensor is float32 too
n = np.zeros(shape=(3, 2), dtype=np.float32)
torch.tensor(n)

tensor([[0., 0.],
        [0., 0.],
        [0., 0.]])

In [8]:
# Option 2: keep NumPy's default float64 array and cast to float32 during the conversion
n = np.zeros(shape=(3, 2))
torch.tensor(n, dtype=torch.float32)

tensor([[0., 0.],
        [0., 0.],
        [0., 0.]])

In [9]:
# sum() returns a 0-dim tensor rather than a plain Python number;
# call .item() on the result to extract the scalar value.
a = torch.tensor([1, 2, 3])
s = a.sum()
# Only the final expression of a cell is displayed, so show the reduced result here.
s

tensor(6)

In [10]:
s = a.sum()
s

tensor(6)

In [11]:
s.item()

6

In [12]:
torch.tensor(1)

tensor(1)

- Training on CUDA: create tensors with `torch.cuda.FloatTensor()` or move existing ones with `to(device)`; `cuda:1` selects the second GPU card
- (device indices start from 0)
# First create a `torch.device` object, then call `to(device)` to do the conversion

In [13]:
a = torch.FloatTensor([2, 3])
a

tensor([2., 3.])

In [14]:
# Select the GPU when one is available and fall back to the CPU otherwise, so this
# cell also runs on machines without CUDA (a bare a.cuda() raises an error there).
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# to(device) returns a copy of `a` on the target device (or `a` itself if it already lives there)
ca = a.to(device)
ca

tensor([2., 3.], device='cuda:0')

In [16]:
# `a` is the CPU tensor, so this addition runs on the CPU (the result below carries no device tag)
a + 1

tensor([3., 4.])

In [17]:
# Same operation on `ca`: it executes on the device that holds `ca`, and the result stays on that device
ca + 1

tensor([3., 4.], device='cuda:0')

In [18]:
ca.device

device(type='cuda', index=0)

# Gradient
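- grad: the tensor holding the computed gradient
- is_leaf: True if the tensor was created by the user; False if it was produced by an operation
- requires_grad: whether a gradient must be computed for this tensor; results computed from such a tensor inherit the property. Set it when creating the tensor; the default is False.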

In [19]:
# v1 is tracked by autograd: operations on it are recorded so its gradient can be computed
v1 = torch.tensor([1.0, 1.0], requires_grad=True)
# v2 leaves requires_grad at its default (False), so no gradient is computed for it
v2 = torch.tensor([2.0, 2.0])

In [20]:
# v_sum is tracked by autograd because one of its inputs (v1) requires grad
v_sum = v1 + v2
# Reduce to a scalar: v_res = sum(2 * (v1 + v2))
v_res = (v_sum * 2).sum()
v_res

tensor(12., grad_fn=<SumBackward0>)

In [21]:
v1.is_leaf, v2.is_leaf

(True, True)

In [22]:
v_sum.is_leaf,v_res.is_leaf

(False, False)

In [23]:
v1.requires_grad

True

In [24]:
v2.requires_grad

False

In [25]:
v_sum.requires_grad

True

In [26]:
v_res.requires_grad

True

In [28]:
# backward() back-propagates from the scalar v_res and stores d(v_res)/d(v1) in v1.grad.
# Each entry of v1.grad is 2: increasing any single element of v1 by 1 increases v_res by 2.
v_res.backward()
v1.grad

tensor([2., 2.])

PyTorch only computes (and stores in `.grad`) gradients for leaf tensors that have requires_grad=True

In [29]:
# v2 does not require grad, so its .grad is None and the cell displays nothing
v2.grad

In [1]:
import torch.nn as nn

In [4]:
# nn.Linear(2, 5): fully connected layer mapping 2 input features to 5 output features.
# Its weights are randomly initialized, so the output values differ from run to run.
# nn.Linear, like the other layers in torch.nn, subclasses nn.Module, which provides:
#   parameters()      : iterator over the module's parameters (the tensors that receive gradients)
#   zero_grad()       : reset the gradients of all parameters
#   to(device)        : move the module's parameters to the given device (CPU or GPU)
#   state_dict()      : dictionary holding all of the module's parameters
#   load_state_dict() : load parameters into the module from such a dictionary
l = nn.Linear(2, 5)
v = torch.FloatTensor([1,2])
l(v)

tensor([-0.2337, -0.8945,  0.1094, -0.4891,  1.7695], grad_fn=<AddBackward0>)