# 1 Installation


In [None]:
!conda install pytorch torchvision torchaudio cudatoolkit=10.2 -c pytorch

In [1]:
import torch

# Print the installed PyTorch version; the cell's last expression then shows
# whether a CUDA device can be used.
print(torch.__version__)
torch.cuda.is_available()

1.2.0


True

# 2 Tensor
## 2.1 Create a tensor

In [1]:
# Uninitialized tensor: `torch.empty` only allocates memory, so the printed
# values are arbitrary leftovers and will differ from run to run.
x = torch.empty(5, 3)
print(x)

tensor([[1.0305e+21, 7.1833e+22, 1.2102e+25],
        [1.2415e+28, 1.0804e+27, 4.5452e+30],
        [1.8910e+23, 7.1443e+31, 7.5016e+28],
        [7.7782e+31, 6.9983e+28, 5.0840e+31],
        [1.8057e+28, 6.9983e+28, 7.3982e+20]])


In [4]:
# Randomly initialized tensor: entries are drawn uniformly from [0, 1)
x = torch.rand(5, 3)
print(x)

tensor([[0.3358, 0.8925, 0.9992],
        [0.6815, 0.3979, 0.0456],
        [0.2348, 0.1740, 0.3410],
        [0.1765, 0.8558, 0.1151],
        [0.1539, 0.0895, 0.5048]])


In [6]:
# Build a tensor directly from a Python list; the dtype is inferred from the data
x = torch.tensor([5, 3])
print(x)

tensor([5, 3])


In [9]:
# From an existing tensor: `rand_like` reuses the shape of `x`,
# while the dtype is overridden explicitly
x = torch.ones(5, 3, dtype=torch.float64)
y = torch.rand_like(x, dtype=torch.float)
print(x)
print(y)

tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]], dtype=torch.float64)
tensor([[0.9712, 0.3796, 0.3490],
        [0.9114, 0.4722, 0.1705],
        [0.1037, 0.7482, 0.6387],
        [0.1871, 0.7565, 0.7705],
        [0.8225, 0.5013, 0.2657]])


## 2.2 Properties

In [11]:
# Size/shape: the `size()` method and the `shape` attribute report the same `torch.Size`
x = torch.ones(5, 3)
print(x.size())
print(x.shape)

torch.Size([5, 3])
torch.Size([5, 3])


In [17]:
# Addition: four spellings that all produce the same values
x = torch.rand(5, 3)
y = torch.rand(5, 3)

# 1) operator form
print(x + y)

# 2) functional form, writing into a preallocated output tensor
result = torch.empty(5, 3)  # must exist before it is passed as `out`
torch.add(x, y, out=result)
print(result)

# 3) method form: returns a new tensor and leaves `y` unchanged
print(y.add(x))

# 4) in-place method form (trailing underscore): overwrites `y`
y.add_(x)
print(y)

tensor([[0.9439, 0.5048, 1.0588],
        [0.5335, 0.7277, 1.0580],
        [1.2801, 0.4771, 1.2043],
        [1.0175, 1.4859, 0.6293],
        [0.4982, 1.6123, 0.6505]])
tensor([[0.9439, 0.5048, 1.0588],
        [0.5335, 0.7277, 1.0580],
        [1.2801, 0.4771, 1.2043],
        [1.0175, 1.4859, 0.6293],
        [0.4982, 1.6123, 0.6505]])
tensor([[0.9439, 0.5048, 1.0588],
        [0.5335, 0.7277, 1.0580],
        [1.2801, 0.4771, 1.2043],
        [1.0175, 1.4859, 0.6293],
        [0.4982, 1.6123, 0.6505]])
tensor([[0.9439, 0.5048, 1.0588],
        [0.5335, 0.7277, 1.0580],
        [1.2801, 0.4771, 1.2043],
        [1.0175, 1.4859, 0.6293],
        [0.4982, 1.6123, 0.6505]])


In [21]:
# Indexing and slicing: the result shares memory with the original tensor; no new data memory is allocated
x = torch.ones(5, 3)
y = x[0, :]
y += 1
print(x) # the first row of x changes together with y

tensor([[2., 2., 2.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]])


In [32]:
# Reshape: `view()` returns a new tensor object with a different shape, but it shares the underlying data with the original tensor (no data is copied)
x = torch.ones(5, 3)
z = x.view(-1, 5)
x[0, 0] -= 1 # changing x also changes z (and vice versa) because they share data
print(z)

tensor([[0., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.]])


In [34]:
# Reshape a copy: `clone()` gives `x_cp` its own data, so the later in-place change to `x` does not affect it
x = torch.ones(5, 3)
x_cp = x.clone().view(-1, 5)
x -= 1
print(x)
print(x_cp)

tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]])
tensor([[1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.]])


In [37]:
# Convert a one-element tensor to a Python scalar with `item()`
x = torch.randn(1)
print(x)
print(x.item())

tensor([1.0237])
1.0237070322036743


## 2.3 Broadcasting
The broadcasting mechanism expands one or both tensors by replicating elements along their size-1 dimensions so that, after this transformation, the two tensors have the same shape and can be combined elementwise.

In [38]:
# Broadcasting demo: a (1, 2) row and a (3, 1) column are both expanded to
# (3, 2) before being added elementwise.
# `arange(start, end)` excludes `end`, so x holds 1..2 and y holds 1..3.
x = torch.arange(1, 3).view(1, 2)
print(x)
y = torch.arange(1, 4).view(3, 1)
print(y)
print(x + y)

tensor([[1, 2]])
tensor([[1],
        [2],
        [3]])
tensor([[2, 3],
        [3, 4],
        [4, 5]])


## 2.4 Saving Memory
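Python's `id()` function returns the identity of an object (in CPython, its memory address). Comparing ids before and after an operation therefore tells us whether `y` still refers to the same tensor object or has been rebound to a newly allocated one.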

In [39]:
# Baseline: a plain `y = x + y` creates a new tensor and rebinds the name `y` to it
x = torch.tensor([1, 2])
y = torch.tensor([3, 4])
id_before = id(y)
y = x + y  # a new tensor is allocated for the result, so the id changes
print(id(y) == id_before)

False


In [41]:
# Slice assignment keeps `y` bound to the same tensor object
x = torch.tensor([1, 2])
y = torch.tensor([3, 4])
id_before = id(y)
y[:] = x + y # the result is written into y's existing memory (the temporary `x + y` is still created first)
print(id(y) == id_before)

True


In [42]:
# Passing `out=y` makes `torch.add` store the sum in the existing tensor `y`
x = torch.tensor([1, 2])
y = torch.tensor([3, 4])
id_before = id(y)
torch.add(x, y, out=y)  # no new tensor is allocated for the result
print(id(y) == id_before)


True


In [43]:
# The augmented assignment `+=` updates `y` in place
x = torch.tensor([1, 2])
y = torch.tensor([3, 4])
id_before = id(y)
y += x # in-place add: the result is stored in y's existing memory
print(id(y) == id_before)

True


In [45]:
# The trailing-underscore method `add_` is the explicit in-place variant of `add`
x = torch.tensor([1, 2])
y = torch.tensor([3, 4])
id_before = id(y)
y.add_(x) # in-place add: the result is stored in y's existing memory
print(id(y) == id_before)

True


## 2.5 Conversion to Numpy and vice versa

In [49]:
# Tensor -> numpy.ndarray with `numpy()`: both objects SHARE the same memory,
# so an in-place change to either one is visible through the other
a = torch.ones(5)
b = a.numpy()
print(a, b)
a += 1
print(a, b)
b += 1
print(a, b)

tensor([1., 1., 1., 1., 1.]) [1. 1. 1. 1. 1.]
tensor([2., 2., 2., 2., 2.]) [2. 2. 2. 2. 2.]
tensor([3., 3., 3., 3., 3.]) [3. 3. 3. 3. 3.]


In [50]:
# numpy.ndarray -> tensor with `torch.from_numpy()`: both objects SHARE the same memory,
# so an in-place change to either one is visible through the other
import numpy as np
a = np.ones(5)
b = torch.from_numpy(a)
print(a, b)

a += 1
print(a, b)
b += 1
print(a, b)


[1. 1. 1. 1. 1.] tensor([1., 1., 1., 1., 1.], dtype=torch.float64)
[2. 2. 2. 2. 2.] tensor([2., 2., 2., 2., 2.], dtype=torch.float64)
[3. 3. 3. 3. 3.] tensor([3., 3., 3., 3., 3.], dtype=torch.float64)


In [52]:
# numpy.ndarray -> tensor with `torch.tensor()`: the data is COPIED, so the two
# objects do NOT share memory and later changes stay independent
a = np.ones(5)
b = torch.tensor(a)
print(a, b)
a += 1
print(a, b)
b += 2
print(a, b)

[1. 1. 1. 1. 1.] tensor([1., 1., 1., 1., 1.], dtype=torch.float64)
[2. 2. 2. 2. 2.] tensor([1., 1., 1., 1., 1.], dtype=torch.float64)
[2. 2. 2. 2. 2.] tensor([3., 3., 3., 3., 3.], dtype=torch.float64)


## 2.6 Tensor on GPU

In [2]:
# Run the GPU demo only when a CUDA device is actually available
x = torch.ones(5, 3)
if torch.cuda.is_available():
    device = torch.device("cuda")
    y = torch.ones_like(x, device=device)  # created directly on the GPU
    x = x.to(device)  # equivalent to `x = x.to("cuda")`
    z = x + y
    print(z)
    print(z.to("cpu", torch.double))  # move back to the CPU and change the dtype in one call

tensor([[2., 2., 2.],
        [2., 2., 2.],
        [2., 2., 2.],
        [2., 2., 2.],
        [2., 2., 2.]], device='cuda:0')
tensor([[2., 2., 2.],
        [2., 2., 2.],
        [2., 2., 2.],
        [2., 2., 2.],
        [2., 2., 2.]], dtype=torch.float64)


# 3 Data Preprocessing
## 3.1 Reading the dataset

In [4]:
# Create a tiny CSV dataset on disk; "NA" marks a missing entry
import os

os.makedirs(os.path.join(".", "data"), exist_ok=True)
data_file = os.path.join(".", "data", "house_tiny.csv")
with open(data_file, "w") as f:
    f.writelines([
        "NumRooms,Alley,Price\n",  # Column names
        "NA,Pave,127500\n",
        "2,NA,106000\n",
        "4,NA,178100\n",
        "NA,NA,140000\n",
    ])

In [5]:
# Read the dataset back; pandas parses the literal "NA" fields as missing values (NaN)
import pandas as pd
data = pd.read_csv(data_file)
print(data)

   NumRooms Alley   Price
0       NaN  Pave  127500
1       2.0   NaN  106000
2       4.0   NaN  178100
3       NaN   NaN  140000


## 3.2 Handle Missing Data

In [11]:
# Split by position: the first two columns are the features, the last one is the target.
inputs, outputs = data.iloc[:, 0:2], data.iloc[:, 2]
# Fill missing numerical values with the column mean. `numeric_only=True` keeps
# the string column "Alley" out of the mean; without it, pandas >= 2.0 raises a
# TypeError. Missing values in "Alley" are left as NaN here.
inputs = inputs.fillna(inputs.mean(numeric_only=True))
print(inputs)

   NumRooms Alley
0       3.0  Pave
1       2.0   NaN
2       4.0   NaN
3       3.0   NaN


In [13]:
# For categorical or discrete values, we treat NaN as a category of its own and expand the column "Alley" into one indicator column per category
inputs = pd.get_dummies(inputs, dummy_na=True)
print(inputs)

   NumRooms  Alley_Pave  Alley_nan
0       3.0           1          0
1       2.0           0          1
2       4.0           0          1
3       3.0           0          1


## 3.3 Conversion to the Tensor Format

In [15]:
# Convert features and target to tensors. `to_numpy(dtype=float)` forces a
# numeric array even when the indicator columns are boolean (as newer pandas
# versions produce); a mixed float/bool frame would otherwise yield an object
# array that `torch.tensor` cannot convert.
X, y = torch.tensor(inputs.to_numpy(dtype=float)), torch.tensor(outputs.values)
X, y

(tensor([[3., 1., 0.],
         [2., 0., 1.],
         [4., 0., 1.],
         [3., 0., 1.]], dtype=torch.float64),
 tensor([127500, 106000, 178100, 140000]))

# 4 Autograd
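Attributes and methods of `Tensor`:
- `requires_grad`: set to `True` when we want operations on the tensor to be tracked for gradient computation
- `grad_fn`: the `Function` that created this tensor (`None` for tensors created directly by the user)
- `grad`: the accumulated gradient, filled in by `backward()`
- `backward()`: runs backpropagation and accumulates the result into the `grad` attribute of the leaf tensors
- `detach()`: returns a tensor that shares the same data but is cut off from the computation history, so later operations on it are not tracked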

## 4.1 `requires_grad` and `grad_fn`

In [19]:
x = torch.ones(2, 2, requires_grad=True)
# A tensor created directly by the user is called a leaf; a leaf node has no `grad_fn`
print(x)
print(x.is_leaf)
print(x.grad_fn)

tensor([[1., 1.],
        [1., 1.]], requires_grad=True)
True
None


In [20]:
y = x + 2
# y is the result of an addition, so it is not a leaf and its `grad_fn` is an `AddBackward` node
print(y)
print(y.is_leaf)
print(y.grad_fn)

tensor([[3., 3.],
        [3., 3.]], grad_fn=<AddBackward0>)
False
<AddBackward0 object at 0x7f59d261b080>


In [21]:
# Further operations extend the graph: each result records the operation that produced it in `grad_fn`
z = y * y * 3
out = z.mean()
print(z)
print(out)

tensor([[27., 27.],
        [27., 27.]], grad_fn=<MulBackward0>)
tensor(27., grad_fn=<MeanBackward0>)


In [22]:
# Switching `requires_grad` on for an existing tensor
a = torch.rand(2, 2)  # `requires_grad` is False by default
a = (a * 3) / (a - 1)
print(a.requires_grad)

a.requires_grad_(True)  # trailing underscore: flips the flag in place
print(a.requires_grad)

# Operations performed after the switch are tracked, so `b` has a `grad_fn`
b = (a * a).sum()
print(b.grad_fn)

False
True
<SumBackward0 object at 0x7f59d261b7f0>


## 4.2 backward()
### 4.2.1 Scalar
For `loss.backward()`, if `loss` is a scalar, we do not need to pass any argument to `backward()`.

In [23]:
# Rebuild the graph from a fresh leaf `x` so that `x.grad` starts out empty
x = torch.ones(2, 2, requires_grad=True)
y = x + 2
z = y * y * 3
out = z.mean()  # scalar result: the mean over the four entries of z

$\frac{dy}{dx} = \boldsymbol I, \frac{dz}{dy} = 6\boldsymbol y, \frac{dout}{dz} = \frac{1}{4}$

$\frac{dout}{dx} = \frac{3}{2}\boldsymbol y$

In [24]:
# `out` is a scalar, so `backward()` needs no argument; the gradient d(out)/dx is stored in `x.grad`
out.backward()
print(x.grad)

tensor([[4.5000, 4.5000],
        [4.5000, 4.5000]])


In [25]:
# Gradients ACCUMULATE: every `backward()` call adds its result into `x.grad`
out2 = x.sum()
out2.backward()
print("d out/d x + d out2 / d x: \n", x.grad)

out3 = x.sum()
# Reset the accumulated gradient in place before the next backward pass.
# `x.grad.zero_()` is used instead of going through the legacy `.data` attribute.
x.grad.zero_()
out3.backward()
print("d out3/dx:\n", x.grad)

d out/d x + d out2 / d x: 
 tensor([[5.5000, 5.5000],
        [5.5000, 5.5000]])
d out3/dx:
 tensor([[1., 1.],
        [1., 1.]])
