# Part 1 for Pytorch
## 1. Check Pytorch version and if CUDA is installed

In [16]:
import torch
print(torch.__version__)

1.2.0


In [17]:
torch.cuda.is_available()

False

In [18]:
torch.version.cuda

## 2. Why Deep Learning uses GPUs
CUDA is an API for general-purpose computing on GPUs. A neural network consists of a large number of simple, independent computations, which makes it well suited to the parallel computing that GPUs provide.

In [19]:
t = torch.tensor([1,2,3])    # Create a tensor on CPU
t

tensor([1, 2, 3])

In [None]:
# Move the tensor to the GPU only when CUDA is usable. Calling .cuda()
# unconditionally raises an error on a machine where
# torch.cuda.is_available() returns False, so fall back to the CPU there.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
t = t.to(device)    # no-op when the tensor already lives on `device`
t

## Tensors
* number, array, 2d-array -> computer science
* scalar, vector, matrix -> mathematics
* **A tensor is a multi-dimensional array, i.e. an n-d array**
### Rank, Axes, and Shape
* **Rank:** the number of dimensions of the tensor, i.e. how many indices are needed to access a single element.
    
In a CNN, an image tensor typically has the shape **[B, C, H, W] --> Batch_size, Color_channel, Height, Width**.<br>
As the tensor passes through the convolution layers, its shape changes. Each filter creates one **feature map**.

In [23]:
# A nested Python list with 3 rows and 3 columns.
dd = [[1,2,3],[4,5,6],[7,8,9]]
t = torch.Tensor(dd)    # convert the nested list to a tensor (the values become floats)
t.shape                 # get the shape of the tensor: one size per axis

torch.Size([3, 3])

In [24]:
t.reshape(1,9)    # reshape the tensor

tensor([[1., 2., 3., 4., 5., 6., 7., 8., 9.]])

## 3. Neural Network Programming

In [25]:
import torch
import numpy as np

### 3.1 Data type of tensor

In [33]:
t1 = torch.tensor([1,2,3])
t1.dtype

torch.int64

In [34]:
t2 = torch.tensor([1.0, 2.0, 3.0])
t2.dtype

torch.float32

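**t1 + t2**: in the PyTorch version used here (1.2.0) this raises an error because the two tensors have different data types (`int64` vs. `float32`). Newer PyTorch releases (1.3 and later) apply type promotion instead and return a `float32` tensor.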

### 3.2 Tensor creation options using data

In [36]:
data = np.array([1,2,3])
type(data)

numpy.ndarray

In [37]:
torch.Tensor(data)

tensor([1., 2., 3.])

In [38]:
torch.tensor(data)    #"tensor", not "Tensor", a Factory function

tensor([1, 2, 3], dtype=torch.int32)

In [39]:
torch.as_tensor(data)

tensor([1, 2, 3], dtype=torch.int32)

In [40]:
torch.from_numpy(data)

tensor([1, 2, 3], dtype=torch.int32)

### 3.3 Tensor creation options without data

In [41]:
torch.eye(2)  # create a identity matrix

tensor([[1., 0.],
        [0., 1.]])

In [42]:
torch.zeros(2,2)  # create a tensor of all 0s

tensor([[0., 0.],
        [0., 0.]])

In [43]:
torch.ones(2,2)   # create a tensor of all 1s

tensor([[1., 1.],
        [1., 1.]])

In [44]:
torch.rand(2, 2)  # create a tensor of random numbers

tensor([[0.4344, 0.9925],
        [0.5343, 0.3987]])

### 3.4 Creating PyTorch Tensors - Best Options

In [45]:
# Build four tensors from the same NumPy array, one per creation option,
# so that their results can be compared in the following cells.
data = np.array([1,2,3])
t1 = torch.Tensor(data)
t2 = torch.tensor(data)
t3 = torch.as_tensor(data)
t4 = torch.from_numpy(data)
# t2, t3 and t4 are created by factory functions;
# t1 is created by calling the torch.Tensor class constructor.

In [51]:
# The creation options differ in the data type of the resulting tensor
print(t1)   # input int, output float
print(t2)   # input int, output int
print(t3)   # input int, output int
print(t4)   # input int, output int

tensor([1., 2., 3.])
tensor([1, 2, 3], dtype=torch.int32)
tensor([1, 2, 3], dtype=torch.int32)
tensor([1, 2, 3], dtype=torch.int32)


In [54]:
# Print the dtype of every tensor, in the order the tensors were created.
for created in (t1, t2, t3, t4):
    print(created.dtype)

torch.float32
torch.int32
torch.int32
torch.int32


In [55]:
torch.get_default_dtype()

torch.float32

In [58]:
# float in, float out: the floating-point dtype of the NumPy array is kept
# instead of being converted to the default dtype
torch.tensor(np.array([1., 2. ,3.]))    

tensor([1., 2., 3.], dtype=torch.float64)

In [59]:
# An explicit dtype argument sets the data type of the output,
# here turning integer input into a float64 tensor
torch.tensor(np.array([1, 2, 3]), dtype = torch.float64)

tensor([1., 2., 3.], dtype=torch.float64)

#### Tensor and memory sharing:
If the source data is changed after the tensors have been created, **t1 and t2 stay unchanged, while t3 and t4 change** (they share memory with the NumPy array):<br>
<img src = "Tensor_and_memory.png" height="450" width="450">

### 3.5 Flatten, Reshape, and Squeeze Explained
Tensor operation types:
1. Reshaping operations
2. Element-wise operations
3. Reduction operations
4. Access operations

**Data should be flattened when being passed through layers** (e.g. before it is fed into a fully connected layer)

In [62]:
import torch

In [65]:
t = torch.tensor([
    [1,1,1,1],
    [2,2,2,2],
    [3,3,3,3]], dtype = torch.float32)

In [66]:
t.size()

torch.Size([3, 4])

In [67]:
t.shape

torch.Size([3, 4])

In [69]:
len(t.shape)    # number of dimensions

2

In [71]:
torch.tensor(t.shape).prod()   # number of elements in the tensor

tensor(12)

In [72]:
t.numel()   # "number of element"

12

In [87]:
# reshape(1, 12) rearranges the 12 elements into 1 row of 12 columns;
# the tensor still has 2 axes.
print("****************** Only Reshape ***********************")
print(t.reshape(1, 12))
print(t.reshape(1, 12).shape)
print('\n')

# squeeze() removes the axes of length 1, leaving one axis with 12 elements.
print("******** Reshape then squeeze, only 1 dimension remains ********")
print(t.reshape(1, 12).squeeze())
print(t.reshape(1, 12).squeeze().shape)
print('\n')

# unsqueeze(dim=0) inserts an axis of length 1 at position 0 again,
# so the shape is back to [1, 12].
print("****** Reshape, squeeze, then unsqueeze, dimenson unchanged *******")
print(t.reshape(1, 12).squeeze().unsqueeze(dim=0))
print(t.reshape(1, 12).squeeze().unsqueeze(dim=0).shape)

****************** Only Reshape ***********************
tensor([[1., 1., 1., 1., 2., 2., 2., 2., 3., 3., 3., 3.]])
torch.Size([1, 12])


******** Reshape then squeeze, only 1 dimension remains ********
tensor([1., 1., 1., 1., 2., 2., 2., 2., 3., 3., 3., 3.])
torch.Size([12])


****** Reshape, squeeze, then unsqueeze, dimenson unchanged *******
tensor([[1., 1., 1., 1., 2., 2., 2., 2., 3., 3., 3., 3.]])
torch.Size([1, 12])


In [93]:
# try to flatten a tensor
def flatten(t):
    """Flatten a tensor into a 1-D tensor.

    Args:
        t: the tensor to flatten; it may have any shape.

    Returns:
        A tensor with exactly one axis that holds every element of ``t``
        in the order ``reshape`` reads them, i.e. of shape ``[t.numel()]``.
    """
    # reshape(-1) always yields exactly one axis. Reshaping to (1, -1) and then
    # squeezing would remove *every* axis of length 1, so a tensor holding a
    # single element would come back as a 0-d tensor instead of shape [1].
    return t.reshape(-1)

print(flatten(t), "      #reshape and squeeze(flatten)")
print(t.reshape(1, 12), "    #reshape only") 

tensor([1., 1., 1., 1., 2., 2., 2., 2., 3., 3., 3., 3.])       #reshape and squeeze(flatten)
tensor([[1., 1., 1., 1., 2., 2., 2., 2., 3., 3., 3., 3.]])     #reshape only


In [94]:
# Join two 2x2 tensors along the existing first axis (dim=0).
t1 = torch.tensor([[1, 2], [3, 4]])
t2 = torch.tensor([[5, 6], [7, 8]])

# The rows of t2 are appended below the rows of t1, giving a 4x2 tensor.
torch.cat([t1, t2], dim=0)

tensor([[1, 2],
        [3, 4],
        [5, 6],
        [7, 8]])

### 3.6 CNN Flatten Operation Visualized

In [95]:
import torch

In [96]:
t1 = torch.tensor([[1,1,1,1],
                   [1,1,1,1],
                   [1,1,1,1],
                   [1,1,1,1]])

t2 = torch.tensor([[2,2,2,2],
                   [2,2,2,2],
                   [2,2,2,2],
                   [2,2,2,2]])

t3 = torch.tensor([[3,3,3,3],
                   [3,3,3,3],
                   [3,3,3,3],
                   [3,3,3,3]])

In [98]:
# use stack to join the 3 tensors along a new leading axis
t = torch.stack((t1, t2, t3))    
t.shape   # batch_size, height, width

torch.Size([3, 4, 4])

In [99]:
# add one color channel to the data
t = t.reshape(3, 1, 4, 4)    # batch, channel, height, width
t

tensor([[[[1, 1, 1, 1],
          [1, 1, 1, 1],
          [1, 1, 1, 1],
          [1, 1, 1, 1]]],


        [[[2, 2, 2, 2],
          [2, 2, 2, 2],
          [2, 2, 2, 2],
          [2, 2, 2, 2]]],


        [[[3, 3, 3, 3],
          [3, 3, 3, 3],
          [3, 3, 3, 3],
          [3, 3, 3, 3]]]])

In [102]:
print(t[0], "\n")
print(t[0][0], "\n")
print(t[0][0][0], "\n")
print(t[0][0][0][0], "\n")

tensor([[[1, 1, 1, 1],
         [1, 1, 1, 1],
         [1, 1, 1, 1],
         [1, 1, 1, 1]]]) 

tensor([[1, 1, 1, 1],
        [1, 1, 1, 1],
        [1, 1, 1, 1],
        [1, 1, 1, 1]]) 

tensor([1, 1, 1, 1]) 

tensor(1) 



In [107]:
# flatten the tensor again! With multiple methods
t.reshape(1,-1)[0]

tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3])

In [108]:
t.reshape(-1)

tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3])

In [109]:
t.view(t.numel())

tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3])

In [110]:
t.flatten()    # use flatten(), all images will be flattened into one

tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3])

In [112]:
# How to flatten only some of the dimensions?
# start_dim is the axis at which the flatten operation begins; the axes before
# it are kept, so every image of the batch is flattened separately.
t.flatten(start_dim = 1).shape    

torch.Size([3, 16])

In [113]:
t.flatten(start_dim=1)

tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        [2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2],
        [3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3]])

### 3.7 Broadcasting and Element-wise Operations

Element-wise operations require the two tensors to have the same shape, or shapes that can be broadcast to a common shape (see the broadcasting examples below).

In [2]:
import torch
import numpy as np

In [3]:
t1 = torch.tensor([
    [1,2],
    [3,4]
], dtype = torch.float32)

t2 = torch.tensor([
    [9, 8],
    [7, 6]
], dtype = torch.float32)

In [4]:
# An element of the first axis: indexing once returns a whole row
t1[0]

tensor([1., 2.])

In [5]:
# An element of the second axis: indexing twice returns a single number
t1[0][0]

tensor(1.)

In [6]:
t1 + t2    # t1 and t2 have the same shape, so corresponding elements are added

tensor([[10., 10.],
        [10., 10.]])

**Why can a scalar value (which has an empty shape) take part in an element-wise operation?** <br>
Answer: the scalar is broadcast to the shape of the tensor. <br>
E.g. in `t1 + 2`, the 2 is broadcast to the shape of t1, and the addition is then performed element-wise. <br><br>
**Scalar operations `+`, `-`, `*`, `/`: the operator form and the method form are equivalent:**

In [7]:
t1 + 2       # operator form (not displayed: a cell only shows its last expression)
t1.add(2)    # method form, same result

tensor([[3., 4.],
        [5., 6.]])

In [8]:
t1 - 2       # operator form (not displayed: a cell only shows its last expression)
t1.sub(2)    # method form, same result

tensor([[-1.,  0.],
        [ 1.,  2.]])

In [9]:
t1 * 2       # operator form (not displayed: a cell only shows its last expression)
t1.mul(2)    # method form, same result

tensor([[2., 4.],
        [6., 8.]])

In [10]:
t1 / 2       # operator form (not displayed: a cell only shows its last expression)
t1.div(2)    # method form, same result

tensor([[0.5000, 1.0000],
        [1.5000, 2.0000]])

In [11]:
t1 > 1 

tensor([[False,  True],
        [ True,  True]])

In [12]:
# Show what the scalar 2 looks like once it is broadcast to the shape of t1
np.broadcast_to(2, t1.shape)

array([[2, 2],
       [2, 2]])

**Even if t1 and t2 have different shapes, t2 is broadcast to the shape of t1 (when the shapes are compatible) and the element-wise operation is then performed:**

In [13]:
# t1 has 2 rows and 2 columns
t1 = torch.tensor([
    [1,2],
    [3,4]
], dtype = torch.float32)

# t2 has a single axis with 2 elements
t2 = torch.tensor([2, 4], dtype = torch.float32)

# t2 is broadcast across every row of t1 before the element-wise addition
t1 + t2

tensor([[3., 6.],
        [5., 8.]])

#### Other element-wise operations:
Comparison operations:

In [20]:
t = torch.tensor([
    [0,5,7],
    [6,0,7],
    [0,8,0]
], dtype = torch.float32)

# Comparison operations:
# each call compares every element with 0 and returns a boolean tensor of the
# same shape; only the result of the last call (t.le) is displayed.
t.eq(0)    # t == 0?   equal
t.ge(0)    # t >= 0?   greater or equal
t.gt(0)    # t > 0?    greater than
t.lt(0)    # t < 0?    less than
t.le(0)    # t <= 0?   less or equal

tensor([[ True, False, False],
        [False,  True, False],
        [ True, False,  True]])

The following operations are equivalent:

In [22]:
# 1) built-in method with a scalar argument
t.le(7)

# 2) the scalar broadcast explicitly to the shape of t with NumPy
t <= torch.tensor(np.broadcast_to(7, t.shape), dtype = torch.float32)

# 3) the broadcast tensor written out by hand (only this result is displayed)
t <= torch.tensor([[7,7,7],
                   [7,7,7],
                   [7,7,7]
                  ], dtype=torch.float32)

tensor([[ True,  True,  True],
        [ True,  True,  True],
        [ True, False,  True]])

Other mathematical operations:

In [23]:
t.abs()    # absolute values
t.sqrt()   # square root
t.neg()    # negated values
t.neg().abs()   # negate first, then take absolute values (only this result is displayed)

tensor([[0., 5., 7.],
        [6., 0., 7.],
        [0., 8., 0.]])

### 3.8 ArgMax and Reduction Tensor Ops
A reduction operation on a tensor is an operation that reduces the number of elements contained within the tensor.

In [24]:
import torch
import numpy as np

**Reduction operations:**

In [31]:
t = torch.tensor([[0,1,0],
                  [2,0,2],
                  [0,3,0]], dtype = torch.float32)

# operations that reduce all elements into 1 value
t.sum()     # sum of all elements
t.prod()    # product of all elements
t.mean()    # mean of all elements
t.std()     # standard deviation of all elements

# operations that reduce the elements into multiple values along some axis
t.sum(dim = 1)        # one sum per row
t.sum(dim = 0)        # one sum per column
t[0] + t[1] + t[2]    # same result as t.sum(dim = 0)
t[0].sum()            # sum of the first row (the only result displayed)

tensor(1.)

**ArgMax operations return the index of the max value:**

In [34]:
t.argmax()    # if no axis is specified, returns the index of the max value in the flattened tensor

tensor(7)

In [35]:
# return the max values along axis 0,
# and also the indices of those values along axis 0
t.max(dim = 0)

torch.return_types.max(
values=tensor([2., 3., 2.]),
indices=tensor([1, 2, 1]))

In [36]:
t.mean()           # get the mean value as a scalar tensor
t.mean().item()    # get the mean value as a plain Python number
t.mean(dim=0).tolist()    # get the mean values along axis 0 and convert them to a Python list
t.mean(dim=0).numpy()     # get the mean values along axis 0 and convert them to a NumPy array

array([0.6666667, 1.3333334, 0.6666667], dtype=float32)