In [1]:
import torch

### Create tensors

In [4]:
x = torch.arange(12,dtype=torch.float32) #create a vector from 0 to 11
x

tensor([ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11.])

In [9]:
print(x.shape) # the shape of the tensor
print(x.numel()) # total number of elements in a tensor

torch.Size([12])
12


In [11]:
X = x.reshape(3, 4) # reshape the tensor into a new tensor
print(X)
X = x.reshape(-1, 4) # reshape the tensor into a new tensor
print(X)

tensor([[ 0.,  1.,  2.,  3.],
        [ 4.,  5.,  6.,  7.],
        [ 8.,  9., 10., 11.]])
tensor([[ 0.,  1.,  2.,  3.],
        [ 4.,  5.,  6.,  7.],
        [ 8.,  9., 10., 11.]])


In [16]:
print(torch.zeros((2, 3, 4)))
print(torch.ones((2, 3, 4)))
print(torch.randn(3, 4))

tensor([[[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]],

        [[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]]])
tensor([[[1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]],

        [[1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]]])
tensor([[-1.1264, -0.0401, -0.1202, -0.2631],
        [ 0.1848, -0.8087, -0.6706,  0.8481],
        [-0.0708, -1.3223, -0.1316, -0.2485]])


In [18]:
torch.tensor([[1,2,3],[2,3,4]])

tensor([[1, 2, 3],
        [2, 3, 4]])

### Operation for tensors

In [23]:
x = torch.tensor([1.0,2,4,8]) # 1.0 can convert this tensor into the float number
y = torch.tensor([2,3,4,5])
x+y, x-y, x*y, x/y, x**y # The ** operator is exponentiation, 1^2, 2^3....

(tensor([ 3.,  5.,  8., 13.]),
 tensor([-1., -1.,  0.,  3.]),
 tensor([ 2.,  6., 16., 40.]),
 tensor([0.5000, 0.6667, 1.0000, 1.6000]),
 tensor([1.0000e+00, 8.0000e+00, 2.5600e+02, 3.2768e+04]))

In [24]:
torch.exp(x)

tensor([2.7183e+00, 7.3891e+00, 5.4598e+01, 2.9810e+03])

In [28]:
X = torch.arange(12, dtype=torch.float32).reshape((3,4))
Y = torch.tensor([[2.0, 1, 4, 3], [1, 2, 3, 4], [4, 3, 2, 1]])
torch.cat((X,Y),dim=0), torch.cat((X,Y),dim=1) # dim=0:row, dim=1:column

(tensor([[ 0.,  1.,  2.,  3.],
         [ 4.,  5.,  6.,  7.],
         [ 8.,  9., 10., 11.],
         [ 2.,  1.,  4.,  3.],
         [ 1.,  2.,  3.,  4.],
         [ 4.,  3.,  2.,  1.]]),
 tensor([[ 0.,  1.,  2.,  3.,  2.,  1.,  4.,  3.],
         [ 4.,  5.,  6.,  7.,  1.,  2.,  3.,  4.],
         [ 8.,  9., 10., 11.,  4.,  3.,  2.,  1.]]))

In [29]:
X == Y

tensor([[False,  True, False,  True],
        [False, False, False, False],
        [False, False, False, False]])

In [30]:
X.sum()

tensor(66.)

In [31]:
#broadcast mechanism
a = torch.arange(3).reshape((3, 1)) 
b = torch.arange(2).reshape((1, 2)) 
a, b

(tensor([[0],
         [1],
         [2]]),
 tensor([[0, 1]]))

In [32]:
a+b # 3x2

tensor([[0, 1],
        [1, 2],
        [2, 3]])

In [37]:
#indexing and slicing
X = torch.arange(12, dtype=torch.float32).reshape((3,4))
X, X[-1], X[1:3]

(tensor([[ 0.,  1.,  2.,  3.],
         [ 4.,  5.,  6.,  7.],
         [ 8.,  9., 10., 11.]]),
 tensor([ 8.,  9., 10., 11.]),
 tensor([[ 4.,  5.,  6.,  7.],
         [ 8.,  9., 10., 11.]]))

In [38]:
X[1,2] = 9
X

tensor([[ 0.,  1.,  2.,  3.],
        [ 4.,  5.,  9.,  7.],
        [ 8.,  9., 10., 11.]])

In [40]:
X[0:2,:] = 12
X

tensor([[12., 12., 12., 12.],
        [12., 12., 12., 12.],
        [ 8.,  9., 10., 11.]])

In [42]:
# save memory
before = id(Y)
Y = X+Y
id(Y) == before

False

In [45]:
# first way for saving memory
Z = torch.zeros_like(Y)
print(id(Z))
Z[:] = X+Y
print(id(Z))
# second way for saving memory
before = id(Y)
Y += X
id(Y)==before

1881182821968
1881182821968


True

In [46]:
# conversion to other python objects
A = X.numpy()
B = torch.from_numpy(A)
type(A),type(B)

(numpy.ndarray, torch.Tensor)

In [47]:
a = torch.tensor([3.5])
a, a.item(), float(a), int(a)

(tensor([3.5000]), 3.5, 3.5, 3)

### Data preprocessing

In [48]:
import os 

In [57]:
os.makedirs(os.path.join('.', 'data'), exist_ok=True) #create a directory
data_file = os.path.join('.', 'data', 'house_tiny.csv') #define the path of the file

In [59]:
with open(data_file,'w') as f: #write the content and create a file
    f.write('NumRooms,Alley,Price\n') # Column names
    f.write('NA,Pave,127500\n') # Each row represents a data example
    f.write('2,NA,106000\n')
    f.write('4,NA,178100\n')

In [62]:
import pandas as pd

In [64]:
data = pd.read_csv(data_file)
print(data)

   NumRooms Alley   Price
0       NaN  Pave  127500
1       2.0   NaN  106000
2       4.0   NaN  178100


In [65]:
inputs, outputs = data.iloc[:,0:2],data.iloc[:,2]

In [67]:
inputs = inputs.fillna(inputs.mean())
print(inputs)

   NumRooms Alley
0       3.0  Pave
1       2.0   NaN
2       4.0   NaN


  inputs = inputs.fillna(inputs.mean())


In [69]:
inputs = pd.get_dummies(inputs, dummy_na=True) # one_hot
print(inputs)

   NumRooms  Alley_Pave  Alley_nan
0       3.0           1          0
1       2.0           0          1
2       4.0           0          1


In [71]:
X,y = torch.tensor(inputs.values), torch.tensor(outputs.values) #convert them into the tensor
X,y

(tensor([[3., 1., 0.],
         [2., 0., 1.],
         [4., 0., 1.]], dtype=torch.float64),
 tensor([127500, 106000, 178100]))

### Linear Algebra

In [72]:
x = torch.tensor(3.0)
y = torch.tensor(2.0)
x+y,x-y,x*y,x/y

(tensor(5.), tensor(1.), tensor(6.), tensor(1.5000))

In [75]:
x = torch.arange(4) #vector, the length of the vecor is called dimension
x, x[3], len(x)

(tensor([0, 1, 2, 3]), tensor(3), 4)

Note: To clarify, we use the dimensionality of a vector or an axis to refer to its length, i.e., the number ofelements ofa vector or an axis. However, we use the dimensionality ofa tensor to refer to the number of axes that a tensor has. 

In [77]:
A = torch.arange(20).reshape(5,4) #matrix
A, A.T

(tensor([[ 0,  1,  2,  3],
         [ 4,  5,  6,  7],
         [ 8,  9, 10, 11],
         [12, 13, 14, 15],
         [16, 17, 18, 19]]),
 tensor([[ 0,  4,  8, 12, 16],
         [ 1,  5,  9, 13, 17],
         [ 2,  6, 10, 14, 18],
         [ 3,  7, 11, 15, 19]]))

In [78]:
B = torch.tensor([[1,2,3],[2,0,4],[3,4,5]]) #symmetric matrix 
B == B.T

tensor([[True, True, True],
        [True, True, True],
        [True, True, True]])

In [79]:
A = torch.arange(20,dtype=torch.float32).reshape(5,4)
B = A.clone() # assign a new memory
A, A+B

(tensor([[ 0.,  1.,  2.,  3.],
         [ 4.,  5.,  6.,  7.],
         [ 8.,  9., 10., 11.],
         [12., 13., 14., 15.],
         [16., 17., 18., 19.]]),
 tensor([[ 0.,  2.,  4.,  6.],
         [ 8., 10., 12., 14.],
         [16., 18., 20., 22.],
         [24., 26., 28., 30.],
         [32., 34., 36., 38.]]))

In [80]:
A*B #dot product

tensor([[  0.,   1.,   4.,   9.],
        [ 16.,  25.,  36.,  49.],
        [ 64.,  81., 100., 121.],
        [144., 169., 196., 225.],
        [256., 289., 324., 361.]])

In [82]:
a = 2
X = torch.arange(24).reshape(2, 3, 4)
a + X, (a * X).shape

(tensor([[[ 2,  3,  4,  5],
          [ 6,  7,  8,  9],
          [10, 11, 12, 13]],
 
         [[14, 15, 16, 17],
          [18, 19, 20, 21],
          [22, 23, 24, 25]]]),
 torch.Size([2, 3, 4]))

In [84]:
# reduction
x = torch.arange(4,dtype=torch.float32)
x,x.sum()

(tensor([0., 1., 2., 3.]), tensor(6.))

In [85]:
# reduction referring to the dimension
A = torch.arange(20).reshape(5,4)
A

tensor([[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11],
        [12, 13, 14, 15],
        [16, 17, 18, 19]])

In [86]:
A_sum_axis0 = A.sum(axis=0)
A_sum_axis0, A_sum_axis0.shape

(tensor([40, 45, 50, 55]), torch.Size([4]))

In [87]:
A_sum_axis1 = A.sum(axis=1)
A_sum_axis1, A_sum_axis1.shape

(tensor([ 6, 22, 38, 54, 70]), torch.Size([5]))

In [88]:
A.sum(axis=[0, 1]) # Same as `A.sum()`

tensor(190)

In [94]:
A.float().mean(), A.sum() / A.numel()

(tensor(9.5000), tensor(9.5000))

In [96]:
A.float().mean(axis=0), A.sum(axis=0) / A.shape[0]

(tensor([ 8.,  9., 10., 11.]), tensor([ 8.,  9., 10., 11.]))

In [100]:
#non-reduction sum
sum_A = A.sum(axis=0, keepdims=True) # keep the dim
sum_A

tensor([[40, 45, 50, 55]])

In [101]:
A / sum_A #since sum_A still keeps its two axes after summing each row, we can divide A by sum_A with broadcasting.

tensor([[0.0000, 0.0222, 0.0400, 0.0545],
        [0.1000, 0.1111, 0.1200, 0.1273],
        [0.2000, 0.2000, 0.2000, 0.2000],
        [0.3000, 0.2889, 0.2800, 0.2727],
        [0.4000, 0.3778, 0.3600, 0.3455]])

In [102]:
A.cumsum(axis=0) #calculate the cumulative sum

tensor([[ 0,  1,  2,  3],
        [ 4,  6,  8, 10],
        [12, 15, 18, 21],
        [24, 28, 32, 36],
        [40, 45, 50, 55]])

In [2]:
# dot products

In [5]:
x = torch.randn(4,dtype=torch.float32)
y = torch.ones(4,dtype=torch.float32)
x, y, torch.dot(x,y)

(tensor([ 0.5189, -0.7939,  0.3115, -0.1424]),
 tensor([1., 1., 1., 1.]),
 tensor(-0.1060))

In [6]:
torch.sum(x*y)

tensor(-0.1060)

In [12]:
#matrix-vector products
A = torch.arange(20,dtype=torch.float32).reshape(5,4)
x = torch.ones(4,dtype=torch.float32)*2
A,x,torch.mv(A,x)

(tensor([[ 0.,  1.,  2.,  3.],
         [ 4.,  5.,  6.,  7.],
         [ 8.,  9., 10., 11.],
         [12., 13., 14., 15.],
         [16., 17., 18., 19.]]),
 tensor([2., 2., 2., 2.]),
 tensor([ 12.,  44.,  76., 108., 140.]))

In [14]:
#matrix-matrix multiplication
A = torch.arange(20,dtype=torch.float32).reshape(5,4)
B = torch.ones(4,3)
A,B,torch.mm(A,B)

(tensor([[ 0.,  1.,  2.,  3.],
         [ 4.,  5.,  6.,  7.],
         [ 8.,  9., 10., 11.],
         [12., 13., 14., 15.],
         [16., 17., 18., 19.]]),
 tensor([[1., 1., 1.],
         [1., 1., 1.],
         [1., 1., 1.],
         [1., 1., 1.]]),
 tensor([[ 6.,  6.,  6.],
         [22., 22., 22.],
         [38., 38., 38.],
         [54., 54., 54.],
         [70., 70., 70.]]))

In [15]:
#Norm
#The L2 norm of x is the square root of the sum of the squares of the vector elements:
#the L1 norm, which is expressed as the sum of the absolute values of the vector elements:
#Frobenius norm of a matrix X ∈ Rm×n is the square root of the sum of the squares of the matrix elements:
t = torch.tensor([3.0,4.0]) #L2 norm
torch.norm(t)

tensor(5.)

In [16]:
torch.abs(t).sum() # L1 norm

tensor(7.)

In [17]:
torch.norm(torch.ones((4, 9)))

tensor(6.)

### Calculus

In [31]:
# Automatic differentiation
x = torch.arange(4.0)
x

tensor([0., 1., 2., 3.])

In [32]:
x.requires_grad_(True) # Same as `x = torch.arange(4.0, requires_grad=True)`
x.grad # The default value is None

In [33]:
y = 2*torch.dot(x,x)
y

tensor(28., grad_fn=<MulBackward0>)

In [34]:
y.backward()
x.grad, x.grad == 4 * x

(tensor([ 0.,  4.,  8., 12.]), tensor([True, True, True, True]))

In [35]:
# PyTorch accumulates the gradient in default, we need to clear the previous values
x.grad.zero_()
y = x.sum() # x
y.backward()
x.grad

tensor([1., 1., 1., 1.])

In [38]:
# Backward for Non-Scalar Variables
x.grad.zero_()
y = x * x
y.sum().backward() #grad can be implicitly created only for scalar outputs, so we need sum(), 求导只能对标量求导，即多输入单输出，比如1个loss是标量
x.grad

tensor([0., 2., 4., 6.])

In [43]:
# Detaching Computation
x.grad.zero_()
y = x*x
u = y.detach()
z = u*x
z.sum().backward()
x.grad==u

tensor([True, True, True, True])

In [45]:
x.grad.zero_()
y.sum().backward()
x.grad == 2 * x

tensor([True, True, True, True])

In [46]:
# Computing the Gradient of Python Control Flow
def f(a): 
    b = a * 2 
    while b.norm() < 1000: 
        b = b * 2
    if b.sum() > 0: 
        c = b
    else: 
        c = 100 * b
    return c

In [47]:
a = torch.randn(size=(), requires_grad=True)
d = f(a)
d.backward()

In [48]:
a.grad == d / a

tensor(True)

### Documentation

In [50]:
print(dir(torch.distributions))

['AbsTransform', 'AffineTransform', 'Bernoulli', 'Beta', 'Binomial', 'CatTransform', 'Categorical', 'Cauchy', 'Chi2', 'ComposeTransform', 'ContinuousBernoulli', 'CorrCholeskyTransform', 'Dirichlet', 'Distribution', 'ExpTransform', 'Exponential', 'ExponentialFamily', 'FisherSnedecor', 'Gamma', 'Geometric', 'Gumbel', 'HalfCauchy', 'HalfNormal', 'Independent', 'IndependentTransform', 'Kumaraswamy', 'LKJCholesky', 'Laplace', 'LogNormal', 'LogisticNormal', 'LowRankMultivariateNormal', 'LowerCholeskyTransform', 'MixtureSameFamily', 'Multinomial', 'MultivariateNormal', 'NegativeBinomial', 'Normal', 'OneHotCategorical', 'OneHotCategoricalStraightThrough', 'Pareto', 'Poisson', 'PowerTransform', 'RelaxedBernoulli', 'RelaxedOneHotCategorical', 'ReshapeTransform', 'SigmoidTransform', 'SoftmaxTransform', 'StackTransform', 'StickBreakingTransform', 'StudentT', 'TanhTransform', 'Transform', 'TransformedDistribution', 'Uniform', 'VonMises', 'Weibull', 'Wishart', '__all__', '__builtins__', '__cached__'

In [55]:
help(torch.ones)

Help on built-in function ones:

ones(...)
    ones(*size, *, out=None, dtype=None, layout=torch.strided, device=None, requires_grad=False) -> Tensor
    
    Returns a tensor filled with the scalar value `1`, with the shape defined
    by the variable argument :attr:`size`.
    
    Args:
        size (int...): a sequence of integers defining the shape of the output tensor.
            Can be a variable number of arguments or a collection like a list or tuple.
    
    Keyword arguments:
        out (Tensor, optional): the output tensor.
        dtype (:class:`torch.dtype`, optional): the desired data type of returned tensor.
            Default: if ``None``, uses a global default (see :func:`torch.set_default_tensor_type`).
        layout (:class:`torch.layout`, optional): the desired layout of returned Tensor.
            Default: ``torch.strided``.
        device (:class:`torch.device`, optional): the desired device of returned tensor.
            Default: if ``None``, uses the cur