In [17]:
import torch

In [2]:
x = torch.arange(12)
x

tensor([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

In [3]:
x.shape

torch.Size([12])

In [4]:
x.numel()

12

In [5]:
X = x.reshape(3,4)
X

tensor([[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]])

In [6]:
torch.zeros((2,3,4))

tensor([[[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]],

        [[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]]])

In [7]:
x = torch.tensor([1.0,2,4,8])
y = torch.tensor([2,2,2,2])
x+y, x-y, x*y, x/y, x**y

(tensor([ 3.,  4.,  6., 10.]),
 tensor([-1.,  0.,  2.,  6.]),
 tensor([ 2.,  4.,  8., 16.]),
 tensor([0.5000, 1.0000, 2.0000, 4.0000]),
 tensor([ 1.,  4., 16., 64.]))

### 把多个张量连结在一起

In [8]:
X = torch.arange(12, dtype=torch.float32).reshape((3,4))
Y = torch.tensor([[2.0,1,4,3],[1,2,3,4],[4,3,2,1]])
display(X,Y)

tensor([[ 0.,  1.,  2.,  3.],
        [ 4.,  5.,  6.,  7.],
        [ 8.,  9., 10., 11.]])

tensor([[2., 1., 4., 3.],
        [1., 2., 3., 4.],
        [4., 3., 2., 1.]])

In [9]:
torch.cat((X,Y))

tensor([[ 0.,  1.,  2.,  3.],
        [ 4.,  5.,  6.,  7.],
        [ 8.,  9., 10., 11.],
        [ 2.,  1.,  4.,  3.],
        [ 1.,  2.,  3.,  4.],
        [ 4.,  3.,  2.,  1.]])

In [11]:
X2 = torch.cat((X,Y),dim=0)
X2

tensor([[ 0.,  1.,  2.,  3.],
        [ 4.,  5.,  6.,  7.],
        [ 8.,  9., 10., 11.],
        [ 2.,  1.,  4.,  3.],
        [ 1.,  2.,  3.,  4.],
        [ 4.,  3.,  2.,  1.]])

In [12]:
X2.shape

torch.Size([6, 4])

In [14]:
X2.numel()

24

In [17]:
torch.cat((X,Y),dim=1)

tensor([[ 0.,  1.,  2.,  3.,  2.,  1.,  4.,  3.],
        [ 4.,  5.,  6.,  7.,  1.,  2.,  3.,  4.],
        [ 8.,  9., 10., 11.,  4.,  3.,  2.,  1.]])

In [19]:
# 通过 逻辑运算符构建二元张量
X == Y

tensor([[False,  True, False,  True],
        [False, False, False, False],
        [False, False, False, False]])

In [20]:
# 对张量中的元素进行求和会产生一个只有一个元素的张量
X.sum()

tensor(66.)

### 即使形状不同，我们仍然可以通过调用广播机制（broadcasting mechanism）来执行按元素操作

In [21]:
a = torch.arange(3).reshape((3,1))
b = torch.arange(2).reshape((1,2))
a,b

(tensor([[0],
         [1],
         [2]]),
 tensor([[0, 1]]))

In [23]:
a.shape

torch.Size([3, 1])

In [24]:
b.shape

torch.Size([1, 2])

In [25]:
a+b

tensor([[0, 1],
        [1, 2],
        [2, 3]])

### 运行一些操作可能会导致为新结果分配内存

In [26]:
before = id(Y)
Y = Y + x
id(Y) == before

False

In [27]:
Z = torch.zeros_like(Y)
print('id(z)',id(Z))

id(z) 4925045456


In [29]:
Z[:] = X + Y
print('id(z)',id(Z))

id(z) 4925045456


In [30]:
before = id(X)

In [31]:
X += Y

In [32]:
id(X) == before

True

In [33]:
X = X+Y


In [34]:
id(X) == before

False

### 转换为NumPy张量

In [35]:
A = X.numpy()
B = torch.tensor(A)
type(A), type(B)

(numpy.ndarray, torch.Tensor)

### 将大小为1的张量转换为Python标量

In [36]:
a = torch.tensor([3.5])
a

tensor([3.5000])

In [37]:
a.item(), float(a), int(a)

(3.5, 3.5, 3)

# 数据预处理
### 创建一个人工数据集，并存储在csv（逗号分隔值）文件

In [2]:
import os

In [3]:
os.makedirs(os.path.join('..', 'data'), exist_ok=True)

In [4]:
data_file = os.path.join('..','data','house_tiny.csv')
data_file

'../data/house_tiny.csv'

In [5]:
with open(data_file, 'w') as f:
    f.write('NumRooms,Alley,Price\n') #列名（key名）
    f.write('NA,Pave,127500\n') #每行表示一个数据样本
    f.write('2,NA,106000\n')
    f.write('4,NA,178100\n')
    f.write('NA,NA,140000\n')

In [6]:
data_file

'../data/house_tiny.csv'

In [7]:
import pandas as pd

In [8]:
data = pd.read_csv(data_file)

In [9]:
# print(data)
data

Unnamed: 0,NumRooms,Alley,Price
0,,Pave,127500
1,2.0,,106000
2,4.0,,178100
3,,,140000


# 为了处理缺失的数据，典型的方法包括插值和删除，

In [11]:
inputs, outputs = data.iloc[:, 0:2], data.iloc[:,2]
inputs = inputs.fillna(inputs.mean())
print(inputs)

TypeError: can only concatenate str (not "int") to str

# 对于inputs中的类别值或离散值，将NaN视为一个类别

In [16]:
inputs = pd.get_dummies(inputs, dummy_na=True)
print(inputs)

   NumRooms  Alley_Pave  Alley_nan
0       NaN        True      False
1       2.0       False       True
2       4.0       False       True
3       NaN       False       True


# 现在inputs和outputs中的所有条目都是数值类型，它们可以转换为张量（Tensor）格式

In [19]:
X = torch.tensor(inputs.values)
X

TypeError: can't convert np.ndarray of type numpy.object_. The only supported types are: float64, float32, float16, complex64, complex128, int64, int32, int16, int8, uint64, uint32, uint16, uint8, and bool.

In [20]:
y = torch.tensor(outputs.values)
y

tensor([127500, 106000, 178100, 140000])