In [2]:
import torch

In [3]:

# Two 3x4 float tensors to join together
x = torch.arange(12, dtype=torch.float32).reshape(3, 4)
y = torch.tensor([
    [2.0, 1, 4, 3],
    [1, 2, 3, 4],
    [4, 3, 2, 1],
])

# dim=0 joins along the rows    -> result shape (6, 4)
# dim=1 joins along the columns -> result shape (3, 8)
torch.cat([x, y], dim=0), torch.cat([x, y], dim=1)

(tensor([[ 0.,  1.,  2.,  3.],
         [ 4.,  5.,  6.,  7.],
         [ 8.,  9., 10., 11.],
         [ 2.,  1.,  4.,  3.],
         [ 1.,  2.,  3.,  4.],
         [ 4.,  3.,  2.,  1.]]),
 tensor([[ 0.,  1.,  2.,  3.,  2.,  1.,  4.,  3.],
         [ 4.,  5.,  6.,  7.,  1.,  2.,  3.,  4.],
         [ 8.,  9., 10., 11.,  4.,  3.,  2.,  1.]]))

In [4]:
# Element-wise equality: yields a boolean tensor with the same shape as x and y
x == y

tensor([[False,  True, False,  True],
        [False, False, False, False],
        [False, False, False, False]])

In [5]:
# Sum all the elements; the result is a single-element (0-d) tensor
x.sum()

tensor(66.)

In [6]:
# Broadcasting mechanism
#
# Every size-1 dimension is stretched to match the other operand, so a column
# of shape (3, 1) plus a row of shape (1, 2) gives a (3, 2) result.
# Dimensions that differ and are not 1 cannot be broadcast: PyTorch then
# reports that the tensor sizes must match at that non-singleton dimension.
a = torch.arange(3).reshape(-1, 1)  # column: (3, 1) -> (3, 2)
b = torch.arange(2).reshape(1, -1)  # row:    (1, 2) -> (3, 2)

a, b, a + b

(tensor([[0],
         [1],
         [2]]),
 tensor([[0, 1]]),
 tensor([[0, 1],
         [1, 2],
         [2, 3]]))

In [7]:
# id() gives the identity of a Python object; every object has its own id.
# `y = y + x` binds the name y to a different object, so the id recorded
# beforehand no longer matches.
before = id(y)
y = y + x
id(y) == before

False

In [8]:
# torch.zeros_like(y) returns a tensor filled with 0 that has the same size as y.

# In place (a): slice assignment writes into the existing z object, so the
# two ids printed below are identical.
z = torch.zeros_like(y)
print('before:',id(z))

z[:] = x + y
print('after',id(z))

# In place (b): augmented assignment updates x itself, so id(x) is unchanged.
# Note that x keeps these new values for every cell that runs afterwards.
before = id(x)
x += y
id(x) == before

before: 134241757620368
after 134241757620368


True

In [9]:
# Convert between a PyTorch tensor and a NumPy array
a = x.numpy()  # torch.Tensor -> numpy.ndarray
b = torch.tensor(a)  # numpy.ndarray -> torch.Tensor
type(a), type(b)

(numpy.ndarray, torch.Tensor)

In [10]:
# A one-element tensor can be turned into a plain Python scalar
a = torch.tensor([3.5])
(
    a,
    a.item(),  # Python float
    float(a),  # same value via the float() built-in
    int(a),    # truncated to an integer
)

(tensor([3.5000]), 3.5, 3.5, 3)

In [11]:
import os

# Create ../data if needed and write a tiny housing dataset into it
data_dir = os.path.join('..', 'data')
os.makedirs(data_dir, exist_ok=True)
data_file = os.path.join(data_dir, 'house_tiny.csv')

rows = [
    'NumRooms,Alley,Price\n',  # Column names
    'NA,Pave,127500\n',        # Values ('NA' stands for a missing entry)
    '2,NA,106000\n',
    '4,NA,178100\n',
    'NA,NA,140000\n',
]
with open(data_file, 'w') as f:
    f.writelines(rows)

In [12]:
import pandas as pd

# Load the CSV at data_file; its 'NA' fields show up as missing values (NaN)
data = pd.read_csv(data_file)
data

Unnamed: 0,NumRooms,Alley,Price
0,,Pave,127500
1,2.0,,106000
2,4.0,,178100
3,,,140000


In [13]:
# Handle missing values
# The first two columns are the features, the third column is the target.
inputs = data.iloc[:, :2]
outputs = data.iloc[:, 2]

# Replace missing numeric entries with their column mean; non-numeric
# columns are skipped by numeric_only=True and keep their NaNs.
column_means = inputs.mean(numeric_only=True)
inputs = inputs.fillna(column_means)
inputs

Unnamed: 0,NumRooms,Alley
0,3.0,Pave
1,2.0,
2,4.0,
3,3.0,


In [14]:
# One-hot encoding
# dummy_na=True adds an indicator column for missing values, and dtype=int
# makes the indicator columns 0/1 integers.
inputs = pd.get_dummies(inputs, dummy_na=True, dtype=int)
inputs, inputs.to_numpy()

(   NumRooms  Alley_Pave  Alley_nan
 0       3.0           1          0
 1       2.0           0          1
 2       4.0           0          1
 3       3.0           0          1,
 array([[3., 1., 0.],
        [2., 0., 1.],
        [4., 0., 1.],
        [3., 0., 1.]]))

In [15]:
# Turn the prepared features and targets into tensors.
# Note: this rebinds y, replacing the tensor of that name created earlier.
X = torch.tensor(inputs.to_numpy())
y = torch.tensor(outputs.to_numpy())
X, y

(tensor([[3., 1., 0.],
         [2., 0., 1.],
         [4., 0., 1.],
         [3., 0., 1.]], dtype=torch.float64),
 tensor([127500, 106000, 178100, 140000]))

In [25]:
a, b = torch.arange(12), torch.arange(8)

# c is a clone of the reshaped data, so filling c leaves a untouched
c = a.reshape(3, 4).clone()
c[:] = 2

# d is a view onto b, so filling d also overwrites the values seen through b
d = b.view(4, 2)
d[:] = 2

(a, b, c, d, *map(id, (a, b, c, d)))

(tensor([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11]),
 tensor([2, 2, 2, 2, 2, 2, 2, 2]),
 tensor([[2, 2, 2, 2],
         [2, 2, 2, 2],
         [2, 2, 2, 2]]),
 tensor([[2, 2],
         [2, 2],
         [2, 2],
         [2, 2]]),
 134240945437280,
 134240945437360,
 134241221408816,
 134240945286224)