# Basic Use of PyTorch

In [1]:
import torch

In [2]:
# torch.arange(start, end) builds a 1-D tensor of consecutive integers in [start, end)
x = torch.arange(0, 12)
x

tensor([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

In [3]:
# shape -- the size of the tensor along each axis (a single axis of length 12 here)
x.shape

torch.Size([12])

In [4]:
# numel -- total number of elements stored in the tensor
x.numel()

12

In [5]:
# reshape -- the same 12 elements rearranged into 3 rows by 4 columns
x = x.reshape((3, 4))
x

tensor([[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]])

In [6]:
# zeros -- a tensor of the requested shape filled with 0.0
# (ones is the counterpart that fills with 1.0)
torch.zeros((2, 2, 2))

tensor([[[0., 0.],
         [0., 0.]],

        [[0., 0.],
         [0., 0.]]])

In [7]:
# torch.tensor -- build a tensor straight from a (nested) Python list;
# each inner list becomes one row
torch.tensor([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
    [10, 11, 12],
])

tensor([[ 1,  2,  3],
        [ 4,  5,  6],
        [ 7,  8,  9],
        [10, 11, 12]])

In [8]:
# two equal-length integer vectors used as operands for the arithmetic operators
x, y = torch.tensor([1, 2, 3, 4]), torch.tensor([2, 2, 2, 2])

In [9]:
# the arithmetic operators act element by element on same-shaped tensors;
# `/` is true division, so its result is floating point even for integer inputs
print(x + y, x - y, x * y, x / y, x ** y, sep="\n")

tensor([3, 4, 5, 6])
tensor([-1,  0,  1,  2])
tensor([2, 4, 6, 8])
tensor([0.5000, 1.0000, 1.5000, 2.0000])
tensor([ 1,  4,  9, 16])


In [10]:
# exp -- element-wise e**x; the integer input yields a floating-point result
torch.exp(x)

tensor([ 2.7183,  7.3891, 20.0855, 54.5981])

In [11]:
# dtype= specifies the element type at creation time (float32 here);
# X and Y are built separately, so they are two independent tensors with equal values
X = torch.arange(12, dtype=torch.float32).reshape((3, 4))
Y = torch.arange(12, dtype=torch.float32).reshape((3, 4))

In [12]:
# cat -- join tensors end to end along an existing axis;
# dim=0 stacks the rows of Y underneath the rows of X
torch.cat([X, Y], dim=0)

tensor([[ 0.,  1.,  2.,  3.],
        [ 4.,  5.,  6.,  7.],
        [ 8.,  9., 10., 11.],
        [ 0.,  1.,  2.,  3.],
        [ 4.,  5.,  6.,  7.],
        [ 8.,  9., 10., 11.]])

In [13]:
# dim=1 places the columns of Y to the right of the columns of X
torch.cat([X, Y], dim=1)

tensor([[ 0.,  1.,  2.,  3.,  0.,  1.,  2.,  3.],
        [ 4.,  5.,  6.,  7.,  4.,  5.,  6.,  7.],
        [ 8.,  9., 10., 11.,  8.,  9., 10., 11.]])

In [14]:
# == compares element by element and returns a boolean tensor of the same shape
X==Y

tensor([[True, True, True, True],
        [True, True, True, True],
        [True, True, True, True]])

In [15]:
# sum -- adds up every element and returns the total as a single-element tensor
X.sum()

tensor(66.)

In [16]:
# broadcasting mechanism -- shapes are compared axis by axis; two sizes are
# compatible when they are equal or when one of them is 1 (the size-1 axis is
# stretched to match the other). Here (3, 1) + (1, 2) broadcasts to (3, 2);
# sizes that differ without either being 1 raise an error.
a = torch.arange(3).reshape((3, 1))
b = torch.arange(2).reshape((1, 2))
a + b

tensor([[0, 1],
        [1, 2],
        [2, 3]])

In [18]:
# in place: += updates the existing tensor object, so Y keeps the same id
before_id = id(Y)
Y += 1
print(id(Y) == before_id)
# in place: assigning to an indexed element also writes into the existing tensor
before_id = id(Y)
Y[1, 1] = 1
print(id(Y) == before_id)

True

In [19]:
# not in place: Y + 1 builds a new tensor and the name Y is rebound to it,
# so the id is different afterwards
before_id = id(Y)
Y = Y + 1
id(Y) == before_id

False

In [21]:
import numpy

In [22]:
# tensor <-> numpy round trip:
#   X.numpy() yields a numpy.ndarray, torch.tensor(A) yields a torch.Tensor again
A=X.numpy()
B=torch.tensor(A)
type(A), type(B)

(numpy.ndarray, torch.Tensor)

In [23]:
# item() extracts a single element as a plain Python scalar. A is the numpy
# array obtained from X.numpy(), so A[0, 0] is a numpy scalar here;
# int(...) converts it directly as well.
A[0, 0].item(), type(A[0, 0].item()), int(A[0, 0])

(0.0, float, 0)

# Data Preprocessing

In [24]:
import os
# make sure the target directory exists (no error if it is already there)
os.makedirs(os.path.join('..', 'data'), exist_ok=True)
# location of the small demo CSV
data_file = os.path.join('..', 'data', 'house_tiny.csv')
with open(data_file, 'w') as f:
    # header row followed by four records; 'NA' marks a missing value
    f.writelines([
        'NumRooms,Alley,Price\n',
        'NA,PAVE,127500\n',
        '2,NA,106000\n',
        '4,NA,178100\n',
        'NA,NA,140000\n',
    ])

In [28]:
import pandas as pd
# load the CSV into a DataFrame; the 'NA' cells come back as missing values (NaN)
data=pd.read_csv(data_file)
data

Unnamed: 0,NumRooms,Alley,Price
0,,PAVE,127500
1,2.0,,106000
2,4.0,,178100
3,,,140000


In [29]:
# Handling missing values (NaN) by imputation.
# iloc -- integer-position indexing: the first two columns are the features,
# the third column (Price) is the target.
inputs, outputs = data.iloc[:, 0:2], data.iloc[:, 2]
# Replace each missing number with its column mean. numeric_only=True restricts
# the mean to numeric columns: the string column Alley has no mean, and without
# the flag pandas emits a FutureWarning (1.x) or raises a TypeError (2.x).
# Alley's NaN entries are left untouched by this step.
inputs = inputs.fillna(inputs.mean(numeric_only=True))
inputs

  inputs = inputs.fillna(inputs.mean())


Unnamed: 0,NumRooms,Alley
0,3.0,PAVE
1,2.0,
2,4.0,
3,3.0,


In [30]:
# get_dummies -- one-hot encode the non-numeric columns: each distinct value
# becomes its own indicator column.
# dummy_na=True -- NaN is treated as a category of its own (column Alley_nan).
# dtype=int -- keep the indicators as 0/1 integers; pandas >= 2.0 defaults to
# bool, which would turn inputs.values into an object array that torch.tensor
# cannot convert.
inputs = pd.get_dummies(inputs, dummy_na=True, dtype=int)
inputs

Unnamed: 0,NumRooms,Alley_PAVE,Alley_nan
0,3.0,1,0
1,2.0,0,1
2,4.0,0,1
3,3.0,0,1


In [35]:
# pandas -> numpy: .values exposes the frame's contents as a numpy array
inputs.values

array([[3., 1., 0.],
       [2., 0., 1.],
       [4., 0., 1.],
       [3., 0., 1.]])

In [37]:
# numpy -> pandas: rebuilding a DataFrame from the bare array loses the schema;
# the column names are replaced by the default integer labels 0, 1, 2
pd.DataFrame(inputs.values)

Unnamed: 0,0,1,2
0,3.0,1.0,0.0
1,2.0,0.0,1.0
2,4.0,0.0,1.0
3,3.0,0.0,1.0


In [39]:
# pandas -> tensor, going through the underlying numpy arrays.
# The feature tensor comes out as float64; float32 is the dtype most commonly
# used in deep learning and is usually sufficient.
x = torch.tensor(inputs.values)
y = torch.tensor(outputs.values)
x, y

(tensor([[3., 1., 0.],
         [2., 0., 1.],
         [4., 0., 1.],
         [3., 0., 1.]], dtype=torch.float64),
 tensor([127500, 106000, 178100, 140000]))