# Torch Basics

Yihui "Ray" Ren 
yren@bnl.gov

## TOC:

* Vectorized Computation
    - interchangeable NumPy / torch API
    - simple linear regression in numpy and torch
* AutoGrad (Automatic Differentiation)
    - torch tensor, backward and `grad` 
    - autograd demo
    - `torch.nn.Module` and `forward`
    - re-write linear regression as a `torch.nn.Module`
* Handling Data 
    - Stochastic Gradient Descent (SGD)
    - `torch.utils.data.Dataset`
    - `torch.utils.data.DataLoader`
    - re-write linear regression
* Multi-layer Perceptron
    - activation functions
* GPU offloading
    - parameter and buffer

In [None]:
## load modules
import numpy as np
import pandas as pd
import torch 
import torch.nn as nn
import seaborn as sns
import matplotlib.pyplot as plt

# Report the installed version of each core package. Every alias is paired
# with its module object directly, so no eval() of strings is needed.
for pkg, module in (("np", np), ("pd", pd), ("torch", torch)):
    print(f"{pkg:<6} ver: {module.__version__}")

## last tested on 
#  np     ver: 1.18.5
#  pd     ver: 1.1.3
#  torch  ver: 1.7.0

## Vectorized Computation
Modified from the [myazdani/numpy-pytorch-cheatsheet](https://github.com/myazdani/numpy-pytorch-cheatsheet) repository.


In [None]:
## Array Creation
some_shape = (5, 3)
some_list = [5, 3, 2, 1]


def compare_numpy_torch(np_cb, th_cb, some):
    """Call a NumPy and a torch constructor with the same argument.

    Parameters
    ----------
    np_cb : callable
        NumPy-side constructor, invoked as ``np_cb(some)``.
    th_cb : callable
        torch-side constructor, invoked as ``th_cb(some)``.
    some : object
        Argument forwarded unchanged to both constructors.

    Returns
    -------
    tuple
        ``(numpy_shape, torch_shape)``: the ``.shape`` of each result.
    """
    np_result, th_result = np_cb(some), th_cb(some)
    return np_result.shape, th_result.shape

# Run the same shape-based constructor through both libraries. The functions
# are looked up by name with getattr(), which avoids eval() on built strings.
for func in ["empty", "ones", "zeros"]:
    print("compare", func)
    npf, thf = getattr(np, func), getattr(torch, func)
    print(compare_numpy_torch(npf, thf, some_shape))

# random tensor
print("compare", "rand")
x = np.random.rand(*some_shape) # np rand takes the dims as separate arguments, not a tuple
y = torch.rand(some_shape)
print(x.shape, y.shape)

# change random seed, then get and set the generator state (NumPy)
np.random.seed(5)
rng_state = np.random.get_state()
np.random.set_state(rng_state)

# same three steps with torch
torch.random.manual_seed(5)
rng_state = torch.random.get_rng_state()
torch.random.set_rng_state(rng_state)

# convert between numpy and torch
x = np.random.rand(2,2)
y = torch.tensor(x) # convert np to torch
z = y.numpy() # convert torch to np
# the round trip must give back exactly the same values
assert (x == z).all()

In [None]:
## tensor operation
def _banner(op):
    """Print ``op`` centered in a 20-character field between two runs of '='."""
    print(f"""{"="*30}{op:^20}{"="*30}""")


x_shape = (3, 3)
y_shape = (3, 3)

op = "init"
_banner(op)
# Draw the NumPy operands first (x, then y), then wrap each one as a tensor
npx, npy = np.random.rand(*x_shape), np.random.rand(*y_shape)
thx, thy = torch.tensor(npx), torch.tensor(npy)
print("x:",npx)
print("y:",npy)

### add
op = "add"
_banner(op)
npz = np.add(npx, npy)
thz = torch.add(thx, thy)
print("x+y=z:", npz)
assert np.array_equal(npz, thz.numpy())

### mat product
op = "multiply"
_banner(op)

# 1) infix operator
npz = npx @ npy
thz = thx @ thy
print("x@y=z:", npz)
assert np.allclose(npz, thz.numpy())

# 2) method form
npz = npx.dot(npy)
thz = thx.mm(thy)
print("x.mm(y)=z:", npz)
assert np.allclose(npz, thz.numpy())

# 3) package-level function form
npz = np.matmul(npx, npy)
thz = torch.mm(thx, thy)
print("pkg.mm(x, y)=z:", npz)
assert np.allclose(npz, thz.numpy())

### elementwise mult aka Hadamard product
op = "elementwise multi"
_banner(op)
npz = np.multiply(npx, npy)
thz = torch.mul(thx, thy)
print("x*y=z:", npz)
assert np.allclose(npz, thz.numpy())

In [None]:
## Tensor Manipulations
def create_test_tensors(x_shape):
    """Create a random NumPy array and a torch tensor holding the same values.

    Parameters
    ----------
    x_shape : sequence of int
        Dimensions, unpacked into ``np.random.rand``.

    Returns
    -------
    tuple
        ``(numpy_array, torch_tensor)`` where the tensor is built from the array.
    """
    samples = np.random.rand(*x_shape)
    return samples, torch.tensor(samples)
    
def _section(op):
    """Print ``op`` centered in a 20-character field between two runs of '='."""
    print(f"""{"="*30}{op:^20}{"="*30}""")


### transpose
op = "transpose"
_section(op)

# 2-D case: the .T attribute is available on both array and tensor
tensor_shape = (1,3)
npx, thx = create_test_tensors(tensor_shape)
npxT, thxT = npx.T, thx.T

# 3-D case: pass the new axis order explicitly
tensor_shape = (3,4,5)
npx, thx = create_test_tensors(tensor_shape)
print("before transpose", npx.shape, thx.shape)
axis_order = (1, 0, 2)
# the method form npx.transpose(axis_order) also works
npxT = np.transpose(npx, axis_order)
# NOTE: the function form torch.permute(thx, axis_order) did not work in torch 1.7
thxT = thx.permute(axis_order)
print("after transpose ", npxT.shape, thxT.shape)

### flatten and reshape
op = "flatten"
_section(op)
tensor_shape = (3,4,5)
npx, thx = create_test_tensors(tensor_shape)
# two NumPy spellings ...
npflat1, npflat2 = npx.reshape(-1), npx.flatten()
# ... and four torch spellings of the same operation
thflat1, thflat2 = thx.reshape(-1), thx.flatten()
thflat3, thflat4 = thx.view(-1), torch.flatten(thx)
for x in (npflat1, npflat2, thflat1, thflat2, thflat3, thflat4):
    print(x.shape)

### Squeeze and Unsqueeze (adding and removing dummy dimensions)
op = "squeeze"
_section(op)
tensor_shape = (3,1,5)
npx, thx = create_test_tensors(tensor_shape)
print("before squeeze", npx.shape, thx.shape)
npxs = np.squeeze(npx)
thxs = torch.squeeze(thx)
print("after squeeze ", npxs.shape, thxs.shape)

op = "unsqueeze"
_section(op)
npxus = np.expand_dims(npxs, 1)
thxus = torch.unsqueeze(thxs, 1)
print("after unsqueeze at dim 1:", npxus.shape, thxus.shape)

### Concat
op = "concatenate"
_section(op)
tensor_shape = (3,5)
npx, thx = create_test_tensors(tensor_shape)
npy, thy = create_test_tensors(tensor_shape)
print("before concat", npx.shape, npy.shape)

# stack along the first dimension
npz0 = np.concatenate((npx, npy), axis=0)
thz0 = torch.cat((thx, thy), dim=0)
assert npz0.shape == thz0.shape
print("after concat along dim 0:", npz0.shape)

# stack along the second dimension
npz1 = np.concatenate((npx, npy), axis=1)
thz1 = torch.cat((thx, thy), dim=1)
assert npz1.shape == thz1.shape
print("after concat along dim 1:", npz1.shape)

In [None]:
## get data for linear regression. 
wine_quality_url = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-white.csv"
df = pd.read_csv(wine_quality_url, delimiter=";")
w = df.corr()
sns.heatmap(w)
# Keep only the strict upper triangle (k=1) of |corr|, so the diagonal and the
# mirrored lower half cannot win the argmax below.
up_tri = np.triu(np.abs(w.to_numpy()),k=1)
# argmax indexes the flattened matrix; divmod recovers the (row, column) pair
max_idx = np.argmax(up_tri)
col_sz = len(df.columns)
row_idx, col_idx = divmod(max_idx, col_sz)
col1, col2 = df.columns[row_idx], df.columns[col_idx]
print(max_idx, up_tri[row_idx, col_idx], col1, col2)

# f-string, so the two column names are actually interpolated into the message
print(f"found {col1} and {col2} for linear regression")
plt.figure()
sns.scatterplot(x=df[col1], y=df[col2])
plt.title(f"{col1} and {col2} \n corr.coef. = {np.corrcoef(df[col1], df[col2])[0,1]:.4f}")

# 1-D NumPy views of the two selected columns, used by the regression cells
wine_x, wine_y = df[col1].to_numpy(), df[col2].to_numpy()

# Simple linear regression 
Find $a$ and $b$ such that the sum of squared errors
$\sum (y-y')^2$
is minimized, where the prediction is
$y' = ax+b$.

The closed-form solution is

$a = \sum(x - \bar{x})(y - \bar{y}) / \sum (x - \bar{x})^2$

$b = \bar{y} - a\bar{x}$


In [None]:
# Closed-form least-squares fit of wine_y ~ a * wine_x + b
xbar, ybar = np.mean(wine_x), np.mean(wine_y)
x_centered = wine_x - xbar
# slope: sum((x - xbar) * (y - ybar)) / sum((x - xbar)^2)
a = x_centered @ (wine_y - ybar) / np.power(x_centered, 2).sum()
# intercept: the fitted line passes through (xbar, ybar)
b = ybar - a * xbar

# data as hollow blue markers, fitted line in red
plt.scatter(wine_x, wine_y, facecolor='none', edgecolor='b', alpha=0.1)
grid = np.linspace(0, 30)
plt.plot(grid, a*grid+b, 'r')
plt.xlim([0,30])
plt.ylim([0.985,1.01])
mse = np.power(wine_y - (a*wine_x+b), 2).mean()
plt.title(f"mse = {mse:.4E}");

In [None]:
## Exercise: re-write in torch

In [None]:
## Solution
# Same closed-form fit as the NumPy cell, computed with torch tensors
thx = torch.tensor(wine_x)
thy = torch.tensor(wine_y)
xbar = torch.mean(thx)
ybar = torch.mean(thy)
# Both operands are 1-D, so `@` is already the inner product. No `.T` here:
# Tensor.T on tensors that are not 2-D is deprecated in recent PyTorch.
a = (thx - xbar)@(thy-ybar) / torch.pow(thx-xbar, 2).sum()
b = ybar - a * xbar
plt.scatter(wine_x, wine_y, facecolor='none', edgecolor='b', alpha=0.1)
# a and b are 0-dim tensors; take plain Python floats before mixing them with
# the NumPy grid handed to matplotlib
grid = np.linspace(0, 30)
plt.plot(grid, a.item()*grid+b.item(), 'r')
plt.xlim([0,30])
plt.ylim([0.985,1.01])
plt.title(f"mse = {torch.pow(thy - (a*thx+b), 2).mean():.4E}");

## AutoGrad

In [None]:
## y = a*x, dy/dx = ? 
# x is created with requires_grad=True so that backward() fills in x.grad
x = torch.tensor(0.1, requires_grad=True)
a = torch.tensor(3)
# y = a*x, hence dy/dx = a
y = torch.mul(a, x)
y.backward()
print(x, x.grad)

In [None]:
## y = exp(a*x), dy/dx = ? 
x = torch.tensor(0.3, requires_grad=True)
a = torch.tensor(3)
# chain rule: dy/dx = exp(a*x) * d(a*x)/dx = a * exp(a*x)
y = (a * x).exp()
y.backward()
# hand-derived value, printed next to the autograd result for comparison
expected = a*torch.exp(a*x).detach()
print(x, x.grad, expected)

In [None]:
## Exercise: pick a f(x) you like, and autograd it!  


In [None]:
## Solution:  for example y = (sin(x)+1)^x
x = torch.tensor(0.2, requires_grad=True)
base = torch.sin(x) + 1
y = base.pow(x)
y.backward()
# Hand-derived derivative: y = exp(x * ln(base)), so
# dy/dx = exp(x * ln(base)) * (ln(base) + x * cos(x) / base)
log_base = torch.log(base)
ans = torch.exp(x*log_base)*(log_base + x*base.pow(-1)*torch.cos(x))
print(x, x.grad, ans)

In [None]:
## Torch Module 
# https://pytorch.org/docs/stable/generated/torch.nn.Module.html
# torch.nn.Module: packs parameters and functions together
# two APIs to implement: __init__() and forward()

class Func(nn.Module):
    """Module that evaluates (sin(x) + 1) ** x at an internally stored scalar x."""

    def __init__(self):
        super().__init__()
        # x is a plain tensor attribute (not an nn.Parameter) that tracks gradients
        self.x = torch.tensor(0.2, requires_grad=True)

    def forward(self, input):
        """Return (sin(x) + 1) ** x; ``input`` is accepted but not used."""
        base = self.x.sin() + 1
        return base.pow(self.x)


## create a module 
func = Func()
# calling the module runs forward(); the demo passes None as the unused input
y = func(None)
y.backward()
print(func.x.grad)

In [None]:
## Error: if requires_grad=False
print("Warning: will produce error")


class FuncError(nn.Module):
    """Evaluates (sin(x) + 1) ** x, but x is created without requires_grad."""

    def __init__(self):
        super().__init__()
        # no requires_grad argument here, unlike the gradient-tracking variant
        self.x = torch.tensor(0.2)

    def forward(self, input):
        """Return (sin(x) + 1) ** x; ``input`` is accepted but not used."""
        base = self.x.sin() + 1
        return base.pow(self.x)


## create a module 
func = FuncError()
y = func(None)
try:
    y.backward()
except RuntimeError as err:
    # the failure is the point of this cell: show the message and carry on
    print(err)

print(func.x.grad)


In [None]:
## Better to use torch.nn.Parameter
## and name differently

class FuncPara(nn.Module):
    """Evaluates (sin(w) + 1) ** w with w stored as an ``nn.Parameter``."""

    def __init__(self):
        super().__init__()
        # wrap the initial value in nn.Parameter in a single step
        self.w = nn.Parameter(torch.tensor(0.2))

    def forward(self, input):
        """Return (sin(w) + 1) ** w; ``input`` is accepted but not used."""
        base = self.w.sin() + 1
        return base.pow(self.w)


## create a module 
func = FuncPara()
y = func(None)
y.backward()
print(func.w.grad)

In [None]:
## The benefits of nn.Parameter are twofold:
#  * the tensor is registered among the module's parameters.
#  * the tensor moves with the module when the module is sent to a device.

# NOTE(review): `func` is presumably the nn.Parameter-based module created in
# the previous cell; confirm the cells are run in order.
print("iterating parameters")
for p in func.parameters():
    print(p)

print("iterating parameters of the one using tensor")
func = Func()
for p in func.parameters():
    print(p)
print("got nothing")

if torch.cuda.is_available():
    # Parameter-based module: report the device before and after .cuda()
    func_p = FuncPara()
    device_before = func_p.w.device
    print(device_before)
    func_p = func_p.cuda()
    device_after = func_p.w.device
    print(device_after)

    # Plain-tensor module: same two steps, for comparison with the above
    func_t = Func()
    device_before = func_t.x.device
    print(device_before)
    func_t = func_t.cuda()
    device_after = func_t.x.device
    print(device_after)

    # recorded print out (the plain tensor stays on the cpu):
    # cpu
    # cuda:0
    # cpu
    # cpu