In [1]:
# 넘파이

import numpy as np
np.random.seed(0)

N,D = 3, 4 

x = np.random.randn(N,D)
y = np.random.randn(N,D)
z = np.random.randn(N,D)

a = x*y
b = a + z
c = np.sum(b)

grad_c = 1.0
grad_b = grad_c * np.ones((N,D))
grad_a = grad_b.copy()
grad_z = grad_b.copy()
grad_x = grad_a * y
grad_y = grad_a * x


In [3]:
# 파이토치

device = 'cuda:0' # GPU 상에서도 간단히 연산시킬 수 있다. 
import torch

N,D = 3, 4 
x = torch.randn(N,D, requires_grad=True)
y = torch.randn(N,D)
z = torch.randn(N,D)

a = x*y
b = a + z
c = torch.sum(b)

c.backward()
print(x.grad)

tensor([[-1.9713, -0.5809,  0.3431, -1.1704],
        [ 0.1087, -1.0006,  1.5713, -0.4357],
        [-0.6280,  0.1170,  2.3806, -1.3271]])


In [6]:
#  두 층의 ReLU 네트워크, 랜덤 데이터 사용, L2 loss 사용
# GPU 에서도 연산 가능

import torch

device = torch.device("cpu")

# 데이터와 가중치에 관한 랜덤 텐서 만들기
N, D_in, H, D_out = 64, 1000, 100, 10
x = torch.randn(N,D_in, device=device)
y = torch.randn(N,D_out, device=device)
w1 = torch.randn(D_in, H, device=device)
w2 = torch.randn(H, D_out, device=device)


learning_rate = 1e-6
for t in range(500):
    # 연산 (forward pass)
    h = x.mm(w1)
    h_relu = h.clamp(min=0)
    y_pred = h_relu.mm(w2)
    loss = (y_pred - y).pow(2).sum()
    
    # 연산 (Backward를 통해 수동으로 연산)
    grad_y_pred = 2.0 * (y_pred - y)
    grad_w2 = h_relu.t().mm(grad_y_pred)
    grad_h_relu = grad_y_pred.mm(w2.t())
    grad_h = grad_h_relu.clone()
    grad_h[h<0] = 0
    grad_w1 = x.t().mm(grad_h)
    
    # 그래디언트를 통해 가중치 업데이트 
    w1 -= learning_rate * grad_w1
    w2 -= learning_rate * grad_w2
    


In [9]:
# PyTorch: Autograd

import torch

N, D_in, H, D_out = 64, 1000, 100, 10

x = torch.randn(N,D_in)
y = torch.randn(N,D_out)
w1 = torch.randn(D_in, H, requires_grad=True) # gradient 를 자동으로 추적해서 계산
w2 = torch.randn(H, D_out, requires_grad=True) # gradient 를 자동으로 추적해서 계산

learning_rate = 1e-6
for t in range(500):
    # 연산 (forward pass)
    y_pred = x.mm(w1).clamp(min=0).mm(w2)
    loss = (y_pred - y).pow(2).sum()
    
    loss.backward() # gradient 자동 계산
    
    # 학습
    with torch.no_grad():
        w1 -= learning_rate * w1.grad
        w2 -= learning_rate * w2.grad
        # 초기화
        w1.grad.zero_()
        w2.grad.zero_()


In [10]:
# Autograd Function cutomize

class MyReLU(torch.autograd.Function):
    @staticmethod
    def forward(ctx, x):
        ctx.save_for_backward(x)
        return x.clamp(min=0)
    
    @staticmethod
    def backward(ctx, grad_y):
        x, = ctx.saved_tensors
        grad_input = grad_y.clone()
        grad_input[x<0] = 0
        return grad_input


In [11]:
# Pytorch nn -> Higher-level wrapper for working with neural nets

import torch

N, D_in, H, D_out = 64, 1000, 100, 10
x = torch.randn(N,D_in)
y = torch.randn(N,D_out)

# 직접 계산하지 않고 모델을 만들어서 사용
model = torch.nn.Sequential(
            torch.nn.Linear(D_in, H), 
            torch.nn.ReLU(),
            torch.nn.Linear(H, D_out))

learning_rate = 1e-2
for t in range(500):
    y_pred = model(x) # 모델을 사용해 간단히 표현
    loss = torch.nn.functional.mse_loss(y_pred,y)
    
    loss.backward()
    
    with torch.no_grad():
        for param in model.parameters():
            param -= learning_rate * param.grad
    model.zero_grad()

In [12]:
import torch

N, D_in, H, D_out = 64, 1000, 100, 10
x = torch.randn(N,D_in)
y = torch.randn(N,D_out)

# 직접 계산하지 않고 모델을 만들어서 사용
model = torch.nn.Sequential(
            torch.nn.Linear(D_in, H), 
            torch.nn.ReLU(),
            torch.nn.Linear(H, D_out))

learning_rate = 1e-4
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate) # 모델학습시 optim 사용 

for t in range(500):
    y_pred = model(x) # 모델을 사용해 간단히 표현
    loss = torch.nn.functional.mse_loss(y_pred,y)
    
    loss.backward()
    
    optimizer.step() # 자동 학습
    optimizer.zero_grad() # 초기화

In [13]:
import torch
from torch.utils.data import TensorDataset, DataLoader

N, D_in, H, D_out = 64, 1000, 100, 10
x = torch.randn(N,D_in)
y = torch.randn(N,D_out)

loader = DataLoader(TensorDataset(x,y), batch_size=8) # 데이터셋 설정
model = TwoLayerNet(D_in, H, D_out)

optimizer = torch.optim.SGD(model.parameters(), lr=1e-2) 
for epoch in range(20):
    for x_batch, y_batch in loader:
        y_pred = model(x_batch)
        loss = torch.nn.functional.mse_loss(y_pred, y_batch)
        
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

NameError: name 'TwoLayerNet' is not defined