In [5]:
import torch
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import os
import nbimporter

# Device on which SimpleDataset creates its tensors.
device = torch.device("cpu")
# device = torch.device("cuda:0") # Uncomment this line to run on the GPU

# NOTE(review): model, loss_fn and SimpleDataset are defined again in later
# cells of this notebook, so those definitions shadow the names imported here.
from a_single_neuron import model, loss_fn, SimpleDataset

In [6]:
# Display the kernel's current working directory.
os.getcwd()

'c:\\git_deeplearning\\DeepLearning\\_03_your_code\\_04_artificial_neuron_and_gradient_descent'

In [7]:
class SimpleDataset(Dataset):
  """Twelve hard-coded samples with two input features and one scalar target.

  Attributes:
    X: float tensor of shape (12, 2) holding the inputs.
    y: float tensor of shape (12,) holding the targets multiplied by 0.01.

  Both tensors are created on the module-level ``device``.
  """

  def __init__(self, *args, **kwargs):
    super().__init__(*args, **kwargs)

    features = [
      [0.5, 0.9], [14.0, 12.0], [15.0, 13.6],
      [28.0, 22.8], [11.0, 8.1], [8.0, 7.1],
      [3.0, 2.9], [4.0, 0.1], [6.0, 5.3],
      [13.0, 12.0], [21.0, 19.9], [-1.0, 1.5],
    ]
    raw_targets = [
      35.7, 55.9, 58.2, 81.9, 56.3, 48.9,
      33.9, 21.8, 48.4, 60.4, 68.4, 29.1,
    ]

    self.X = torch.tensor(features, dtype=torch.float, device=device)
    # The stored targets are the raw values scaled by 0.01.
    self.y = torch.tensor(raw_targets, dtype=torch.float, device=device) * 0.01

  def __len__(self):
    """Number of samples (rows of X)."""
    return self.X.shape[0]

  def __getitem__(self, idx):
    """Return the (input, target) pair stored at ``idx``."""
    sample = self.X[idx]
    label = self.y[idx]
    return sample, label

  def __str__(self):
    """One-line summary: sample count plus the shapes of X and y."""
    return "Data Size: {0}, Input Shape: {1}, Target Shape: {2}".format(
      len(self.X), self.X.shape, self.y.shape
    )

In [8]:
def model(X, W, b):
  """Single neuron: weighted sum of the inputs plus bias, passed through ``activate``.

  Shapes recorded by the original author for this notebook's data:
  X is torch.Size([12, 2]), W is torch.Size([2]), b is torch.Size([1]).

  Returns:
    activate(sum(X * W, dim=1) + b), one value per row of X.
  """
  # W broadcasts across the rows of X; summing over dim 1 yields one value per sample.
  weighted_sum = (X * W).sum(dim=1)
  return activate(weighted_sum + b)

In [9]:
def activate(u):
  """Apply the logistic sigmoid, 1 / (1 + exp(-u)), element-wise to ``u``.

  Args:
    u: tensor of pre-activation values.

  Returns:
    Tensor of the same shape as ``u`` with values in (0, 1).
  """
  # torch.sigmoid is the canonical entry point; F.sigmoid is a legacy alias
  # that older PyTorch releases flag with a deprecation warning.
  return torch.sigmoid(u)

In [10]:
def loss_fn(y_pred, y):
  """Mean squared error between ``y_pred`` and ``y``.

  Returns:
    A tensor holding the mean of the element-wise squared differences.
  """
  residual = y_pred - y
  mse = torch.mean(torch.square(residual))
  # Sanity check: the reduction must leave a single value.
  assert mse.shape in ((), (1,))
  return mse

In [11]:
def gradient(W, b, X, y):
  """Analytic gradient of the mean-squared-error loss for the sigmoid neuron.

  With pre-activation u = sum(X * W, dim=1) + b and prediction s = sigmoid(u),
  the per-sample loss is (s - y)^2, so by the chain rule:

    dL/dW = mean(2 * (s - y) * s * (1 - s) * X, dim=0)
    dL/db = mean(2 * (s - y) * s * (1 - s),     dim=0)

  Args:
    W: weights, shape (2,) in this notebook.
    b: bias, shape (1,).
    X: inputs, shape (12, 2).
    y: targets, shape (12,).

  Returns:
    Tuple (W_grad, b_grad); each is the batch mean over dim 0.
  """
  # One forward pass only. model() returns activate(u), so the prediction
  # itself is the sigmoid value needed for the derivative s * (1 - s).
  y_pred = model(X, W, b)

  # Loss derivative w.r.t. the prediction; trailing axis added so it
  # broadcasts against the feature columns of X.
  dl_dy = (2 * (y_pred - y)).unsqueeze(dim=-1)            # shape: [N, 1]

  # Sigmoid derivative w.r.t. the pre-activation u.
  ds_du = (y_pred * (1.0 - y_pred)).unsqueeze(dim=-1)     # shape: [N, 1]

  # Per-sample gradient w.r.t. u, shared by the weight and bias gradients.
  dl_du = dl_dy * ds_du

  W_grad = torch.mean(dl_du * X, dim=0)  # du/dW = X
  b_grad = torch.mean(dl_du, dim=0)      # du/db = 1

  return W_grad, b_grad

In [12]:
def learn(W, b, train_data_loader, max_epochs=20_000, learning_rate=0.01, log_interval=100):
  """Fit the neuron with plain gradient descent using the analytic gradient.

  Each epoch takes the first batch of a fresh iteration over
  ``train_data_loader`` (the whole dataset when the loader's batch size equals
  the dataset size), computes the loss and gradient, and updates W and b.

  Args:
    W: initial weight tensor.
    b: initial bias tensor.
    train_data_loader: iterable yielding (input, target) batches.
    max_epochs: number of update steps to run.
    learning_rate: step size applied to the gradients.
    log_interval: print progress every this many epochs; 0 or None disables logging.

  Returns:
    Tuple (W, b) holding the parameters after the final update. The updates
    create new tensors rather than modifying the arguments in place, so the
    caller must use the returned values to obtain the learned parameters.
  """
  for epoch in range(max_epochs):
    inputs, target = next(iter(train_data_loader))

    y_pred = model(inputs, W, b)
    loss = loss_fn(y_pred, target)

    W_grad, b_grad = gradient(W, b, inputs, target)

    # Log the state *before* this epoch's update is applied.
    if log_interval and epoch % log_interval == 0:
      print("[Epoch:{0:6,}] loss:{1:8.5f}, w0:{2:6.3f}, w1:{3:6.3f}, b:{4:6.3f}".format(
        epoch, loss.item(), W[0].item(), W[1].item(), b.item()
      ), end=", ")
      print("W.grad: {0}, b.grad:{1}".format(W_grad, b_grad))

    # Out-of-place updates: rebinding keeps the caller's tensors untouched.
    W = W - learning_rate * W_grad
    b = b - learning_rate * b_grad

  return W, b

In [13]:
def main():
  """Run one forward pass and loss evaluation, print them, then train.

  Builds the dataset, wraps it in a full-batch DataLoader, prints the initial
  prediction (shape and values) and loss, and finally calls ``learn``.
  """
  # Create the parameters on the same device as the dataset tensors; otherwise
  # switching ``device`` to CUDA fails with a device mismatch in X * W.
  W = torch.ones((2,), device=device)
  b = torch.zeros((1,), device=device)

  simple_dataset = SimpleDataset()
  # batch_size == dataset size, so every batch is the whole dataset.
  train_data_loader = DataLoader(dataset=simple_dataset, batch_size=len(simple_dataset))

  inputs, target = next(iter(train_data_loader))

  y_pred = model(inputs, W, b)
  print(y_pred.shape)
  print(y_pred)

  loss = loss_fn(y_pred, target)
  print(loss)

  learn(W, b, train_data_loader)


# Entry point: run the demo when this code is executed directly.
if __name__ == "__main__":
  main()


torch.Size([12])
tensor([0.8022, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 0.9973, 0.9837, 1.0000,
        1.0000, 1.0000, 0.6225])
tensor(0.2254)
[Epoch:     0] loss: 0.22539, w0: 1.000, w1: 1.000, b: 0.000, W.grad: tensor([0.0020, 0.0311]), b.grad:tensor([0.0271])
[Epoch:   100] loss: 0.22369, w0: 0.998, w1: 0.969, b:-0.027, W.grad: tensor([0.0030, 0.0307]), b.grad:tensor([0.0269])
[Epoch:   200] loss: 0.22203, w0: 0.994, w1: 0.939, b:-0.054, W.grad: tensor([0.0040, 0.0302]), b.grad:tensor([0.0267])
[Epoch:   300] loss: 0.22041, w0: 0.989, w1: 0.909, b:-0.080, W.grad: tensor([0.0052, 0.0297]), b.grad:tensor([0.0264])
[Epoch:   400] loss: 0.21882, w0: 0.984, w1: 0.879, b:-0.107, W.grad: tensor([0.0064, 0.0291]), b.grad:tensor([0.0261])
[Epoch:   500] loss: 0.21727, w0: 0.977, w1: 0.850, b:-0.133, W.grad: tensor([0.0077, 0.0285]), b.grad:tensor([0.0258])
[Epoch:   600] loss: 0.21575, w0: 0.968, w1: 0.822, b:-0.158, W.grad: tensor([0.0092, 0.0279]), b.grad:tensor([0.0254])
[Epoch:   700] 