In [None]:
import numpy as np

# Ground-truth parameters of the synthetic linear model y = true_b + true_w * x + noise.
true_b, true_w = 1, 2
N = 100  # total number of samples

# Seed NumPy's global generator so the draws below are reproducible.
np.random.seed(42)
x = np.random.rand(N, 1)  # features drawn from Unif(0, 1), shape (N, 1)
epsilon = 0.1 * np.random.randn(N, 1)  # standard-normal draws scaled by 0.1, i.e. N(0, 0.1^2) noise
y = true_b + true_w * x + epsilon  # `*` is element-wise; `@` would be a matrix product

# Index-based 80/20 train/validation split.
# The indices are NOT shuffled: the first 80% of the rows become the training set and
# the remaining rows the validation set (x is drawn i.i.d. above, so the rows carry no order).
idx = np.arange(N)
split_index = int(N * 0.8)
train_idx, val_idx = idx[:split_index], idx[split_index:]

# Materialise the train and validation arrays from the index sets.
x_train, x_val = x[train_idx], x[val_idx]
y_train, y_val = y[train_idx], y[val_idx]

In [None]:
import functools
import time

import numpy as np
def timer(func):
  """Decorator that prints how long each call to ``func`` takes.

  Args:
    func: The callable to wrap.

  Returns:
    A wrapper that forwards all positional and keyword arguments to ``func``,
    prints the elapsed time in seconds, and returns ``func``'s result unchanged.
    If ``func`` raises, the exception propagates and nothing is printed.
  """
  # functools.wraps copies __name__, __doc__, etc. onto the wrapper, so the
  # decorated function still identifies as itself (e.g. in help() or tracebacks).
  @functools.wraps(func)
  def wrapper(*args, **kwargs):
    # perf_counter is a monotonic, high-resolution clock intended for measuring
    # durations; time.time() can jump when the system clock is adjusted.
    start_time = time.perf_counter()
    result = func(*args, **kwargs)
    computation_time = time.perf_counter() - start_time
    print(f"Execution time of {func.__name__} : {computation_time} seconds")
    return result
  return wrapper


@timer
def train_model_numpy(lr=0.1, epochs=1000):
  """Fit the line y = b + w * x with full-batch gradient descent in plain NumPy.

  Reads the module-level arrays ``x_train`` and ``y_train``.

  Args:
    lr: Learning rate (step size) of each gradient-descent update.
    epochs: Number of gradient-descent iterations to run.

  Returns:
    Tuple ``(b, w)`` of NumPy arrays of shape (1,): the fitted intercept and slope.
  """
  # Random starting point: one standard-normal draw each for intercept and slope.
  b = np.random.randn(1)
  w = np.random.randn(1)

  for _ in range(epochs):
    # Forward pass: residuals of the current line on the training data.
    residual = (b + w * x_train) - y_train
    mse_loss = np.mean(residual ** 2)  # current MSE; computed for illustration, not used below

    # Gradients of the MSE, derived by hand:
    #   d(MSE)/db = 2 * mean(residual),  d(MSE)/dw = 2 * mean(x * residual)
    grad_b = 2 * np.mean(residual)
    grad_w = 2 * np.mean(x_train * residual)

    # Gradient-descent step on both parameters.
    b, w = b - lr * grad_b, w - lr * grad_w

  return b, w

In [None]:
# Fit the NumPy model with the default lr/epochs and print the estimated intercept and slope.
b_hat, w_hat = train_model_numpy()
print("b_estimate:{}, w_estimate: {}". format(b_hat, w_hat))

Execution time of train_model_numpy : 0.030369997024536133 seconds
b_estimate:[1.0234136], w_estimate: [1.93680757]


PyTorch


In [None]:
import torch
# Pick the compute device once: the GPU when CUDA is available, otherwise the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Build the training tensors once, directly on `device` (with device == 'cpu' they simply
# live in CPU memory). They are cast to float32 so they match the parameters, which the
# training functions create with dtype=torch.float. Without the cast the float64 NumPy
# data stays float64 and every forward pass is silently promoted to double precision.
x_train_tensor = torch.as_tensor(x_train, dtype=torch.float, device=device)
y_train_tensor = torch.as_tensor(y_train, dtype=torch.float, device=device)

In [None]:
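# In-place operations (methods ending in `_`, or `-=`) are more memory-efficient: they write the result into the tensor's existing storage instead of allocating a new tensor.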

def train_model_torch(lr=0.1, epochs=1000):
    """Fit y = b + w * x by gradient descent, letting autograd compute the gradients.

    Reads the module-level ``device``, ``x_train_tensor`` and ``y_train_tensor``.

    Args:
        lr: Learning rate (step size) of each update.
        epochs: Number of gradient-descent iterations to run.

    Returns:
        Tuple ``(b, w)`` of 1-element tensors (still flagged ``requires_grad=True``):
        the fitted intercept and slope.
    """
    # Trainable parameters:
    #   requires_grad=True -> autograd tracks them, so no gradient is derived by hand
    #   dtype              -> element type of the tensor (float, double, int, ...)
    #   device             -> where the tensor is allocated (CPU or GPU)
    b = torch.randn(1, requires_grad=True, dtype=torch.float, device=device)
    w = torch.randn(1, requires_grad=True, dtype=torch.float, device=device)

    for _ in range(epochs):
        # Forward pass: same model arithmetic as the NumPy version.
        residual = (b + w * x_train_tensor) - y_train_tensor
        mse_loss = (residual ** 2).mean()

        # backward() only fills in .grad on b and w; it does not update them.
        mse_loss.backward()

        # The parameter update has to be written by hand. It runs under no_grad so the
        # step itself is not tracked by autograd; an in-place update of a leaf tensor
        # that requires grad is rejected outside no_grad.
        with torch.no_grad():
            b -= lr * b.grad
            w -= lr * w.grad

        # Gradients accumulate across backward() calls, so reset them in place
        # (the trailing underscore marks an in-place operation).
        for param in (b, w):
            param.grad.zero_()
    return b, w

In [None]:
# Fit the model with train_model_torch (default lr/epochs) and print the estimates;
# the returned tensors are the trained parameters and still carry requires_grad=True.
b_hat, w_hat = train_model_torch()
print("b_estimate:{}, w_estimate: {}". format(b_hat, w_hat))

b_estimate:tensor([1.0234], requires_grad=True), w_estimate: tensor([1.9368], requires_grad=True)


PyTorch (advanced)

In [None]:
import torch.optim as optim
import torch.nn as nn



def train_model_torch(lr=0.1, epochs=1000):
    """Fit y = b + w * x using PyTorch's built-in SGD optimizer and MSE loss.

    Reads the module-level ``device``, ``x_train_tensor`` and ``y_train_tensor``.

    NOTE(review): this reuses the name of the manual-update ``train_model_torch``
    defined earlier in the notebook, so running this cell shadows that version.

    Args:
        lr: Learning rate handed to the SGD optimizer.
        epochs: Number of optimisation steps to run.

    Returns:
        Tuple ``(b, w)`` of 1-element tensors (still flagged ``requires_grad=True``):
        the fitted intercept and slope.
    """
    # Trainable parameters, randomly initialised on the selected device.
    b = torch.randn(1, requires_grad=True, dtype=torch.float, device=device)
    w = torch.randn(1, requires_grad=True, dtype=torch.float, device=device)

    # The optimizer owns the update rule and the learning rate; the loss is a separate
    # object, so switching to another loss only means swapping this one line.
    optimizer = optim.SGD([b, w], lr=lr)
    criterion = nn.MSELoss()

    for _ in range(epochs):
        # Forward pass and loss.
        prediction = b + w * x_train_tensor
        loss = criterion(prediction, y_train_tensor)

        loss.backward()        # autograd fills in .grad for every parameter
        optimizer.step()       # replaces the hand-written `with torch.no_grad()` update
        optimizer.zero_grad()  # resets all gradients, however many parameters there are
    return b, w

In [None]:
# Fit the model with the optimizer-based train_model_torch (default lr/epochs) and print the estimates.
b_hat, w_hat = train_model_torch()
print("b_estimate:{}, w_estimate: {}". format(b_hat, w_hat))

b_estimate:tensor([1.0234], requires_grad=True), w_estimate: tensor([1.9368], requires_grad=True)
