In [2]:
import numpy as np

In [5]:
# Generate data

# true value = 추정과 학습의 대상

true_b = 1 # true bias
true_w = 2 # true weight
N = 100 # number of data

np.random.seed(42)
x = np.random.rand(N,1) # runif(N), (N,1) arrary return
epsilon = 0.1 * np.random.randn(N,1) # rnorm(N)
y = true_b + true_w * x + epsilon # vector 곱 ( => @ : 행렬곱)

In [11]:
# Train-Validation Split

# validation과 test 구분할 것
# train과 test의 분포가 서로 같아야함!!!! => need to be checked

# shuffles the indices
idx = np.arange(N) # 0~99 vector 생성
split_index = int(N * 0.8) # int() 정수로 반환

# split_index 전/후로 Train-Validation split
train_idx = idx[:split_index] # 0~79
val_idx = idx[split_index:] # 80~99

x_train, y_train = x[train_idx], y[train_idx]
x_val, y_val = x[val_idx], y[val_idx]

In [10]:
# our goal : validation set을 가장 잘 설명하는 w와 b를 train set을 통해 도출
# y와 y_hat의 차이가 가장 작아지도록 : minimize MSE(loss)

# loss surface : w_hat, b_hat을 grid로 하고 그 때의 MSE
# gradient descent : 미분값을 빼주면 방향성 설정 가능
# gradient = 최소점으로 가는 벡터의 방향성을 의미함
# 실제 paramater : gradient * learning rate로 업데이트 => 수렴을 위하여

80

In [22]:
# 시간 측정하는 코드
# @timer를 함수 위에 달아둘 것!!
import time
def timer(func):
    def wrapper(*args, **kwargs):
        start_time = time.time()
        result = func(*args,**kwargs)
        end_time = time.time()
        computation_time = end_time - start_time
        print(f"Execution time of {func.__name__}: {computation_time} seconds")
        return result
    return wrapper

In [23]:
# gradient Descent for Linear regression
@timer
def train_model_numpy(lr = 0.1, epochs = 1000):
    # Initialize parameters : DL의 성능에 큰 영향
    b = np.random.randn(1)
    w = np.random.randn(1)

    for epoch in range(epochs):
        
        # Loss computation
        y_hat = b + w * x_train
        error = (y_hat - y_train)
        mse_loss = np.mean(error ** 2)
        
        # Gradient computation : 미분 수식 넣어볼것
        b_grad = 2 * np.mean(error)
        w_grad = 2 * np.mean(x_train * error)
        b = b - lr * b_grad
        w = w - lr * w_grad
    return b, w

Execution time of train_model_numpy: 0.026873350143432617 seconds


(array([1.02341381]), array([1.93680715]))

In [25]:
b_hat, w_hat = train_model_numpy()
print("b_estimate:{}, w_estimate{}".format(b_hat, w_hat))

Execution time of train_model_numpy: 0.023286819458007812 seconds
b_estimate:[1.02341396], w_estimate[1.93680685]


In [42]:
# pip install torch

import torch

# create tensor at CPU
x_train_tensor = torch.as_tensor(x_train)
y_train_tensor = torch.as_tensor(y_train)

# create tensor at GPU
# 현재 GPU가 연결이 되어있다면 그곳에 자료를 넣겠다
device = 'cuda' if torch.cuda.is_available() else 'cpu' 
x_train_tensor = torch.as_tensor(x_train).to(device)
y_train_tensor = torch.as_tensor(y_train).to(device)

x_val_tensor = torch.as_tensor(x_val).to(device)
y_val_tensor = torch.as_tensor(y_val).to(device)

# CPU,GPU 상의 data는 통신 없이는 서로 계산이 불가능


In [32]:
# Gradient Descent by PyTorch
@timer
def train_model_torch(lr = 0.1, epochs = 1000):

    # Initialize paramaters
    
    # requires_grad = True : 학습대상임을 설정
    # dtype : 수치형 자료의 타입을 지정(default = float32)
    # device : CPU인지 GPU인지
    b = torch.randn(1, requires_grad = True, dtype = torch.float, device = device)
    w = torch.randn(1, requires_grad = True, dtype = torch.float, device = device)

    for epoch in range(epochs):
        # Loss computation
        y_hat = b + w * x_train_tensor
        error = (y_hat - y_train_tensor)
        mse_loss = torch.mean(error ** 2)

        # Gradient computation and descent

        # backward : gradient를 자동으로 계산해 b,w안에 저장해줌
        mse_loss.backward()

        # update는 직접 해줘야함
        # in-place operation : memory의 위치가 변하지 않음(같은 메모리 주소에 값을 업데이트)
        # 만약 memory 주소가 바뀐다면 b.grad에 저장된 정보들을 불러올 때 큰 computation loss가 발생
        # no_grad() : autograd를 멈춤
        with torch.no_grad():
            b -= lr * b.grad 
            w -= lr * w.grad
        b.grad.zero_() # zero_ : in-place operation
        w.grad.zero_()
    return b, w

train_model_torch()  


Execution time of train_model_torch: 0.1213986873626709 seconds


(tensor([1.0234], requires_grad=True), tensor([1.9368], requires_grad=True))

In [44]:
import torch.optim as optim
import torch.nn as nn

'''
learning rate : 미분값을 얼마나 이동시킬 것인지(계수) 
lr이 크다면 초반엔 loss가 빠르게 줄어들지만 후반부에 underfitting이 발생할 수 있음
'''
@timer
def train_model_torch_optim(lr = 0.1, epochs = 1000):

    # Initialize paramaters
    
    # requires_grad = True : 학습대상임을 설정
    # dtype : 수치형 자료의 타입을 지정(default = float32)
    # device : CPU인지 GPU인지
    b = torch.randn(1, requires_grad = True, dtype = torch.float, device = device)
    w = torch.randn(1, requires_grad = True, dtype = torch.float, device = device)
    
    parameters = [b, w]
    optimizer = optim.SGD(parameters, lr = lr) # optimizer의 종류, 최적화대상, learning rate 지정
    mse_loss = nn.MSELoss() # 다른 종류의 loss를 사용하고 싶은 경우 함수만 바꿔주면됨
    
    for epoch in range(epochs):
        # Loss computation
        y_hat = b + w * x_train_tensor
        loss = mse_loss(y_hat, y_train_tensor) 

        # Gradient computation and descent
        loss.backward()
        optimizer.step() # paramater update
        optimizer.zero_grad()

    y_pred = b + w * x_val_tensor
    test_MSE = mse_loss(y_pred, y_val_tensor)

    return b, w, test_MSE





Execution time of train_model_torch_optim: 0.13792657852172852 seconds


(tensor([1.0234], requires_grad=True),
 tensor([1.9368], requires_grad=True),
 tensor(0.0098, dtype=torch.float64, grad_fn=<MseLossBackward0>))

In [45]:
b_hat, w_hat, test_MSE = train_model_torch_optim()
print("b_estimate:{}, w_estimate{}, test_MSE{}".format(b_hat, w_hat, test_MSE))

Execution time of train_model_torch_optim: 0.13974785804748535 seconds
b_estimate:tensor([1.0234], requires_grad=True), w_estimatetensor([1.9368], requires_grad=True), test_MSE0.009755489943201115


In [52]:
import pickle
with open('C:/Users/hecor/Downloads/quiz_data.pkl', 'rb') as f:
  data = pickle.load(f)

In [69]:
# dict 형태의 data(key를 이용하여 불러와야함)
N = len(data['x'])
x = data['x']
y = data['y']

idx = np.arange(N) # 0~99 vector 생성
split_index = int(N * 0.8) # int() 정수로 반환

# split_index 전/후로 Train-Validation split
train_idx = idx[:split_index] # 0~79
val_idx = idx[split_index:] # 80~99

x_train, y_train = x[train_idx], y[train_idx]
x_val, y_val = x[val_idx], y[val_idx]

# GPU
device = 'cuda' if torch.cuda.is_available() else 'cpu' 
x_train_tensor = torch.as_tensor(x_train).to(device)
y_train_tensor = torch.as_tensor(y_train).to(device)

x_val_tensor = torch.as_tensor(x_val).to(device)
y_val_tensor = torch.as_tensor(y_val).to(device)


Execution time of train_model_torch_optim: 0.1363527774810791 seconds


(tensor([-0.3087], requires_grad=True),
 tensor([-0.1602], requires_grad=True),
 tensor(0.2981, dtype=torch.float64, grad_fn=<MseLossBackward0>))