<a href="https://colab.research.google.com/github/6X10/advanced-statistical-data-analysis/blob/main/one_layer_logistic_regression_practice.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

import torchvision.transforms as transforms
from torchvision.datasets import MNIST
from torch.utils.data import DataLoader

import numpy as np
import matplotlib.pyplot as plt


In [None]:
# Directory that torchvision uses to download and cache the MNIST files.
path = './datasets/'

# ToTensor is the only transformation applied to the images.
# Each sample is 1x28x28 and is flattened to a 784-vector (28*28) later on.
transform = transforms.Compose([transforms.ToTensor()])

# MNIST: images of handwritten digits labelled 0 through 9.
train_data = MNIST(root=path, train=True, transform=transform, download=True)
test_data = MNIST(root=path, train=False, transform=transform, download=True)

# Binary classification: keep only the samples labelled 0 or 1 in both the
# train split and the test split.
for dataset in (train_data, test_data):
    idx = (dataset.targets == 0) | (dataset.targets == 1)
    dataset.targets = dataset.targets[idx]
    dataset.data = dataset.data[idx]

# No deep reason for 85; per the original note, the filtered training set
# divides evenly into batches of this size.
batch_size = 85

train_loader = DataLoader(dataset=train_data, batch_size=batch_size, shuffle=True)
# The test loader is not batched: a single batch holds every filtered test
# sample so that loss and accuracy can be computed in one pass.
test_loader = DataLoader(dataset=test_data, batch_size=len(test_data), shuffle=False)


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./datasets/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting ./datasets/MNIST/raw/train-images-idx3-ubyte.gz to ./datasets/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./datasets/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting ./datasets/MNIST/raw/train-labels-idx1-ubyte.gz to ./datasets/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./datasets/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting ./datasets/MNIST/raw/t10k-images-idx3-ubyte.gz to ./datasets/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./datasets/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting ./datasets/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./datasets/MNIST/raw



In [None]:
# Weight matrix w and bias b of a one-layer logistic regression model.
# Every 1x28x28 image is reshaped to a 784-vector, hence 28*28 weight rows.
w = np.random.standard_normal((28 * 28, 1))
# The bias is kept as a (1, 1) matrix rather than a scalar so that its shape
# always lines up with the other matrix operations.
b = np.random.standard_normal((1, 1))
eta = 0.0001  # learning rate for SGD
delta = 1e-10  # small constant added inside log() to avoid log(0)


In [None]:
# define sigmoid function
def sigmoid(val):
    """Return the logistic sigmoid 1 / (1 + exp(-val)), element-wise.

    The naive formula overflows in ``exp`` for large negative inputs. This
    version only ever exponentiates a non-positive number, so it is stable
    over the whole floating-point range.

    Args:
        val: A scalar or array-like of real numbers.

    Returns:
        A NumPy scalar for scalar input, otherwise an array with the same
        shape as ``val``; every value lies in [0, 1].
    """
    val = np.asarray(val)
    # exp(-|val|) always lies in (0, 1], so it can never overflow.
    decay = np.exp(-np.abs(val))
    # val >= 0:  1 / (1 + exp(-val))
    # val <  0:  exp(val) / (1 + exp(val))   (algebraically the same value)
    result = np.where(val >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with () unwraps a 0-d array into a scalar and leaves arrays
    # of higher rank untouched.
    return result[()]

# define derivative of sigmoid function w.r.t. its value (gradient of sigmoid)
def grad_sigmoid(val):
    """Return the derivative of the sigmoid evaluated at ``val``.

    Uses the identity sigmoid'(v) = sigmoid(v) * (1 - sigmoid(v)).

    Args:
        val: A scalar or array accepted by ``sigmoid``.

    Returns:
        The element-wise derivative, with the same shape as ``sigmoid(val)``.
    """
    # Evaluate the sigmoid once and reuse it for both factors.
    s = sigmoid(val)
    return s * (1 - s)

# given data instances in batch form,
# compute loss and gradients of w and b (forward pass up to the loss, then
# the gradient of the loss with respect to w and b)
# also, count the number of correct prediction
def compute_loss_and_grad(data_instance):
    """Compute per-instance BCE loss, gradients and hits for one batch.

    The model parameters ``w`` and ``b`` and the log-stabiliser ``delta``
    are read from module scope.

    Args:
        data_instance: Tuple ``(x, y)``. The callers in this file pass ``x``
            with shape (batch_size, 784) and ``y`` (labels 0 or 1) with shape
            (batch_size, 1).

    Returns:
        A tuple ``(loss, (grad_w, grad_b), hit)`` in which every entry is
        instance-wise, i.e. has one row per sample in the batch:
        ``loss`` is the binary cross-entropy, ``grad_w`` and ``grad_b`` are
        the gradients of the loss w.r.t. ``w`` and ``b``, and ``hit`` is a
        boolean array marking the correctly predicted samples.
    """
    # x is the input, y the true label; the model estimates P(Y=1 | X=x).
    x, y = data_instance

    # Forward pass: linear transformation followed by the sigmoid.
    # x.shape = (batch_size, 784), w.shape = (784, 1)
    linear = np.matmul(x, w) + b
    y_est = sigmoid(linear)  # modelled conditional probability P(Y=1 | X=x)

    # Binary cross-entropy per instance; delta keeps log() away from 0.
    loss = -y * np.log(y_est + delta) - (1 - y) * np.log(1 - y_est + delta)

    # Backward pass via the chain rule: loss is a function of y_est, y_est of
    # linear, and linear of (w, b). The factor shared by both parameters is
    #   d loss / d linear = -y * (1 - y_est) + (1 - y) * y_est = y_est - y,
    # so the already computed y_est is reused instead of re-evaluating the
    # sigmoid.
    grad = y_est - y
    grad_w = np.multiply(grad, x)  # d linear / d w = x
    grad_b = grad                  # d linear / d b = 1

    # A prediction is correct when the rounded probability equals the label.
    hit = (y == np.round(y_est))

    return loss, (grad_w, grad_b), hit

# update NN parameters w and b with SGD
def update_parameters(params, grads, lr=None):
    """Apply one mini-batch SGD step to ``w`` and ``b``.

    The instance-wise gradients are averaged over the batch (axis 0) and
    the parameters are moved against that mean gradient.

    Args:
        params: Tuple ``(w, b)`` of NumPy arrays. They are updated IN PLACE.
        grads: Tuple ``(grad_w, grad_b)`` of instance-wise gradients with one
            row per sample, as returned by ``compute_loss_and_grad``.
        lr: Learning rate. When ``None`` (the default) the module-level
            ``eta`` is used, which matches the original behaviour.

    Returns:
        The tuple ``(w, b)``: the same array objects that were passed in,
        after the update.
    """
    w, b = params
    grad_w, grad_b = grads
    if lr is None:
        lr = eta

    # Averaging over the batch axis yields a flat vector; reshape it back to
    # a column so it matches the (n, 1) shape of the parameter.
    w -= lr * np.mean(grad_w, axis=0).reshape(-1, 1)
    b -= lr * np.mean(grad_b, axis=0).reshape(-1, 1)

    return w, b  # parameters after training on one batch


In [None]:
num_epoch = 500  # show the (reshuffled) training data to the model 500 times

# The test loader yields the whole filtered test set as one unshuffled batch,
# so it is fetched and converted once instead of on every epoch.
# The built-in next() is used because iterator objects are not guaranteed to
# expose a .next() method.
te_images, te_labels = next(iter(test_loader))
# torch tensors -> NumPy arrays, flattened to (N, 784) inputs and (N, 1) labels
te_images, te_labels = te_images.numpy().reshape(-1, 784), te_labels.numpy().reshape(-1, 1)

for i in range(num_epoch):

    # train the logistic regression model
    total_loss_train = 0  # running sum of the training loss in this epoch
    count = 0  # running number of correct predictions in this epoch
    for x, y in train_loader:
        # x arrives as (batch_size, 1, 28, 28); reshape(-1, 784) flattens each
        # image and lets NumPy infer the batch dimension. The tensors are
        # converted to NumPy because the model is written in NumPy.
        x, y = x.numpy().reshape(-1, 784), y.numpy().reshape(-1, 1)
        params = (w, b)
        # compute loss and gradients, and then update the parameters
        loss, grads, hit = compute_loss_and_grad((x, y))
        w, b = update_parameters(params, grads)
        total_loss_train += loss.sum()  # sum of the instance-wise losses
        count += hit.sum()  # correct predictions, for the accuracy

    # compute average loss and accuracy for the train dataset
    loss_train = total_loss_train / len(train_data)
    acc_train = count / len(train_data)

    # test, or evaluate, the trained logistic regression model;
    # the gradients are not needed here, so they are discarded
    loss, _, hit = compute_loss_and_grad((te_images, te_labels))
    # a single batch covers the whole test set, so a plain sum is enough
    total_loss_test = loss.sum()

    # compute average loss and accuracy for the test dataset
    loss_test = total_loss_test / len(test_data)
    acc_test = hit.sum() / len(test_data)

    if i % 10 == 0:
        print("Epoch %d Train: %.3f / %.2f %%"%(i,loss_train,acc_train*100))
        print("Epoch %d Test: %.3f / %.2f %%"%(i,loss_test,acc_test*100))
        print()


Epoch 0 Train: 2.574 / 47.85 %
Epoch 0 Test: 2.502 / 47.94 %

Epoch 10 Train: 2.100 / 53.16 %
Epoch 10 Test: 2.031 / 53.66 %

Epoch 20 Train: 1.729 / 58.03 %
Epoch 20 Test: 1.660 / 58.49 %

Epoch 30 Train: 1.437 / 62.77 %
Epoch 30 Test: 1.368 / 63.69 %

Epoch 40 Train: 1.207 / 67.11 %
Epoch 40 Test: 1.138 / 67.66 %

Epoch 50 Train: 1.025 / 70.95 %
Epoch 50 Test: 0.958 / 71.77 %

Epoch 60 Train: 0.881 / 74.14 %
Epoch 60 Test: 0.814 / 74.47 %

Epoch 70 Train: 0.766 / 76.85 %
Epoch 70 Test: 0.700 / 78.01 %

Epoch 80 Train: 0.673 / 79.08 %
Epoch 80 Test: 0.608 / 80.05 %

Epoch 90 Train: 0.596 / 80.99 %
Epoch 90 Test: 0.533 / 82.46 %

Epoch 100 Train: 0.534 / 82.70 %
Epoch 100 Test: 0.472 / 83.88 %

Epoch 110 Train: 0.482 / 84.28 %
Epoch 110 Test: 0.422 / 85.39 %

Epoch 120 Train: 0.439 / 85.76 %
Epoch 120 Test: 0.380 / 86.57 %

Epoch 130 Train: 0.402 / 86.93 %
Epoch 130 Test: 0.345 / 87.71 %

Epoch 140 Train: 0.371 / 87.91 %
Epoch 140 Test: 0.315 / 88.84 %

Epoch 150 Train: 0.344 / 88.96 %