# Logistic Regression

### Imports

In [16]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as func
import torch.optim as opt

#from google.colab import drive
#drive.mount('/content/drive')

# Fix the PyTorch random seed for reproducibility
torch.manual_seed(1)

<torch._C.Generator at 0x7feb144ae790>

### 1. Data loading

|$x_1$|$x_2$|$y$|
|:---:|:---:|:---:|
|1|2|0|
|2|3|0|
|3|1|0|
|4|3|1|
|5|3|1|
|6|2|1|

In [17]:
# Enter the training data by hand: six samples with two features each,
# and one binary label (0 or 1) per sample.
x_train = torch.tensor(
    [[1, 2], [2, 3], [3, 1], [4, 3], [5, 3], [6, 2]],
    dtype=torch.float32,
)
y_train = torch.tensor([[0], [0], [0], [1], [1], [1]], dtype=torch.float32)

# Show each tensor followed by its shape, one item per line.
print(x_train, x_train.shape, y_train, y_train.shape, sep='\n')

# Model 설계
class Logistic(nn.Module):
    """Logistic-regression model: a single linear layer followed by a sigmoid.

    Args:
        num_inputs: Number of input features per sample.
        num_outputs: Number of output values per sample.
    """

    def __init__(self, num_inputs, num_outputs):
        super(Logistic, self).__init__()
        self.linear = nn.Linear(in_features=num_inputs, out_features=num_outputs)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return the element-wise sigmoid of the linear layer applied to ``x``."""
        logits = self.linear(x)
        probabilities = self.sigmoid(logits)
        return probabilities

# Instantiate the model: 2 input features, 1 output
model = Logistic(num_inputs=2, num_outputs=1)

tensor([[1., 2.],
        [2., 3.],
        [3., 1.],
        [4., 3.],
        [5., 3.],
        [6., 2.]])
torch.Size([6, 2])
tensor([[0.],
        [0.],
        [0.],
        [1.],
        [1.],
        [1.]])
torch.Size([6, 1])


### 2. Hypothesis, Cost and Optimization

- 다음 수식을 만족한다고 가정한다
  + Input이 1개(Scalar)인 경우
  $$ y_{hypo} = H(x) = \cfrac{1}{1+e^{-\alpha x}} $$
  + Input이 여러개(Vector)인 경우
  $$ y_{hypo} = H(x) = \cfrac{1}{1+e^{-W^TX}} $$

- ex) input이 3개인 경우, 
$$ y_{hypo} = \cfrac{1}{1+e^{-\begin{bmatrix} w_1 & w_2 & w_3 \end{bmatrix} \cdot \begin{bmatrix} x_1 \\ x_2 \\ x_3 \end{bmatrix}}} $$
  + 이를 만족하는 $W$를 구해야 한다

- Cost function으로 Binary Cross Entropy를 이용한다

$$ -[y_{train}\log(y_{hypo}) + (1 - y_{train})\log(1 - y_{hypo})] $$

In [18]:
# Plain SGD optimizer over the model parameters (learning rate = 1)
optimizer = opt.SGD(params=model.parameters(), lr=1)

# Full-batch training loop
for epoch in range(100):
    # Clear the gradients accumulated by the previous step
    optimizer.zero_grad()

    # Forward pass and cost: binary_cross_entropy(hypothesis, target)
    y_hypo = model(x_train)
    cost = func.binary_cross_entropy(y_hypo, y_train)

    # Backpropagate the cost and update the model
    cost.backward()
    optimizer.step()

    # Only report on every 10th epoch (epoch = 9, 19, ..., 99)
    if (epoch + 1) % 10 != 0:
        continue

    # Threshold this epoch's (pre-update) outputs at 0.5 and compare with the labels
    prediction = y_hypo >= 0.5
    correct_prediction = torch.eq(prediction.float(), y_train)
    accuracy = correct_prediction.sum().item() / len(correct_prediction)
    print('Epoch {:4d}/{} Cost: {:.6f} Accuracy {:2.2f}%'.format(
        epoch, 100, cost.item(), accuracy * 100))

Epoch    9/100 Cost: 0.764956 Accuracy 66.67%
Epoch   19/100 Cost: 0.596826 Accuracy 83.33%
Epoch   29/100 Cost: 0.491604 Accuracy 83.33%
Epoch   39/100 Cost: 0.405330 Accuracy 83.33%
Epoch   49/100 Cost: 0.323565 Accuracy 83.33%
Epoch   59/100 Cost: 0.249201 Accuracy 83.33%
Epoch   69/100 Cost: 0.191438 Accuracy 100.00%
Epoch   79/100 Cost: 0.159492 Accuracy 100.00%
Epoch   89/100 Cost: 0.145180 Accuracy 100.00%
Epoch   99/100 Cost: 0.135187 Accuracy 100.00%


### 3. Assignment
  * Data 파일을 이용한 Logistic Regression
    * data_logistic_regression.csv를 이용해 학습한다
      * 8개의 input, 1개의 output  
      $\begin{bmatrix}x_1,\ldots,x_8,y\end{bmatrix}$  
      * Accuracy 75% 이상을 달성한다

In [1]:
#imports
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as func
import torch.optim as opt

In [2]:
# Mount Google Drive at /content/drive (requires the Google Colab runtime)
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# Fix the random seeds. NumPy's global RNG is seeded as well because the
# dataset is shuffled with np.random.shuffle; seeding only PyTorch would leave
# that shuffle different on every run.
np.random.seed(1)
torch.manual_seed(1)

<torch._C.Generator at 0x7feb144ae790>

In [5]:
# Load the dataset from Google Drive as a float32 array (comma-separated file)
csv_path = '/content/drive/MyDrive/Colab Notebooks/data_logistic_regression.csv'
dataset = np.loadtxt(csv_path, dtype=np.float32, delimiter=',')

In [6]:
# Shuffle the dataset into a random order (np.random.shuffle works in place)
np.random.shuffle(dataset)

In [11]:
# Convert to torch tensors: every column except the last is an input feature
# (8 inputs per the assignment) and the last column is the single output.
# Indexing with the list [-1] keeps the output two-dimensional.
x_train = torch.tensor(dataset[:, :-1], dtype=torch.float32)
y_train = torch.tensor(dataset[:, [-1]], dtype=torch.float32)

In [12]:
# Logistic Regression Model 설계
class Logistic(nn.Module):
    """Logistic-regression model: a single linear layer followed by a sigmoid.

    Args:
        num_inputs: Number of input features per sample.
        num_outputs: Number of output values per sample.
    """

    def __init__(self, num_inputs, num_outputs):
        super(Logistic, self).__init__()
        self.linear = nn.Linear(in_features=num_inputs, out_features=num_outputs)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return the element-wise sigmoid of the linear layer applied to ``x``."""
        logits = self.linear(x)
        probabilities = self.sigmoid(logits)
        return probabilities

In [13]:
# Instantiate the model: 8 input features, 1 output
model = Logistic(num_inputs=8, num_outputs=1)

In [14]:
# Plain SGD optimizer over the model parameters (learning rate = 2)
optimizer = opt.SGD(params=model.parameters(), lr=2)

In [15]:
# Full-batch training loop
for epoch in range(1000):
    # Clear the gradients accumulated by the previous step
    optimizer.zero_grad()

    # Forward pass and cost: binary_cross_entropy(hypothesis, target)
    y_hypo = model(x_train)
    cost = func.binary_cross_entropy(y_hypo, y_train)

    # Backpropagate the cost and update the model
    cost.backward()
    optimizer.step()

    # Only report on every 100th epoch (epoch = 99, 199, ..., 999)
    if (epoch + 1) % 100 != 0:
        continue

    # Threshold this epoch's (pre-update) outputs at 0.5 and compare with the labels
    prediction = y_hypo >= 0.5
    correct_prediction = torch.eq(prediction.float(), y_train)
    accuracy = correct_prediction.sum().item() / len(correct_prediction)
    print('Epoch {:4d}/{} Cost: {:.6f} Accuracy {:2.2f}%'.format(
        epoch, 1000, cost.item(), accuracy * 100))

Epoch   99/1000 Cost: 0.473820 Accuracy 77.08%
Epoch  199/1000 Cost: 0.472031 Accuracy 77.08%
Epoch  299/1000 Cost: 0.471753 Accuracy 76.81%
Epoch  399/1000 Cost: 0.471690 Accuracy 76.94%
Epoch  499/1000 Cost: 0.471675 Accuracy 76.94%
Epoch  599/1000 Cost: 0.471672 Accuracy 76.94%
Epoch  699/1000 Cost: 0.471671 Accuracy 76.94%
Epoch  799/1000 Cost: 0.471671 Accuracy 76.94%
Epoch  899/1000 Cost: 0.471671 Accuracy 76.94%
Epoch  999/1000 Cost: 0.471671 Accuracy 76.94%
