# Logistic Regression

### Imports

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [2]:
# Seed PyTorch's global random number generator so repeated runs of the notebook
# draw the same random values.
torch.manual_seed(1)

<torch._C.Generator at 0x7f0f05773270>

### Training Data

In [3]:
# Toy dataset: six samples with two features each, and one binary label per sample.
x_data = [
    [1, 2],
    [2, 3],
    [3, 1],
    [4, 3],
    [5, 3],
    [6, 2],
]
y_data = [[label] for label in (0, 0, 0, 1, 1, 1)]

In [4]:
# Turn the nested Python lists into float32 tensors (features first, labels second).
x_train, y_train = (torch.FloatTensor(rows) for rows in (x_data, y_data))

In [5]:
# Sanity-check the tensor shapes, one per line.
print(x_train.shape, y_train.shape, sep="\n")

torch.Size([6, 2])
torch.Size([6, 1])


### Computing the Hypothesis

In [6]:
# Element-wise exponential of a one-element tensor holding 1, i.e. e^1.
print("e^1 equals: ", torch.FloatTensor([1]).exp())

e^1 equals:  tensor([2.7183])


In [7]:
# Trainable parameters, both initialised to zero and tracked by autograd:
# a (2, 1) weight column and a single bias term.
W = torch.zeros(2, 1, requires_grad=True)
b = torch.zeros((1,), requires_grad=True)

In [8]:
# Sigmoid written out by hand: H(x) = 1 / (1 + exp(-(xW + b))).
hypothesis = 1 / (1 + (-(x_train @ W + b)).exp())

In [9]:
# Show the predicted probabilities followed by their shape.
print(hypothesis, hypothesis.shape, sep="\n")

tensor([[0.5000],
        [0.5000],
        [0.5000],
        [0.5000],
        [0.5000],
        [0.5000]], grad_fn=<MulBackward0>)
torch.Size([6, 1])


In [10]:
# Built-in sigmoid; gives the same values as the hand-written hypothesis computed above.
hypothesis = (x_train @ W + b).sigmoid()

In [11]:
# Print the probabilities and then their shape for comparison with the manual version.
print(hypothesis, hypothesis.shape, sep="\n")

tensor([[0.5000],
        [0.5000],
        [0.5000],
        [0.5000],
        [0.5000],
        [0.5000]], grad_fn=<SigmoidBackward>)
torch.Size([6, 1])


### Loss function

In [12]:
# Low-level implementation of binary cross-entropy, one loss value per sample.
# Each log term is floored at -100 -- the same clamp F.binary_cross_entropy applies --
# so a prediction that saturates to exactly 0 or 1 produces a large finite loss
# instead of 0 * -inf = nan.
losses = -(y_train * torch.log(hypothesis).clamp(min=-100) +
           (1 - y_train) * torch.log(1 - hypothesis).clamp(min=-100))
print(losses)

tensor([[0.6931],
        [0.6931],
        [0.6931],
        [0.6931],
        [0.6931],
        [0.6931]], grad_fn=<NegBackward>)


In [13]:
# Collapse the per-sample losses into a single scalar cost by averaging.
cost = torch.mean(losses)
print(cost)

tensor(0.6931, grad_fn=<MeanBackward1>)


In [14]:
# High-level implementation: a single library call that yields the averaged
# binary cross-entropy between the predictions and the labels.
F.binary_cross_entropy(hypothesis, y_train)

tensor(0.6931, grad_fn=<BinaryCrossEntropyBackward>)

### Training weight parameters

In [15]:
nb_epochs = 1000
optimizer = optim.SGD([W, b], lr=1)

for epoch in range(nb_epochs + 1):
    # Forward pass: predicted probabilities and their mean binary cross-entropy.
    hypothesis = (x_train @ W + b).sigmoid()
    cost = F.binary_cross_entropy(hypothesis, y_train)

    optimizer.zero_grad()  # without this call, new gradients are added onto the existing .grad values
    cost.backward()        # backpropagation
    optimizer.step()       # apply the gradient update to W and b

    # Only report every 100th epoch.
    if epoch % 100 != 0:
        continue
    print("Epoch {:4d}/{} cost: {:.4f}".format(epoch, nb_epochs, cost.item()))

Epoch    0/1000 cost: 0.6931
Epoch  100/1000 cost: 0.1347
Epoch  200/1000 cost: 0.0806
Epoch  300/1000 cost: 0.0579
Epoch  400/1000 cost: 0.0453
Epoch  500/1000 cost: 0.0373
Epoch  600/1000 cost: 0.0317
Epoch  700/1000 cost: 0.0276
Epoch  800/1000 cost: 0.0244
Epoch  900/1000 cost: 0.0219
Epoch 1000/1000 cost: 0.0199


### Evaluation

In [16]:
# Recompute the probabilities with the trained W and b, then show the first five rows.
hypothesis = (x_train @ W + b).sigmoid()
print(hypothesis[:5])

tensor([[2.7648e-04],
        [3.1608e-02],
        [3.8977e-02],
        [9.5622e-01],
        [9.9823e-01]], grad_fn=<SliceBackward>)


In [17]:
# Threshold the probabilities at 0.5 to obtain hard class predictions.
prediction = torch.ge(hypothesis, torch.FloatTensor([0.5]))

In [18]:
# The comparison result is a ByteTensor (uint8) in the output recorded below;
# newer PyTorch releases return a bool tensor for comparisons instead.
print(prediction[:5], y_train[:5], sep="\n")

tensor([[0],
        [0],
        [0],
        [1],
        [1]], dtype=torch.uint8)
tensor([[0.],
        [0.],
        [0.],
        [1.],
        [1.]])


In [19]:
# Element-wise agreement between predictions and labels, followed by the
# fraction of matching entries (the accuracy).
correct_prediction = torch.eq(prediction.float(), y_train)
print(correct_prediction, correct_prediction.float().mean(), sep="\n")

tensor([[1],
        [1],
        [1],
        [1],
        [1],
        [1]], dtype=torch.uint8)
tensor(1.)


### Higher-level implementation with an `nn.Module` class

In [20]:
import pandas as pd
import numpy as np

In [21]:
# Load the diabetes CSV with pandas (a departure from the accompanying explanation);
# the file is read without a header row and every column is parsed as float64.
xy = pd.read_csv(
    './data/Lab05_diabetes.csv',
    dtype=np.float64,
    header=None,
)
# NumPy alternative (returns an ndarray, so the .iloc indexing used on xy would need to change):
# xy = np.loadtxt('./data/Lab05_diabetes.csv', delimiter=',', dtype=np.float32)

In [22]:
# Every column but the last is a feature; the last column is the label.
# Note the list index [-1] rather than a bare -1: it keeps the labels as a
# two-dimensional single-column frame.
x_data, y_data = xy.iloc[:, :-1], xy.iloc[:, [-1]]
x_train, y_train = (torch.FloatTensor(frame.values) for frame in (x_data, y_data))

In [23]:
# Alternative to the previous cell (kept for reference, not executed): select the
# label column with a bare -1 and add the trailing dimension back with unsqueeze(1).
# x_data = xy.iloc[:, :-1]
# y_data = xy.iloc[:, -1]
# x_train = torch.FloatTensor(x_data.values)
# y_train = torch.FloatTensor(y_data.values).unsqueeze(1) # can be written with unsqueeze instead of xy.iloc[:, [-1]]

In [24]:
class BinaryClassifier(nn.Module):
    """Logistic-regression model: a single linear layer followed by a sigmoid.

    Args:
        in_features: Number of input features per sample. Defaults to 8,
            matching the feature count the model is trained with in this notebook.
    """

    def __init__(self, in_features=8):
        super().__init__()
        # Maps each sample's features to one raw score.
        self.linear = nn.Linear(in_features, 1)
        # Squashes the raw score into a probability.
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return sigmoid(linear(x)): one probability in [0, 1] per input row."""
        return self.sigmoid(self.linear(x))

In [25]:
# Build the model instance whose parameters the training loop below optimises.
model = BinaryClassifier()

In [26]:
nb_epochs = 1000
optimizer = optim.SGD(model.parameters(), lr=1)

for epoch in range(nb_epochs + 1):
    # Forward pass and mean binary cross-entropy against the labels.
    hypothesis = model(x_train)
    cost = F.binary_cross_entropy(hypothesis, y_train)

    # Clear stale gradients, backpropagate, then apply the SGD step.
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

    # Only report every 100th epoch.
    if epoch % 100 != 0:
        continue

    # Threshold at 0.5 and measure accuracy. `hypothesis` was computed before this
    # epoch's step, so the reported figures describe the pre-update weights.
    prediction = torch.ge(hypothesis, torch.FloatTensor([0.5]))
    correct_prediction = torch.eq(prediction.float(), y_train)
    accuracy = correct_prediction.sum().item() / len(correct_prediction)

    print("Epoch {:4d}/{} Cost: {:.4f} Accuracy: {:2.2f}".format(
        epoch, nb_epochs, cost.item(), accuracy * 100))

Epoch    0/1000 Cost: 0.7048 Accuracy: 45.72
Epoch  100/1000 Cost: 0.4812 Accuracy: 76.81
Epoch  200/1000 Cost: 0.4741 Accuracy: 76.94
Epoch  300/1000 Cost: 0.4726 Accuracy: 77.21
Epoch  400/1000 Cost: 0.4721 Accuracy: 77.08
Epoch  500/1000 Cost: 0.4719 Accuracy: 76.94
Epoch  600/1000 Cost: 0.4718 Accuracy: 76.68
Epoch  700/1000 Cost: 0.4717 Accuracy: 76.81
Epoch  800/1000 Cost: 0.4717 Accuracy: 76.94
Epoch  900/1000 Cost: 0.4717 Accuracy: 76.94
Epoch 1000/1000 Cost: 0.4717 Accuracy: 76.94
