### Logistic Binary Classification

In [37]:
# 필요한 라이브러리 호출
import torch
from torch.optim import Adam
from torch.nn import Linear, MSELoss, Sequential, Sigmoid, BCELoss
import torch.nn.functional as F
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [38]:
# dataset 만들기 (공부시간, 출석횟수 -> 합격여부)
x = torch.FloatTensor([[1, 3], [2, 2], [3, 1], [4, 6], [5, 5], [6, 4]])
y = torch.FloatTensor([[0], [0], [0], [1], [1], [1]])

In [39]:
w = torch.empty([2, 1], requires_grad=True) # 2행 1열
b = torch.empty([1], requires_grad=True)
torch.nn.init.uniform_(w)
torch.nn.init.uniform_(b)

tensor([0.3571], requires_grad=True)

In [40]:
# cost function 정의
def cost():
    z = torch.matmul(x, w) + b
    hx = torch.sigmoid(z)
    cost_i = F.binary_cross_entropy(hx, y)
    c = torch.mean(cost_i)
    return c

In [41]:
# 학습
optimizer = Adam([w, b], lr=0.01)

for epoch in range(1000):
    optimizer.zero_grad()
    c = cost()
    c.backward()
    optimizer.step()
    print(f'cost : {c.item()}')

cost : 1.1251245737075806
cost : 1.1038978099822998
cost : 1.082881212234497
cost : 1.06209135055542
cost : 1.0415459871292114
cost : 1.021263599395752
cost : 1.0012643337249756
cost : 0.981569230556488
cost : 0.962200939655304
cost : 0.943183422088623
cost : 0.9245421290397644
cost : 0.9063037037849426
cost : 0.8884963393211365
cost : 0.8711493611335754
cost : 0.8542929291725159
cost : 0.8379590511322021
cost : 0.8221794962882996
cost : 0.8069870471954346
cost : 0.7924142479896545
cost : 0.7784934639930725
cost : 0.7652561068534851
cost : 0.7527312636375427
cost : 0.740946352481842
cost : 0.7299246191978455
cost : 0.7196857333183289
cost : 0.7102435231208801
cost : 0.7016060948371887
cost : 0.6937739253044128
cost : 0.6867395043373108
cost : 0.6804868578910828
cost : 0.6749905943870544
cost : 0.6702160239219666
cost : 0.666119396686554
cost : 0.6626488566398621
cost : 0.6597444415092468
cost : 0.6573407053947449
cost : 0.6553673148155212
cost : 0.6537520289421082
cost : 0.652422308921

In [42]:
# 예측 함수 정의
def hxfn(xd):
    xd = torch.FloatTensor(xd)
    z = torch.matmul(xd, w) + b
    hx = torch.sigmoid(z)
    return z.detach().numpy(), hx.detach().numpy()

In [43]:
# 6시간 공부, 6회 출석했을 때 합격 여부 예측하기
print(f'z : {hxfn([[6, 6]])[0][0][0]}')
print(f'합격여부 : {hxfn([[6, 6]])[1][0][0]}')

z : 4.583599090576172
합격여부 : 0.9898853302001953


---

In [44]:
# from torch.nn import Linear, MSELoss, Sequential, Sigmoid
sig = Sigmoid()
sig(torch.tensor(3, dtype=torch.float32))

tensor(0.9526)

In [48]:
# 학습
model = Sequential()
model.add_module('linear', Linear(2, 1)) # feature 2개, label 1개
model.add_module('sigmoid', Sigmoid())
loss_fn = BCELoss()
optimizer = Adam(model.parameters(), lr=0.1)

for epoch in range(1000):
    optimizer.zero_grad()
    hx = model(x) # z = matmul(x, w) + b, hx = sigmoid(z)
    cost = loss_fn(hx, y)
    cost.backward()
    optimizer.step()
    print(f'cost : {c.item()}')

cost : 0.09019371122121811
cost : 0.09019371122121811
cost : 0.09019371122121811
cost : 0.09019371122121811
cost : 0.09019371122121811
cost : 0.09019371122121811
cost : 0.09019371122121811
cost : 0.09019371122121811
cost : 0.09019371122121811
cost : 0.09019371122121811
cost : 0.09019371122121811
cost : 0.09019371122121811
cost : 0.09019371122121811
cost : 0.09019371122121811
cost : 0.09019371122121811
cost : 0.09019371122121811
cost : 0.09019371122121811
cost : 0.09019371122121811
cost : 0.09019371122121811
cost : 0.09019371122121811
cost : 0.09019371122121811
cost : 0.09019371122121811
cost : 0.09019371122121811
cost : 0.09019371122121811
cost : 0.09019371122121811
cost : 0.09019371122121811
cost : 0.09019371122121811
cost : 0.09019371122121811
cost : 0.09019371122121811
cost : 0.09019371122121811
cost : 0.09019371122121811
cost : 0.09019371122121811
cost : 0.09019371122121811
cost : 0.09019371122121811
cost : 0.09019371122121811
cost : 0.09019371122121811
cost : 0.09019371122121811
c

In [None]:
# 6시간 공부, 6회 출석했을 때 합격 여부 예측하기
model(torch.FloatTensor([[6, 6]]))

tensor([[1.0000]], grad_fn=<SigmoidBackward>)

In [None]:
# 5시간 공부, 5회 출석했을 때 합격 여부 예측하기
model(torch.FloatTensor([[5, 5]]))

tensor([[0.9990]], grad_fn=<SigmoidBackward>)

---

### Evaluation (accuracy, f1_score, confusion matrix, ROC curve)

In [57]:
# 전체에 대한 예측값
pred = model(x)
pred = (pred > 0.5) + 0
pred

tensor([[0],
        [0],
        [0],
        [1],
        [1],
        [1]])

In [60]:
(pred == y).numpy().mean()

1.0

In [54]:
y

tensor([[0.],
        [0.],
        [0.],
        [1.],
        [1.],
        [1.]])

In [61]:
# evaluation 관련 라이브러리 호출
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix

In [None]:
# accuracy : 전체 데이터 중 맞춘 것의 비율
accuracy_score(y.numpy(), pred.numpy())

1.0

In [None]:
# f1 score : 정밀도와 재현도의 조화평균
f1_score(y.numpy(), pred.numpy())

1.0

In [67]:
# confusin matrix : TP, FN, FP, TN
confusion_matrix(y.numpy(), pred.numpy())

array([[3, 0],
       [0, 3]], dtype=int64)