### 경사 하강법으로 로지스틱 회귀 구현하기.

In [None]:
import numpy as np
import pandas as pd
import warnings
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
# Install a global 'ignore' filter so warnings issued through the `warnings`
# module are not displayed by the statements that run after this line.
warnings.filterwarnings('ignore')               # Turn the warnings off.

1). 데이터를 읽어온다.

In [None]:
# Load the breast-cancer dataset that ships with scikit-learn.
data = load_breast_cancer()
# Explanatory variables (feature matrix).
X = data['data']
# Response variable, flipped from the dataset's own coding so that
# 0 = 'benign' and 1 = 'malignant'.
Y = 1 - data['target']

In [None]:
# Show the description text stored with the dataset under the 'DESCR' key.
print(data['DESCR'])

In [None]:
# Set aside 40% of the samples as a test set; the fixed random_state makes
# the split repeatable between runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.4,
    random_state=1234,
)

2). 'sigmoid' 함수와 'gradient' 함수를 정의한다.

In [None]:
def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` element-wise.

    The value is computed as ``exp(-log(1 + exp(-x)))`` through
    ``np.logaddexp``. The direct formula evaluates ``np.exp(-x)``, which
    overflows to ``inf`` (with a floating-point overflow warning) once
    ``x`` is below roughly -709; the log-space form never forms that
    intermediate, so large negative inputs simply underflow to 0.0.

    Parameters
    ----------
    x : scalar or numpy.ndarray
        Input value(s).

    Returns
    -------
    numpy floating scalar or numpy.ndarray
        Sigmoid of ``x``, same shape as ``x``, with values in [0, 1].
    """
    # logaddexp(0, -x) == log(exp(0) + exp(-x)) == log(1 + exp(-x)),
    # evaluated without overflowing.
    s = np.exp(-np.logaddexp(0.0, -x))
    return s

def gradient(X, Y, beta):
    """Return the gradient of the summed logistic loss with respect to beta.

    For labels y in {-1, +1} the per-sample loss is
    ``log(1 + exp(-y * x.beta))`` and its gradient is
    ``-y * (1 - sigmoid(y * x.beta)) * x``; this function adds those
    per-sample gradients over all rows (a sum, not a mean).

    Parameters
    ----------
    X : numpy.ndarray
        Design matrix, one sample per row. As called from
        ``LogisticRegression.train`` it already contains the leading
        column of ones and has shape (n_samples, n_coefficients).
    Y : numpy.ndarray
        Labels coded as -1 / +1. As called from
        ``LogisticRegression.train`` it is a column of shape (n_samples, 1)
        so that it broadcasts against the rows of ``X``.
    beta : numpy.ndarray
        Coefficient row vector; ``beta.T`` is multiplied by ``X``.

    Returns
    -------
    numpy.ndarray
        Column-wise sum of the per-sample gradients (one entry per column
        of ``X``).
    """
    # Signed margin y * (x . beta) of every sample.
    margins = Y * np.dot(X, beta.T)
    # Scalar factor that multiplies each sample's feature row.
    weights = -Y * (1 - sigmoid(margins))
    per_sample = weights * X
    return per_sample.sum(axis=0)

3). 'LogisticRegression' 클래스를 정의한다.

In [None]:
class LogisticRegression:
    """Binary logistic regression fitted by full-batch gradient descent.

    The model keeps a single coefficient row vector ``beta`` whose first
    entry is the intercept. Coefficients are drawn from a standard normal
    distribution at the start of every ``train`` call and then updated
    with ``beta <- beta - rate * gradient(X, Y, beta)``.
    """

    def __init__(self, learn_rate):
        """Create an untrained model.

        Parameters
        ----------
        learn_rate : float
            Step size that multiplies the gradient in every update.
        """
        self.rate = learn_rate
        # Number of coefficients (features + intercept); set by train().
        self.n_nodes = None
        # Coefficient row vector of shape (1, n_nodes); set by train().
        self.beta = None

    @staticmethod
    def _with_intercept(input_X):
        """Return ``input_X`` as an array with a leading column of ones."""
        # np.asarray lets lists and pandas objects be used as well as
        # ndarrays.
        features = np.asarray(input_X)
        ones_column = np.ones((features.shape[0], 1))
        return np.concatenate((ones_column, features), axis=1)

    def train(self, input_X, input_Y, n_epochs):
        """Fit the coefficients on the given data.

        Parameters
        ----------
        input_X : array-like of shape (n_samples, n_features)
            Feature matrix without an intercept column.
        input_Y : array-like of length n_samples
            Labels coded as 0 / 1.
        n_epochs : int
            Number of gradient-descent updates to perform.

        Returns
        -------
        numpy.ndarray
            The fitted coefficients, shape (1, n_features + 1); also stored
            in ``self.beta``.
        """
        X = self._with_intercept(input_X)
        self.n_nodes = X.shape[1]
        self.beta = np.random.normal(0.0, 1.0, (1, self.n_nodes))
        # Recode 0/1 labels to -1/+1 (the coding `gradient` expects) and
        # shape them as a column so they broadcast across the rows of X.
        Y = (2 * np.asarray(input_Y) - 1).reshape(-1, 1)
        for _ in range(n_epochs):
            self.beta = self.beta - self.rate * gradient(X, Y, self.beta)
        return self.beta

    def query(self, input_X, prob=True, cutoff=0.5):
        """Predict for new samples.

        Parameters
        ----------
        input_X : array-like of shape (n_samples, n_features)
            Feature matrix without an intercept column.
        prob : bool, default True
            If true, return the sigmoid outputs; otherwise return 0/1
            labels.
        cutoff : float, default 0.5
            Threshold used when ``prob`` is false: the label is 1 where
            the sigmoid output is strictly greater than ``cutoff``.

        Returns
        -------
        numpy.ndarray
            Column vector of shape (n_samples, 1) holding either sigmoid
            outputs or integer 0/1 labels.

        Raises
        ------
        RuntimeError
            If the model has not been trained yet.
        """
        if self.beta is None:
            raise RuntimeError("train() must be called before query().")
        X = self._with_intercept(input_X)
        p = sigmoid(np.dot(X, self.beta.T))
        if prob:
            return p
        return (p > cutoff).astype('int')

#### Sample run:

In [None]:
# Learning rate: the step size handed to LogisticRegression below.
learning_rate = 0.001

In [None]:
# Fit on the training split for 2000 gradient-descent updates, then predict
# hard 0/1 labels for the test split.
LR = LogisticRegression(learning_rate)
LR.train(X_train, Y_train, n_epochs=2000)
Y_pred = LR.query(X_test, prob=False, cutoff=0.5)

In [None]:
# Print the accuracy: the fraction of test samples whose predicted label
# matches the true one. Y_pred is a column vector, so Y_test is reshaped to
# a column before the element-wise comparison.
acc = np.mean(Y_pred == Y_test.reshape(-1, 1))
print('Accuracy : {}'.format(np.round(acc, 3)))