## 데이터 불러오기

In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
data = pd.read_csv('assignment_2.csv')
data.head()

Unnamed: 0,Label,bias,experience,salary
0,1,1,0.7,48000
1,0,1,1.9,48000
2,1,1,2.5,60000
3,0,1,4.2,63000
4,0,1,6.0,76000


## 데이터 스케일링

experience와 salary를 스케일링한다.

In [2]:
bias = data["bias"]
Label = data["Label"]
scaler = StandardScaler() # Scaling
df_new = pd.DataFrame(scaler.fit_transform(data), columns = data.columns)
df_new["bias"] = bias
df_new["Label"] = Label
df_new

Unnamed: 0,Label,bias,experience,salary
0,1,1,-1.513249,-1.206602
1,0,1,-1.090751,-1.206602
2,1,1,-0.879501,-0.432312
3,0,1,-0.280962,-0.238739
4,0,1,0.352786,0.600075
...,...,...,...,...
195,0,1,0.528827,1.116268
196,0,1,0.669660,0.406502
197,0,1,0.035912,0.341978
198,1,1,1.444241,0.148406


## sigmoid

In [3]:
import random

In [4]:
y = np.array(df_new["Label"])
x = np.array(df_new[["bias", "experience", "salary"]])
beta = np.array([random.random(), random.random(), random.random()]) # 임의의 beta값 생성
beta

array([0.03287644, 0.06530109, 0.06880682])

sigmoid는 개별 $x_i$(bias, experience, salary) 입력값에 대해 $y$가 1일 확률을 계산하는 함수이다.  
$p = \frac{1}{1+exp(-ax+b)}$  
$e$의 승수(multiplier)는 $\sum\beta_i x_i$ 이다.

In [5]:
def sigmoid(x, beta) :
    multiplier = 0
    for i in range(x.size):
        multiplier += x[i]*beta[i]
    p = 1.0/(1.0+np.exp(-multiplier))
    return p
sigmoid(x[0], beta)

0.4628279970351887

## log likelihood

$l(p) = \sum[y_i\log(p)+(1-y_i)\log(1-p)]$

In [6]:
#개별 likelihood, 각각의 x입력값에 대한 p의 값 산정
def lg_likelihood_i(x, y, beta, j) :
    p_hat = 0
    p = sigmoid(x[j], beta)
    p_hat += y[j]*np.log(p) + (1-y[j])*np.log(1-p)
    return p_hat
lg_likelihood_i(x, y, beta, 0)

-0.7703997906021487

In [7]:
def lg_likelihood(x, y, beta) :
    log_p_hat = 0
    for i in range(y.size) :
        log_p_hat += lg_likelihood_i(x, y, beta, i) # log p 의 추정값에 계속 더해준다.
        
    return log_p_hat
lg_likelihood(x, y, beta)

-140.43178918699508

## gradient Ascent

get_gradients는 cost function(log likelihood)상에서 각각의 beta 계수들로 편미분했을 때, 각각의 기울기를 구하는 함수이다.

In [8]:
# gradients 한 번 구하기
def get_gradients(x, y, beta):
    gradients = []
    
    for i in range(x[0].size) :
        gradient = 0                                  # 각 계수별 기울기
        for j in range(y.size) :
            p = sigmoid(x[j], beta)
            gradient += (y[j] - p)*x[j][i]            # 개별 데이터 x에 대한 값을 합산
               
        gradient = gradient/y.size                    # 전체 n 값으로 나누기
        gradients.append(gradient)
    
    gradients = np.array(gradients)
    
    return gradients

gradients = np.array(get_gradients(x, y, beta))
gradients

array([-0.24818144,  0.07673107, -0.11605635])

step은 구한 기울기를 바탕으로 다음 학습을 진행할 지점을 지정하는 함수이다.

In [9]:
def step(beta, gradients, stepsize=np.array([0.01,0.01,0.01])) : #stepsize:학습률
    beta = beta + stepsize*gradients
    return beta

In [10]:
step(beta, gradients)

array([0.03039463, 0.0660684 , 0.06764625])

In [11]:
#max_cycle:최대 학습 횟수
#tolerance:이 값보다 step의 변화율이 낮으면 학습을 종료함
#theta_0:학습 이전의 계수
#theta:학습 이후의 계수

def gradientAscent(x, y, beta, max_cycle = 200000, tolerance = 0.000001, stepsize=np.array([0.05,0.05,0.05])) :
    theta_0 = beta
    i = 0
    cost = lg_likelihood(x, y, theta_0)/y.size
    gradients = np.array([])
    while i < max_cycle:
        gradients = get_gradients(x, y, theta_0)
        theta = step(theta_0, gradients, stepsize)
        temp = theta_0 - theta
        theta_0 = theta
        
        if i % 1000 == 0:
            print(gradients)
            #print(theta_0)
            #print(theta)
            #print(np.abs(temp.sum()))
        if np.abs(temp.sum()) < tolerance :
            print("stop")
            break
        i += 1
    return theta_0

In [12]:
beta.sum()

0.1669843507251335

In [13]:
beta = gradientAscent(x, y, beta)
beta

[-0.24818144  0.07673107 -0.11605635]
[-0.00651793  0.02267369 -0.02229783]
[-0.00362895  0.01129792 -0.01103996]
[-0.00229452  0.00690366 -0.00671948]
[-0.00155748  0.0046094  -0.00447464]
[-0.00110184  0.00322973 -0.00312958]
[-0.00080013  0.00233094 -0.0022557 ]
[-0.00059126  0.0017153  -0.00165834]
[-0.00044226  0.00127927 -0.00123591]
[-0.00033369  0.00096319 -0.00093005]
[-0.00025338  0.00073023 -0.00070482]
[-0.0001933   0.00055644 -0.00053691]
[-0.00014799  0.00042563 -0.00041059]
[-0.0001136   0.0003265  -0.00031491]
[-8.73724279e-05  2.51003094e-04 -2.42061006e-04]
[-6.73060638e-05  1.93282606e-04 -1.86377090e-04]
[-5.19091579e-05  1.49023683e-04 -1.43687703e-04]
[-4.00705333e-05  1.15010814e-04 -1.10885726e-04]
[-3.09533158e-05  8.88271054e-05 -8.56369845e-05]
[-2.39232945e-05  6.86438052e-05 -6.61760588e-05]
stop


array([-2.11010161,  4.52136692, -4.38941496])

In [14]:
lg_likelihood(x, y, beta) # 수렴한 우도

-57.47849343109985

## 예측

In [15]:
Label_predict = []
for i in range(y.size) :
    p = sigmoid(x[i], beta)  # 학습한 beta 값으로 p를 추정한다.
    if p > 0.5 :
        Label_predict.append(1) # p값이 0.5보다 크면 1로 분류한다.
    else :
        Label_predict.append(0)
Label_predict = np.array(Label_predict)
Label_predict

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1,
       0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
       1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0,
       0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1,
       0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
       0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1,
       0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0,
       0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1,
       0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0,
       1, 1])

## confusion_matrix

In [16]:
from sklearn.metrics import *
tn, fp, fn, tp = confusion_matrix(Label, Label_predict).ravel()
confusion_matrix(Label, Label_predict)

array([[140,   8],
       [ 13,  39]], dtype=int64)

In [17]:
#Accuracy
Accuracy = (tp+tn)/(tp+fn+fp+tn)
Accuracy

0.895