#### 데이터 로드

In [2]:
import sklearn
import numpy as np

In [3]:
# Load the iris data set that ships with scikit-learn.
from sklearn.datasets import load_iris
datasets = load_iris()

In [4]:
datasets.keys() # dict-like object!

dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names', 'filename', 'data_module'])

In [5]:
# Feature matrix; display the first five rows.
x_data = datasets["data"]
x_data[:5]

array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2]])

In [18]:
# Integer class labels, printed together with the class names.
y_data = datasets["target"]
print(f"target : {y_data}, names : {datasets['target_names']}")

target : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2], names : ['setosa' 'versicolor' 'virginica']


In [19]:
y_data = y_data.reshape([-1,1]) # turn the labels into a single column (one row per sample)
y_data[:3]

array([[0],
       [0],
       [0]])

#### one-hot encoding

In [15]:
## Apply one-hot encoding by hand
# One row per sample and one column per class, initially all zeros.
onehot = np.zeros((len(y_data), len(datasets['target_names'])))

# Set the column that matches each sample's class label to 1.
for ind, y in enumerate(y_data):
  onehot[ind][y] = 1

onehot[:10]

array([[1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.]])

In [20]:
# Same encoding with scikit-learn. The transformed result is converted to a
# dense ndarray with .toarray(); y_data is overwritten with the one-hot matrix.
from sklearn.preprocessing import OneHotEncoder
enc = OneHotEncoder()
enc.fit(y_data)  
y_data = enc.transform(y_data).toarray()
y_data[:10]

array([[1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.]])

#### Scaling

In [21]:
from sklearn.preprocessing import MinMaxScaler

# Min-max scale every feature column; the scaler is fitted on the full data set.
min_max_scaler = MinMaxScaler()
x_data_minmax = min_max_scaler.fit_transform(x_data)
x_data_minmax[:3]

array([[0.22222222, 0.625     , 0.06779661, 0.04166667],
       [0.16666667, 0.41666667, 0.06779661, 0.04166667],
       [0.11111111, 0.5       , 0.05084746, 0.04166667]])

#### 데이터 매트릭스 및 가중치 함수

In [23]:
x_0 = np.ones(x_data_minmax.shape[0]) # x_data_minmax.shape[0]: number of rows
x_data_minmax = np.column_stack((x_0, x_data_minmax))

x_data_minmax[:3] # a leading column of ones now holds the constant term (x0, x1, x2, x3, x4)

array([[1.        , 0.22222222, 0.625     , 0.06779661, 0.04166667],
       [1.        , 0.16666667, 0.41666667, 0.06779661, 0.04166667],
       [1.        , 0.11111111, 0.5       , 0.05084746, 0.04166667]])

In [24]:
weights = np.random.uniform(size=(3,5))
weights # randomly initialised weights (w0, w1, w2, w3, w4)

## Every class has its own weights, hence 3 rows -> it would otherwise be a single vector

array([[0.73647152, 0.55791304, 0.53174157, 0.36398027, 0.74676532],
       [0.47094401, 0.66099187, 0.45226947, 0.41428082, 0.28292423],
       [0.7413714 , 0.37890944, 0.64836039, 0.19878019, 0.83366674]])

#### softmax function

In [25]:
def softmax(z):
    """Convert each row of logits into a probability distribution.

    Args:
        z: 2-D array-like of shape (n_samples, n_classes) holding raw scores.

    Returns:
        ndarray of the same shape; every row is non-negative and sums to 1.
    """
    z = np.asarray(z, dtype=float)
    # Subtracting the row maximum does not change the result mathematically,
    # but it keeps np.exp from overflowing to inf (which would yield nan).
    shifted = z - np.max(z, axis=1, keepdims=True)
    e = np.exp(shifted)
    return e / np.sum(e, axis=1, keepdims=True)

In [26]:
z = x_data_minmax.dot(weights.T) # logits: linear combination of the weights and the x values
z.shape

(150, 3)

In [27]:
softmax(z)[:5] # per-class probabilities for the first five samples

array([[0.36157743, 0.26567361, 0.37274896],
       [0.36245391, 0.26921754, 0.36832855],
       [0.36070933, 0.26440791, 0.37488276],
       [0.36124446, 0.26537094, 0.3733846 ],
       [0.36083867, 0.26349875, 0.37566258]])

#### Cost Function (Cross Entropy)

In [28]:
def cross_entropy_function(y, x, weights):
    """Return the total cross-entropy of a softmax classifier.

    Args:
        y: one-hot targets, shape (n_samples, n_classes).
        x: design matrix, shape (n_samples, n_features).
        weights: per-class weights, shape (n_classes, n_features).

    Returns:
        Scalar cross-entropy summed over all samples (not averaged).
    """
    # Logits are built from the `x` argument, so the cost is valid for
    # whatever data set the caller passes in.
    z = x.dot(weights.T)
    # log-softmax via the log-sum-exp trick: equal to log(softmax(z)) but it
    # never evaluates log(0) or overflows in exp for large logits.
    shifted = z - np.max(z, axis=1, keepdims=True)
    log_prob = shifted - np.log(np.sum(np.exp(shifted), axis=1, keepdims=True))
    # y is one-hot, so the product keeps only the log-probability of the
    # true class of every sample.
    return -np.sum(y * log_prob)

In [29]:
# Cost of the randomly initialised weights, before any training.
cross_entropy_function(y_data,x_data_minmax,weights)

171.2262714424512

#### Gradient descent (weights update)

In [32]:
def _log_class_probabilities(x, theta):
    """Return the row-wise log-softmax of the logits ``x.dot(theta.T)``."""
    logits = x.dot(theta.T)
    # Shift by the row maximum so np.exp cannot overflow.
    shifted = logits - logits.max(axis=1, keepdims=True)
    return shifted - np.log(np.exp(shifted).sum(axis=1, keepdims=True))


def minimize_grdient(y, x, initial_weights, iterations=500000, alpha=0.001):
    """Fit softmax-regression weights with batch gradient descent.

    Args:
        y: one-hot targets, shape (n_samples, n_classes).
        x: design matrix, shape (n_samples, n_features).
        initial_weights: starting weights, shape (n_classes, n_features).
            The array is copied and never modified.
        iterations: number of gradient steps.
        alpha: learning rate.

    Returns:
        Tuple ``(theta, cost_history)``. ``theta`` holds the fitted weights.
        ``cost_history`` holds the total (summed) cross-entropy recorded
        after the update of every 10000th iteration, starting with the first;
        the mean cost per sample is printed at the same moments.
    """
    cost_history = []
    m = y.shape[0]  # number of samples; normalises both gradient and printed cost
    # A float copy keeps fractional updates even if integer weights are given.
    theta = np.array(initial_weights, dtype=float)

    for step in range(iterations):
        # (y - y_hat): true one-hot value minus the predicted class probability.
        residual = y - np.exp(_log_class_probabilities(x, theta))
        # Entry [k, j] of residual.T.dot(x) sums residual[:, k] * x[:, j] over
        # all samples, i.e. the per-weight update, computed for every class
        # and weight at once from the same (pre-update) theta.
        theta = theta + (alpha / m) * residual.T.dot(x)

        if step % 10000 == 0:
            # Computed locally so the reported cost always reflects the x and
            # y passed to this function.
            cost = -np.sum(y * _log_class_probabilities(x, theta))
            print(cost / m)
            cost_history.append(cost)

    return theta, cost_history

In [33]:
# Train from the random initial weights; the result is stored in `theta`,
# so `weights` itself is not reassigned.
theta, cost_history = minimize_grdient(y_data, x_data_minmax, weights)

1.1413999998369357
0.753791594146045
0.612037164100472
0.5362377701968719
0.4874767590773951
0.4524325946660678
0.4254308326951758
0.4036336809313542
0.38544810766377025
0.36990047912903606
0.3563562834045034
0.34438095409462066
0.33366491200391946
0.3239804655151998
0.3151556705304917
0.3070577615118938
0.29958225461735216
0.29264555410151616
0.2861798000505995
0.2801291942107574
0.27444732653592413
0.2690951950106894
0.26403971559018347
0.25925258492789205
0.254709401167117
0.2503889762673393
0.24627279237301483
0.2423445678182147
0.23858990750395345
0.23499601886933352
0.23155147933895914
0.2282460445219961
0.22507048893648982
0.2220164728917252
0.21907643055877804
0.2162434753193738
0.21351131929425493
0.21087420457786954
0.2083268441925037
0.20586437115567413
0.20348229435470686
0.20117646016046345
0.19894301890221275
0.19677839547815437
0.19467926349925607
0.19264252246398145
0.190665277542968
0.18874482161950878
0.18687861928665256
0.18506429254720907


#### Confusion matrix for multiclass

- Class별로 True positive와 error로 분류
- FN 행 기준, FP 열 기준으로 값 확인

Precision for multiclass
- TP/(TP+FP), 하나의 클래스와 나머지 col 클래스
  - precision A = TP_A / (TP_A + FP_A) = TP_A / (TP_A + FP_AB + FP_AC + FP_AD)
  - FP_AB : 실제로는 B인데 A로 잘못 예측한 것 (A 열, B 행의 값)


In [48]:
# Draw 30 row indices from [0, 150); randint samples with replacement, so
# duplicates are possible. The same rows were used to fit theta, so the
# score computed from them is a training accuracy.
rand_index= np.random.randint(0,150,30)
rand_index

array([123, 119,  66,  54, 114,  62, 113,  45,  15,  22,  67,  47,  64,
       146, 144, 133,  36,  27,  75, 133, 118,  91,  80,  16,  22,   7,
       134,  20, 112,  41])

In [49]:
y_pred = np.argmax(softmax(x_data_minmax[rand_index].dot(theta.T)),axis=1) # for each sampled row, keep only the class with the highest predicted probability
y_pred

array([2, 1, 1, 1, 2, 1, 2, 0, 0, 0, 1, 0, 1, 2, 2, 1, 0, 0, 1, 1, 2, 1,
       1, 0, 0, 0, 1, 0, 2, 0])

In [50]:
y_true = np.argmax(y_data[rand_index],axis=1) # the true labels, recovered from the one-hot rows
y_true

array([2, 2, 1, 1, 2, 1, 2, 0, 0, 0, 1, 0, 1, 2, 2, 2, 0, 0, 1, 2, 2, 1,
       1, 0, 0, 0, 2, 0, 2, 0])

In [51]:
# Element-wise check: True where the predicted class equals the true class.
y_pred == y_true

array([ True, False,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True, False,  True,  True,
        True, False,  True,  True,  True,  True,  True,  True, False,
        True,  True,  True])

In [52]:
sum(y_pred == y_true) / len(rand_index) # accuracy: only the True entries contribute to the sum

0.8666666666666667