# Multinomial Classification

### Imports

In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as func
import torch.optim as opt

# Mount Google Drive at /content/drive; the assignment CSV is later read
# from a path under this mount point.
from google.colab import drive
drive.mount('/content/drive')

# Fix the random seeds for reproducibility. NumPy is seeded as well because
# np.random.shuffle() is used further down to shuffle the dataset rows.
# torch.manual_seed stays last so the cell still displays the Generator.
np.random.seed(1)
torch.manual_seed(1)

Mounted at /content/drive


<torch._C.Generator at 0x7f0f304ed790>

### 1.Data loading

|$x_1$|$x_2$|$x_3$|$x_4$|$y$|
|:---:|:---:|:---:|:---:|:---:|
|1|2|1|1|2|
|2|1|3|2|2|
|3|1|3|4|2|
|4|1|5|5|1|
|1|7|5|5|1|
|1|2|5|6|1|
|1|6|6|6|0|
|1|7|7|7|0|



In [None]:
# Enter the toy dataset by hand: eight samples with four features each,
# and one class index (0, 1 or 2) per sample.
features = [
    [1, 2, 1, 1],
    [2, 1, 3, 2],
    [3, 1, 3, 4],
    [4, 1, 5, 5],
    [1, 7, 5, 5],
    [1, 2, 5, 6],
    [1, 6, 6, 6],
    [1, 7, 7, 7],
]
labels = [2, 2, 2, 1, 1, 1, 0, 0]

x_train = torch.tensor(features, dtype=torch.float32)
y_train = torch.tensor(labels, dtype=torch.long)

# Show each tensor followed by its shape.
print(x_train, x_train.shape, sep='\n')
print(y_train, y_train.shape, sep='\n')

# Initialise the model (input dim, output dim): 4 features in, 3 class logits out.
model = nn.Linear(in_features=4, out_features=3)

tensor([[1., 2., 1., 1.],
        [2., 1., 3., 2.],
        [3., 1., 3., 4.],
        [4., 1., 5., 5.],
        [1., 7., 5., 5.],
        [1., 2., 5., 6.],
        [1., 6., 6., 6.],
        [1., 7., 7., 7.]])
torch.Size([8, 4])
tensor([2, 2, 2, 1, 1, 1, 0, 0])
torch.Size([8])


### 2. Hypothesis, Cost and Optimization

- 다음 수식이 맞다고 가정한다

$$ y_{hypo} = H(x) = Softmax(Linear(x)) $$

$$ Linear(x) = W ⋅ x + b $$

$$ Softmax(X) = 
\begin{bmatrix} 
y_1 & \cdots & y_C \\ 
\end{bmatrix}
\quad
,
y_i = \frac{e^{x_i}}{\sum\limits_{j=1}^{C}e^{x_j}}$$

- Cost function으로 Cross Entropy를 이용한다

$$ -\sum\limits_{i=1}^{C}y_{train,i}\log(y_{hypo,i}) $$

- Pytorch의 cross entropy loss function에는 softmax가 이미 포함되어 있음에 주의!

  + 그렇기 때문에, model을 그냥 linear model을 사용한다!
  + $y_{train}$의 입력 형태는 class의 index이다 (one-hot encoding을 해줄 필요 없다)

In [None]:
# Training hyper-parameters, named once so the loop bound and the log line
# cannot drift apart.
num_epochs = 1000
log_interval = 100

# Optimizer: plain SGD over the model parameters (learning rate = 0.1).
optimizer = opt.SGD(model.parameters(), lr=0.1)

# Training loop; every epoch feeds the whole of x_train through the model.
for epoch in range(num_epochs):

  # Compute the cost: cross_entropy(model output, target class indices).
  # The model output is passed as-is, without an explicit softmax.
  y_hypo = model(x_train)
  cost = func.cross_entropy(y_hypo, y_train)

  # Update the model from the cost: clear old gradients, back-propagate, step.
  optimizer.zero_grad()
  cost.backward()
  optimizer.step()

  # Print an intermediate result every `log_interval` epochs. The epoch
  # shown is the zero-based loop index, and the accuracy is taken from the
  # outputs computed before this epoch's parameter update.
  if epoch % log_interval == log_interval - 1:
    correct_prediction = y_hypo.argmax(dim=1) == y_train
    accuracy = correct_prediction.sum().item() / len(correct_prediction)
    print('Epoch {:4d}/{} Cost: {:.6f} Accuracy {:2.2f}%'.format(
            epoch, num_epochs, cost.item(), accuracy * 100,
    ))

Epoch   99/1000 Cost: 0.714870 Accuracy 75.00%
Epoch  199/1000 Cost: 0.633744 Accuracy 62.50%
Epoch  299/1000 Cost: 0.576285 Accuracy 62.50%
Epoch  399/1000 Cost: 0.525853 Accuracy 87.50%
Epoch  499/1000 Cost: 0.478306 Accuracy 87.50%
Epoch  599/1000 Cost: 0.432054 Accuracy 87.50%
Epoch  699/1000 Cost: 0.386326 Accuracy 87.50%
Epoch  799/1000 Cost: 0.340779 Accuracy 87.50%
Epoch  899/1000 Cost: 0.295738 Accuracy 87.50%
Epoch  999/1000 Cost: 0.255138 Accuracy 100.00%


### 3. Assignment

- Data 파일을 이용한 Multinomial Classification
  + data_multinomial_classification.csv을 이용해 학습한다
    * 16개의 input, 1개의 output

$$
\begin{bmatrix}
x_1 & \cdots & x_{16} & y \\
\end{bmatrix}
$$

- Accuracy 95% 이상을 달성한다

In [4]:
# Load the dataset from the mounted Google Drive as a float32 array.
csv_path = '/content/drive/MyDrive/Colab Notebooks/data_multinomial_classification.csv'
dataset = np.loadtxt(csv_path, dtype=np.float32, delimiter=',')

# Shuffle the dataset in place so the samples are in random order.
np.random.shuffle(dataset)

In [6]:
# Convert to torch tensors: every column except the last is an input
# feature, and the last column is the target.
features = dataset[:, :-1]
labels = dataset[:, -1]

x_train = torch.FloatTensor(features)
# The targets are stored as a LongTensor because cross_entropy does not
# accept float class-index targets.
y_train = torch.LongTensor(labels)

In [7]:
# Training hyper-parameters, named once so the loop bound and the log line
# cannot drift apart.
num_epochs = 1000
log_interval = 100

# Initialise the model (input dim, output dim): 16 input features, 7 outputs.
# NOTE(review): 7 is presumably the number of classes in the CSV labels — confirm.
model = nn.Linear(16, 7)

# Optimizer: plain SGD over the model parameters (learning rate = 0.1).
optimizer = opt.SGD(model.parameters(), lr=0.1)

# Training loop; every epoch feeds the whole of x_train through the model.
for epoch in range(num_epochs):

  # Compute the cost: cross_entropy(model output, target class indices).
  # The model output is passed as-is, without an explicit softmax.
  y_hypo = model(x_train)
  cost = func.cross_entropy(y_hypo, y_train)

  # Update the model from the cost: clear old gradients, back-propagate, step.
  optimizer.zero_grad()
  cost.backward()
  optimizer.step()

  # Print an intermediate result every `log_interval` epochs. The epoch
  # shown is the zero-based loop index, and the accuracy is taken from the
  # outputs computed before this epoch's parameter update.
  if epoch % log_interval == log_interval - 1:
    correct_prediction = y_hypo.argmax(dim=1) == y_train
    accuracy = correct_prediction.sum().item() / len(correct_prediction)
    print('Epoch {:4d}/{} Cost: {:.6f} Accuracy {:2.2f}%'.format(
            epoch, num_epochs, cost.item(), accuracy * 100,
    ))

Epoch   99/1000 Cost: 0.469938 Accuracy 89.11%
Epoch  199/1000 Cost: 0.317336 Accuracy 93.07%
Epoch  299/1000 Cost: 0.245835 Accuracy 95.05%
Epoch  399/1000 Cost: 0.202133 Accuracy 96.04%
Epoch  499/1000 Cost: 0.172118 Accuracy 97.03%
Epoch  599/1000 Cost: 0.150037 Accuracy 100.00%
Epoch  699/1000 Cost: 0.133041 Accuracy 100.00%
Epoch  799/1000 Cost: 0.119530 Accuracy 100.00%
Epoch  899/1000 Cost: 0.108523 Accuracy 100.00%
Epoch  999/1000 Cost: 0.099384 Accuracy 100.00%
