In [41]:
import torch

x_train = torch.FloatTensor([ [1, 2, 1, 1], [2, 1, 3, 2], [3, 1, 3, 4], [4, 1, 5, 5], [1, 7, 5, 5], [1, 2, 5, 6], [1, 6, 6, 6], [1, 7, 7, 7] ])
y_train = torch.FloatTensor([ [0, 0, 1], [0, 0, 1], [0, 0, 1], [0, 1, 0], [0, 1, 0],[0, 1, 0], [1, 0, 0], [1, 0, 0] ])

#w,b 초기화 및 optimizer 생성
w = torch.randn(4,3, requires_grad=True)
b = torch.randn(1,3, requires_grad=True)
optim = torch.optim.Adam([w,b], lr=0.1)

for epoch in range(3001):

  h = torch.softmax(torch.mm(x_train, w) + b, dim=1) #x_train @ w +b, dim은 차원(0이면 행의 합, 1이면 열의 합)
  cost = -torch.mean(torch.sum(y_train * torch.log(h), dim=1)) #y_train을 곱하면 0인 값 다 없어짐

  optim.zero_grad() # 기울기 초기화
  cost.backward()
  optim.step()

  with torch.no_grad():
    if epoch % 100 == 0:
      print(f'epoch: {epoch}, cost: {cost.item()}')

epoch: 0, cost: 2.9247336387634277
epoch: 100, cost: 0.31312355399131775
epoch: 200, cost: 0.1825372874736786
epoch: 300, cost: 0.11977528780698776
epoch: 400, cost: 0.0846986398100853
epoch: 500, cost: 0.06306234747171402
epoch: 600, cost: 0.048775751143693924
epoch: 700, cost: 0.03885277733206749
epoch: 800, cost: 0.03168080747127533
epoch: 900, cost: 0.026326898485422134
epoch: 1000, cost: 0.02222164161503315
epoch: 1100, cost: 0.01900220848619938
epoch: 1200, cost: 0.016428738832473755
epoch: 1300, cost: 0.014337613247334957
epoch: 1400, cost: 0.012614177539944649
epoch: 1500, cost: 0.011176041327416897
epoch: 1600, cost: 0.009962919168174267
epoch: 1700, cost: 0.008929640986025333
epoch: 1800, cost: 0.008042030967772007
epoch: 1900, cost: 0.007273686118423939
epoch: 2000, cost: 0.006603957153856754
epoch: 2100, cost: 0.006016617175191641
epoch: 2200, cost: 0.005498621147125959
epoch: 2300, cost: 0.005039384588599205
epoch: 2400, cost: 0.004630394279956818
epoch: 2500, cost: 0.0042

In [43]:
#새로운 데이터(x)에 대한 결과값(y) 예측
x_test = torch.FloatTensor([ [1,11,10,9], [1, 3, 4, 3], [1, 1, 0, 1]])

h_test = torch.softmax(torch.mm(x_test, w) + b, dim=1)
print(h_test)
print(torch.argmax(h_test, dim=1)) # 텐서에서 가장 큰 값 리턴

tensor([[1.0000e+00, 2.0165e-19, 6.3598e-38],
        [1.1333e-04, 7.0520e-01, 2.9468e-01],
        [3.3437e-34, 1.5039e-11, 1.0000e+00]], grad_fn=<SoftmaxBackward0>)
tensor([0, 1, 2])


In [49]:
#조금 더 깔끔한 softmax

#마음에 안드는 부분 1) [1,0,0], [0,1,0], [0,0,1] 대신 0, 1, 2를 사용하면 안될까?
#마음에 안드는 부분 2) 이렇게 복잡한 함수를 항상 직접 구현해야하나? 어차피 softmax, cross entropy인데?
#해답) pytorch가 제공하는 cross_entropy 함수를 활용하면 해결!
import torch
import torch.nn.functional as F
import torch.nn as nn

x_train = torch.FloatTensor([ [1, 2, 1, 1], [2, 1, 3, 2], [3, 1, 3, 4], [4, 1, 5, 5], [1, 7, 5, 5], [1, 2, 5, 6], [1, 6, 6, 6], [1, 7, 7, 7] ])
y_train = torch.LongTensor([2,2,2,1,1,1,0,0]) # y_train 수정

model = nn.Linear(4,3) # 입력 차원 -> 4, 출력 차원 -> 3, w와 b 초기화 해줌

optim = torch.optim.Adam(model.parameters(), lr=0.1) #parameters가 w, b 의미

for epoch in range(5001):
  h = model(x_train)
  cost = F.cross_entropy(h, y_train) # F.cross_entropy는 softmax와 cross entropy를 합친 것

  optim.zero_grad() # 기울기 초기화
  cost.backward()
  optim.step()

  with torch.no_grad():
    if epoch % 100 == 0:
      print(f'epoch: {epoch}, cost: {cost.item()}')



epoch: 0, cost: 3.1965904235839844
epoch: 100, cost: 0.34898409247398376
epoch: 200, cost: 0.22943076491355896
epoch: 300, cost: 0.15959779918193817
epoch: 400, cost: 0.11731728166341782
epoch: 500, cost: 0.08991436660289764
epoch: 600, cost: 0.07108975201845169
epoch: 700, cost: 0.05757715925574303
epoch: 800, cost: 0.04754377901554108
epoch: 900, cost: 0.03988942131400108
epoch: 1000, cost: 0.03391742333769798
epoch: 1100, cost: 0.02916867658495903
epoch: 1200, cost: 0.02533036842942238
epoch: 1300, cost: 0.022183500230312347
epoch: 1400, cost: 0.01957109570503235
epoch: 1500, cost: 0.01737830974161625
epoch: 1600, cost: 0.015519571490585804
epoch: 1700, cost: 0.01393011026084423
epoch: 1800, cost: 0.012560175731778145
epoch: 1900, cost: 0.011371037922799587
epoch: 2000, cost: 0.010332249104976654
epoch: 2100, cost: 0.009419398382306099
epoch: 2200, cost: 0.008612948469817638
epoch: 2300, cost: 0.007897132076323032
epoch: 2400, cost: 0.007258766330778599
epoch: 2500, cost: 0.00668721

In [50]:
#Sklearn에서 Softmax Regression
#sklearn에는 Logistic Regression에 Softmax Regression이 함께 구현됨
#y에 세 종류 이상의 값이 있을 경우 Softmax Regression 실행
import numpy as np
from sklearn.linear_model import LogisticRegression

x_train = np.array([ [1, 2, 1, 1], [2, 1, 3, 2], [3, 1, 3, 4], [4, 1, 5, 5], [1, 7, 5, 5], [1, 2, 5, 6], [1, 6, 6, 6], [1, 7, 7, 7] ])
y_train = np.array([2, 2, 2, 1, 1, 1, 0, 0]) #y에 0, 1, 2 등 셋 이상의 class가 존재 => Softmax Regression 실행

logistic = LogisticRegression(penalty = None) #모델 생성
logistic.fit(x_train, y_train) #학습

pred = logistic.predict([ [1,11,10,9], [1,3,4,3], [1,1,0,1] ]) #test case(값 예측)
print(pred)

[0 1 2]
