In [160]:
import torch
import torch.nn.functional as F

In [161]:
# Softmax over a 1-D vector of raw scores: the outputs are positive and sum to 1.
z = torch.tensor([1, 2, 3], dtype=torch.float32)
y_hat = torch.softmax(z, dim=0)

print(y_hat)

tensor([0.0900, 0.2447, 0.6652])


In [162]:
# Softmax along the class axis (dim=1) of a random 3x5 score matrix;
# each row of the result is a probability distribution.
z = torch.rand((3, 5), requires_grad=True)
y_hat = z.softmax(dim=1)
print(z)
print(y_hat)
# Sanity check: every row should add up to 1.
print(torch.sum(y_hat, dim=1))

tensor([[0.7570, 0.2346, 0.6471, 0.3556, 0.4452],
        [0.0193, 0.2616, 0.7713, 0.3785, 0.9980],
        [0.9008, 0.4766, 0.1663, 0.8045, 0.6552]], requires_grad=True)
tensor([[0.2570, 0.1524, 0.2303, 0.1721, 0.1882],
        [0.1178, 0.1501, 0.2499, 0.1687, 0.3135],
        [0.2615, 0.1711, 0.1254, 0.2375, 0.2045]], grad_fn=<SoftmaxBackward0>)
tensor([1.0000, 1.0000, 1.0000], grad_fn=<SumBackward1>)


In [163]:
# Draw a random target class in [0, 5) for each of the 3 rows.
y = torch.randint(0, 5, (3,), dtype=torch.long)

# Start from an all-zero tensor shaped like y_hat.
y_one_hot = torch.zeros_like(y_hat)
print(y_one_hot)
# Class indices as a column, one entry per row of y_one_hot.
class_index = y.unsqueeze(1)
print(class_index)

# One-hot encoding.
# In torch, a trailing underscore on a method name marks an in-place operation.
# scatter_(dim, index, value): dim is zero-based, so dim=0 is the first axis
# and dim=1 (used here) is the second.
y_one_hot.scatter_(1, class_index, 1)
print(y_one_hot)

tensor([[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]])
tensor([[1],
        [4],
        [4]])
tensor([[0., 1., 0., 0., 0.],
        [0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 1.]])


In [164]:
import torch.nn as nn
import torch.optim as optim

In [165]:
# Seed PyTorch's global RNG so subsequent random operations are repeatable.
torch.manual_seed(1)

# Toy dataset: 8 samples with 4 features each.
x_data = [
    [1, 2, 1, 1],
    [2, 1, 3, 2],
    [3, 1, 3, 4],
    [4, 1, 5, 5],
    [1, 7, 5, 5],
    [1, 2, 5, 6],
    [1, 6, 6, 6],
    [1, 7, 7, 7]
    ]
# Integer class label (0, 1 or 2) for each sample.
y_data = [2, 2, 2, 1, 1, 1, 0, 0]

x_train = torch.FloatTensor(x_data)
y_train = torch.LongTensor(y_data)

# One-hot encode the labels as a (samples, classes) float tensor. The shape is
# derived from the labels rather than hard-coded as (8, 3), so it stays correct
# if the data above is edited.
num_classes = int(y_train.max()) + 1
y_one_hot = torch.zeros(len(y_train), num_classes)
y_one_hot.scatter_(1, y_train.unsqueeze(1), 1)
print(y_one_hot)

tensor([[0., 0., 1.],
        [0., 0., 1.],
        [0., 0., 1.],
        [0., 1., 0.],
        [0., 1., 0.],
        [0., 1., 0.],
        [1., 0., 0.],
        [1., 0., 0.]])


In [166]:
# Softmax-regression parameters, zero-initialised and tracked by autograd.
# W has one row per input feature (4) and one column per class (3).
W = torch.zeros(4, 3, requires_grad=True)
# A single bias row with one entry per class (3).
b = torch.zeros(1, 3, requires_grad=True)

In [167]:
# Number of training epochs for the hand-written softmax regression.
epochs = 10000
# Plain SGD over the two hand-made parameter tensors.
optimizer = optim.SGD(params=[W, b], lr=0.1)

In [168]:
# Train the hand-written softmax regression by minimising the mean
# cross-entropy between the one-hot targets and the predicted distribution.
for epoch in range(epochs + 1):
    # Raw class scores (logits) for every training sample.
    logits = x_train.matmul(W) + b
    # log_softmax evaluates log(softmax(x)) in a single numerically stable step.
    # Taking torch.log of a separate softmax yields -inf once a probability
    # underflows to 0, and 0 * inf in the masked sum then turns the cost into NaN.
    log_probs = F.log_softmax(logits, dim=1)
    # Cross-entropy per sample (only the true-class term survives the one-hot
    # mask), averaged over the batch.
    cost = -(y_one_hot * log_probs).sum(dim=1).mean()

    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

    # Report progress every 100 epochs.
    if epoch % 100 == 0:
        print(
            f'Epoch: {epoch} | Cost: {cost.item()}'
        )

Epoch: 0 | Cost: 1.0986123085021973
Epoch: 100 | Cost: 0.704199492931366


Epoch: 200 | Cost: 0.6229994893074036
Epoch: 300 | Cost: 0.5657168030738831
Epoch: 400 | Cost: 0.5152913331985474
Epoch: 500 | Cost: 0.4676617383956909
Epoch: 600 | Cost: 0.4212779402732849
Epoch: 700 | Cost: 0.3754015862941742
Epoch: 800 | Cost: 0.3297657072544098
Epoch: 900 | Cost: 0.285072386264801
Epoch: 1000 | Cost: 0.2481546849012375
Epoch: 1100 | Cost: 0.23267605900764465
Epoch: 1200 | Cost: 0.22139869630336761
Epoch: 1300 | Cost: 0.21112912893295288
Epoch: 1400 | Cost: 0.2017364352941513
Epoch: 1500 | Cost: 0.19311325252056122
Epoch: 1600 | Cost: 0.185169517993927
Epoch: 1700 | Cost: 0.177829310297966
Epoch: 1800 | Cost: 0.17102716863155365
Epoch: 1900 | Cost: 0.16470730304718018
Epoch: 2000 | Cost: 0.15882112085819244
Epoch: 2100 | Cost: 0.15332652628421783
Epoch: 2200 | Cost: 0.14818620681762695
Epoch: 2300 | Cost: 0.1433679461479187
Epoch: 2400 | Cost: 0.13884282112121582
Epoch: 2500 | Cost: 0.13458557426929474
Epoch: 2600 | Cost: 0.13057352602481842
Epoch: 2700 | Cost: 0.12

In [169]:
# Evaluate the trained parameters on the training inputs. This is inspection
# only, so the forward pass runs under no_grad to avoid recording an autograd graph.
with torch.no_grad():
    y_hat = F.softmax(x_train.matmul(W) + b, dim=1)
# Predicted class probabilities, followed by the one-hot targets for comparison.
print(y_hat)
print(y_one_hot)

tensor([[7.9508e-11, 6.5691e-06, 9.9999e-01],
        [1.7811e-04, 1.9474e-02, 9.8035e-01],
        [1.1671e-13, 4.1809e-02, 9.5819e-01],
        [6.1380e-10, 9.6248e-01, 3.7520e-02],
        [7.2622e-02, 9.2453e-01, 2.8516e-03],
        [3.8537e-02, 9.6146e-01, 7.0805e-08],
        [9.1230e-01, 8.7704e-02, 8.1008e-08],
        [9.9212e-01, 7.8797e-03, 5.7660e-11]], grad_fn=<SoftmaxBackward0>)
tensor([[0., 0., 1.],
        [0., 0., 1.],
        [0., 0., 1.],
        [0., 1., 0.],
        [0., 1., 0.],
        [0., 1., 0.],
        [1., 0., 0.],
        [1., 0., 0.]])


In [170]:
# Predicted class of each sample = index of the largest entry in its row.
# The loop variable is deliberately not called `y`, so the label tensor `y`
# defined earlier in the notebook is not overwritten.
for predicted_class in y_hat.argmax(dim=1).tolist():
    print(predicted_class)

2
2
2
1
1
1
0
0


In [171]:
# Softmax regression implemented with a built-in module instead of raw tensors.
model = nn.Linear(in_features=4, out_features=3)  # 4 input values -> 3 output values
optimizer = optim.SGD(model.parameters(), lr=0.1)
epochs = 10000

for epoch in range(epochs + 1):
    y_hat = model(x_train)
    # Softmax regression uses cross-entropy as its cost function;
    # logistic regression uses binary cross-entropy.
    cost = F.cross_entropy(input=y_hat, target=y_train)

    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

    # Only report every 100th epoch.
    if epoch % 100 != 0:
        continue
    print(f'Epoch: {epoch} | Cost: {cost.item()}')

Epoch: 0 | Cost: 1.6167852878570557
Epoch: 100 | Cost: 0.6588908433914185
Epoch: 200 | Cost: 0.5734434723854065
Epoch: 300 | Cost: 0.5181514620780945
Epoch: 400 | Cost: 0.4732654392719269
Epoch: 500 | Cost: 0.4335159659385681
Epoch: 600 | Cost: 0.39656299352645874
Epoch: 700 | Cost: 0.36091411113739014
Epoch: 800 | Cost: 0.3253921866416931
Epoch: 900 | Cost: 0.2891784906387329
Epoch: 1000 | Cost: 0.25414782762527466
Epoch: 1100 | Cost: 0.23497343063354492
Epoch: 1200 | Cost: 0.22349268198013306
Epoch: 1300 | Cost: 0.2130531668663025
Epoch: 1400 | Cost: 0.20350950956344604
Epoch: 1500 | Cost: 0.1947513073682785
Epoch: 1600 | Cost: 0.1866869330406189
Epoch: 1700 | Cost: 0.17923785746097565
Epoch: 1800 | Cost: 0.17233800888061523
Epoch: 1900 | Cost: 0.16592969000339508
Epoch: 2000 | Cost: 0.15996336936950684
Epoch: 2100 | Cost: 0.15439589321613312
Epoch: 2200 | Cost: 0.14918936789035797
Epoch: 2300 | Cost: 0.1443105936050415
Epoch: 2400 | Cost: 0.13972996175289154
Epoch: 2500 | Cost: 0.13

In [172]:
# Forward pass for inspection only: run under no_grad so no autograd graph is recorded.
with torch.no_grad():
    preds = model(x_train)
# Raw class scores (logits) for every training sample.
print(preds)
# Predicted class per sample = index of the largest score in its row,
# computed with a single vectorised arg-max.
for predicted_class in preds.argmax(dim=1).tolist():
    print(predicted_class)

tensor([[-11.4190,  -0.1089,  11.8240],
        [ -4.3004,   0.4547,   4.3716],
        [-18.3870,   8.3334,  11.4634],
        [-12.6072,   8.7538,   5.5110],
        [  0.9640,   3.5059,  -2.2739],
        [  4.2154,   7.4301,  -9.0254],
        [  7.0721,   4.7319,  -9.1733],
        [ 10.5025,   5.6715, -13.0728]], grad_fn=<AddmmBackward0>)
2
2
2
1
1
1
0
0


In [None]:
class SoftmaxModel(nn.Module):
    """Softmax-regression model consisting of a single linear layer.

    ``forward`` returns the raw output of the linear layer (one score per
    class); it does not apply softmax itself. The notebook trains this model
    with ``F.cross_entropy``, which takes those raw scores directly.
    """

    def __init__(self, in_features: int, out_features: int) -> None:
        """Create the linear layer.

        Args:
            in_features: size of each input sample.
            out_features: number of scores produced per sample.
        """
        super().__init__()
        self.linear = nn.Linear(in_features, out_features)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the linear layer's output for ``x``."""
        logits = self.linear(x)
        return logits

In [174]:
# Set up the custom module: 4 input features, 3 classes.
epochs = 50000
model = SoftmaxModel(in_features=4, out_features=3)
# NOTE(review): this learning rate is 100x smaller than the 0.1 used for the
# nn.Linear model earlier in the notebook — confirm this is intentional.
optimizer = optim.SGD(model.parameters(), lr=0.001)

In [None]:
# Cross-entropy has the softmax step built in, so the model does not need an
# nn.Softmax() layer; calling nn.Linear() alone is enough.
criterion = nn.CrossEntropyLoss()

for epoch in range(epochs + 1):
    y_hat = model(x_train)
    cost = criterion(y_hat, y_train)

    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

    # Report progress on every 100th epoch.
    if not epoch % 100:
        print(f'Epoch: {epoch} | Cost: {cost.item()}')

Epoch: 0 | Cost: 1.1581474542617798
Epoch: 100 | Cost: 1.1402904987335205


Epoch: 200 | Cost: 1.1276253461837769
Epoch: 300 | Cost: 1.1186543703079224
Epoch: 400 | Cost: 1.1113512516021729
Epoch: 500 | Cost: 1.1048188209533691
Epoch: 600 | Cost: 1.0987861156463623
Epoch: 700 | Cost: 1.0931718349456787
Epoch: 800 | Cost: 1.087937355041504
Epoch: 900 | Cost: 1.0830528736114502
Epoch: 1000 | Cost: 1.0784893035888672
Epoch: 1100 | Cost: 1.074218511581421
Epoch: 1200 | Cost: 1.0702136754989624
Epoch: 1300 | Cost: 1.0664502382278442
Epoch: 1400 | Cost: 1.0629057884216309
Epoch: 1500 | Cost: 1.0595600605010986
Epoch: 1600 | Cost: 1.0563946962356567
Epoch: 1700 | Cost: 1.0533937215805054
Epoch: 1800 | Cost: 1.0505425930023193
Epoch: 1900 | Cost: 1.0478289127349854
Epoch: 2000 | Cost: 1.045240879058838
Epoch: 2100 | Cost: 1.0427687168121338
Epoch: 2200 | Cost: 1.0404032468795776
Epoch: 2300 | Cost: 1.038136601448059
Epoch: 2400 | Cost: 1.0359615087509155
Epoch: 2500 | Cost: 1.0338714122772217
Epoch: 2600 | Cost: 1.0318608283996582
Epoch: 2700 | Cost: 1.029924392700195

In [176]:
# Forward pass for inspection only: run under no_grad so no autograd graph is recorded.
with torch.no_grad():
    preds = model(x_train)
# Predicted class per sample = index of the largest score in its row,
# computed with a single vectorised arg-max.
for predicted_class in preds.argmax(dim=1).tolist():
    print(predicted_class)

2
2
2
2
0
0
0
0


In [177]:
import sklearn.datasets as datasets
import numpy as np

# Load the iris dataset bundled with scikit-learn. Later cells read the
# feature matrix from `iris.data` and the class labels from `iris.target`.
iris = datasets.load_iris()
# Bare expression: the notebook displays the loaded object as the cell output.
iris

{'data': array([[5.1, 3.5, 1.4, 0.2],
        [4.9, 3. , 1.4, 0.2],
        [4.7, 3.2, 1.3, 0.2],
        [4.6, 3.1, 1.5, 0.2],
        [5. , 3.6, 1.4, 0.2],
        [5.4, 3.9, 1.7, 0.4],
        [4.6, 3.4, 1.4, 0.3],
        [5. , 3.4, 1.5, 0.2],
        [4.4, 2.9, 1.4, 0.2],
        [4.9, 3.1, 1.5, 0.1],
        [5.4, 3.7, 1.5, 0.2],
        [4.8, 3.4, 1.6, 0.2],
        [4.8, 3. , 1.4, 0.1],
        [4.3, 3. , 1.1, 0.1],
        [5.8, 4. , 1.2, 0.2],
        [5.7, 4.4, 1.5, 0.4],
        [5.4, 3.9, 1.3, 0.4],
        [5.1, 3.5, 1.4, 0.3],
        [5.7, 3.8, 1.7, 0.3],
        [5.1, 3.8, 1.5, 0.3],
        [5.4, 3.4, 1.7, 0.2],
        [5.1, 3.7, 1.5, 0.4],
        [4.6, 3.6, 1. , 0.2],
        [5.1, 3.3, 1.7, 0.5],
        [4.8, 3.4, 1.9, 0.2],
        [5. , 3. , 1.6, 0.2],
        [5. , 3.4, 1.6, 0.4],
        [5.2, 3.5, 1.5, 0.2],
        [5.2, 3.4, 1.4, 0.2],
        [4.7, 3.2, 1.6, 0.2],
        [4.8, 3.1, 1.6, 0.2],
        [5.4, 3.4, 1.5, 0.4],
        [5.2, 4.1, 1.5, 0.1],
  

In [178]:
# Convert the iris arrays to tensors: float32 features and int64 class labels.
# Note that this rebinds x_train / y_train, replacing the toy dataset used earlier.
x_train = torch.tensor(iris.data, dtype=torch.float32)
y_train = torch.tensor(iris.target, dtype=torch.long)
print(x_train.shape)
print(y_train.shape)

torch.Size([150, 4])
torch.Size([150])


In [179]:
# Size the model from the data: one input per feature column and one output
# per distinct label value.
n_features = iris.data.shape[1]
n_classes = np.unique(iris.target).size
model = SoftmaxModel(n_features, n_classes)
optimizer = optim.SGD(model.parameters(), lr=0.001)
epochs = 50000

for epoch in range(epochs + 1):
    # Forward pass and cross-entropy cost against the integer labels.
    y_hat = model(x_train)
    cost = F.cross_entropy(y_hat, y_train)

    # Standard update: clear old gradients, back-propagate, take an SGD step.
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

    # Report progress every 100 epochs.
    if epoch % 100 == 0:
        print(f'Epoch: {epoch} | Cost: {cost.item()}')

Epoch: 0 | Cost: 1.0882495641708374
Epoch: 100 | Cost: 0.9941848516464233
Epoch: 200 | Cost: 0.9680538773536682
Epoch: 300 | Cost: 0.9605863690376282
Epoch: 400 | Cost: 0.9560307860374451
Epoch: 500 | Cost: 0.9521746635437012
Epoch: 600 | Cost: 0.9486566185951233
Epoch: 700 | Cost: 0.9453957080841064
Epoch: 800 | Cost: 0.9423595070838928
Epoch: 900 | Cost: 0.9395262598991394
Epoch: 1000 | Cost: 0.9368780255317688
Epoch: 1100 | Cost: 0.9343987107276917
Epoch: 1200 | Cost: 0.9320740699768066
Epoch: 1300 | Cost: 0.9298908710479736
Epoch: 1400 | Cost: 0.9278374314308167
Epoch: 1500 | Cost: 0.9259036183357239
Epoch: 1600 | Cost: 0.9240797162055969
Epoch: 1700 | Cost: 0.9223570823669434
Epoch: 1800 | Cost: 0.9207280278205872
Epoch: 1900 | Cost: 0.9191854000091553
Epoch: 2000 | Cost: 0.917722761631012
Epoch: 2100 | Cost: 0.9163345098495483
Epoch: 2200 | Cost: 0.9150152802467346
Epoch: 2300 | Cost: 0.9137599468231201
Epoch: 2400 | Cost: 0.9125642776489258
Epoch: 2500 | Cost: 0.9114241600036621

In [180]:
# Evaluate the trained model on the training inputs. This is inference only,
# so the forward pass runs under no_grad to avoid recording an autograd graph.
with torch.no_grad():
    pred = model(x_train)
print(pred)

tensor([[9.6663e-01, 3.3331e-02, 3.7484e-05],
        [9.3108e-01, 6.8775e-02, 1.4584e-04],
        [9.5517e-01, 4.4747e-02, 8.4482e-05],
        [9.2923e-01, 7.0551e-02, 2.1823e-04],
        [9.7103e-01, 2.8941e-02, 3.3141e-05],
        [9.6677e-01, 3.3190e-02, 4.1946e-05],
        [9.5814e-01, 4.1755e-02, 1.0032e-04],
        [9.5464e-01, 4.5287e-02, 7.1650e-05],
        [9.1891e-01, 8.0780e-02, 3.1310e-04],
        [9.3403e-01, 6.5840e-02, 1.2773e-04],
        [9.7119e-01, 2.8786e-02, 2.3295e-05],
        [9.4649e-01, 5.3393e-02, 1.2102e-04],
        [9.3456e-01, 6.5311e-02, 1.3142e-04],
        [9.5798e-01, 4.1931e-02, 8.9643e-05],
        [9.8880e-01, 1.1202e-02, 2.9354e-06],
        [9.8863e-01, 1.1364e-02, 5.0633e-06],
        [9.8247e-01, 1.7515e-02, 1.2034e-05],
        [9.6463e-01, 3.5323e-02, 4.6428e-05],
        [9.6424e-01, 3.5725e-02, 3.0841e-05],
        [9.7324e-01, 2.6730e-02, 3.1666e-05],
        [9.3924e-01, 6.0675e-02, 8.6049e-05],
        [9.6716e-01, 3.2790e-02, 4

In [181]:
# Per-sample correctness: the predicted class (arg-max over the class axis)
# compared with the true label. This replaces the per-sample Python loop with
# one vectorised comparison; `accuracies` is now a boolean tensor, not a list.
accuracies = pred.argmax(dim=1) == y_train

# Accuracy = fraction of samples predicted correctly.
print(accuracies.float().mean())

tensor(0.9733)
