### Multipleclass Classification Practice

In [2]:
# 필요한 라이브러리 호출
import torch
from torch.optim import Adam
from torch.nn import Linear, MSELoss, Sequential, Module, Softmax, CrossEntropyLoss
import torch.nn.functional as F
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix

In [3]:
# iris 꽃 데이터 불러오기
iris = load_iris(as_frame=True)
df = iris['frame']
df

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,2
146,6.3,2.5,5.0,1.9,2
147,6.5,3.0,5.2,2.0,2
148,6.2,3.4,5.4,2.3,2


In [4]:
# description 보기
iris.keys()
print(iris['DESCR'])

.. _iris_dataset:

Iris plants dataset
--------------------

**Data Set Characteristics:**

    :Number of Instances: 150 (50 in each of three classes)
    :Number of Attributes: 4 numeric, predictive attributes and the class
    :Attribute Information:
        - sepal length in cm
        - sepal width in cm
        - petal length in cm
        - petal width in cm
        - class:
                - Iris-Setosa
                - Iris-Versicolour
                - Iris-Virginica
                
    :Summary Statistics:

                    Min  Max   Mean    SD   Class Correlation
    sepal length:   4.3  7.9   5.84   0.83    0.7826
    sepal width:    2.0  4.4   3.05   0.43   -0.4194
    petal length:   1.0  6.9   3.76   1.76    0.9490  (high!)
    petal width:    0.1  2.5   1.20   0.76    0.9565  (high!)

    :Missing Attribute Values: None
    :Class Distribution: 33.3% for each of 3 classes.
    :Creator: R.A. Fisher
    :Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)
    :

### sepal length: 5.1 / sepa width: 3.5 / petal length: 1.4, petal width: 0.2인 경우 종 예측하기

In [5]:
# feature와 label 가져오기
x_data = df.iloc[:, :4].values
y_data = df.iloc[:, -1:].values

In [6]:
# col별 가장 큰 데이터의 인덱스 추출
y_data = y_data.argmax(axis=1)
y_data

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int64)

In [7]:
# x, y, w, b 준비
x = torch.FloatTensor(x_data)
y = torch.LongTensor(y_data)

w = torch.empty([4, 3])
b = torch.empty([3])

torch.nn.init.uniform_(w)
torch.nn.init.uniform_(b)

tensor([0.2328, 0.4369, 0.0933])

In [8]:
# cost function 정의
def cost():
    z = torch.matmul(x, w) + b
    cost_i = F.cross_entropy(z, y) # softmax 과정 포함
    c = torch.mean(cost_i)
    return c

In [9]:
# 학습
model = Sequential()
model.add_module('nn', Linear(4, 3))
model.add_module('softmax', Softmax(dim=1))
loss_fn = CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr=0.01)

for epoch in range(2000):
    optimizer.zero_grad()
    hx = model(x)
    cost = loss_fn(hx, y)
    cost.backward()
    optimizer.step()
    print(f'cost : {cost.item()}')

cost : 0.6925504803657532
cost : 0.6587494611740112
cost : 0.6327245235443115
cost : 0.613141655921936
cost : 0.5986031889915466
cost : 0.5878669619560242
cost : 0.5799314975738525
cost : 0.5740338563919067
cost : 0.5696132183074951
cost : 0.5662661790847778
cost : 0.5637027025222778
cost : 0.5617170929908752
cost : 0.5601603984832764
cost : 0.5589267611503601
cost : 0.5579380989074707
cost : 0.5571377277374268
cost : 0.5564827919006348
cost : 0.5559428334236145
cost : 0.555493175983429
cost : 0.5551158785820007
cost : 0.554796576499939
cost : 0.5545247793197632
cost : 0.5542919635772705
cost : 0.5540908575057983
cost : 0.5539165735244751
cost : 0.5537643432617188
cost : 0.553631067276001
cost : 0.5535134673118591
cost : 0.5534093379974365
cost : 0.5533168315887451
cost : 0.5532342791557312
cost : 0.5531600713729858
cost : 0.5530934929847717
cost : 0.5530332326889038
cost : 0.5529787540435791
cost : 0.5529292821884155
cost : 0.552884042263031
cost : 0.5528426170349121
cost : 0.55280458

In [10]:
# 학습된 w값 확인
model[0].weight

Parameter containing:
tensor([[ 0.7759,  0.6649,  0.4357, -0.1638],
        [-0.2529, -0.4178,  0.1224, -0.6258],
        [-0.6332, -0.7415, -0.7302, -0.3547]], requires_grad=True)

In [11]:
# 학습된 b값 확인
model[0].bias

Parameter containing:
tensor([ 0.3972, -0.4979, -0.9421], requires_grad=True)

In [13]:
# 예측하기
pred = model(torch.FloatTensor([[5.1, 3.5, 1.4, 0.2]]))
prob, idx = pred.max(dim=1)
result = idx.tolist()[0]

if result == 0:
    result = 'setosa'
elif result == 1:
    result = 'versicolor'
elif result == 2:
    result = 'virginica'

print(f'This flower is [{result}]')

This flower is [setosa]


---

### dataloader
- dataloader : batch 단위로 학습할 수 있도록 해줌
- batch : 한 번에 처리되는 데이터 샘플의 개수

In [19]:
# import DataLoader
from torch.utils.data import TensorDataset, DataLoader

In [None]:
# feature와 label을 TensorDataset으로 만들기
dataset = TensorDataset(x, y)
dataloader = DataLoader(dataset, batch_size=16, shuffle=True)
print(len(dataloader)) # 전체 데이터수 / 배치 수 (sampling)

10


In [28]:
# 학습
model = Sequential()
model.add_module('nn', Linear(4, 3))
model.add_module('softmax', Softmax(dim=1))
loss_fn = CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr=0.01)

for epoch in range(2000):
    print(f'epoch: {epoch}')
    for batch_x, batch_y in dataloader:
        optimizer.zero_grad()
        hx = model(x)
        cost = loss_fn(hx, y)
        cost.backward()
        optimizer.step()
        print(f'    cost : {cost.item():.4f}')

epoch: 0
    cost : 1.1731
    cost : 1.1200
    cost : 1.0617
    cost : 0.9994
    cost : 0.9349
    cost : 0.8711
    cost : 0.8111
    cost : 0.7575
    cost : 0.7121
    cost : 0.6753
epoch: 1
    cost : 0.6466
    cost : 0.6246
    cost : 0.6082
    cost : 0.5959
    cost : 0.5867
    cost : 0.5798
    cost : 0.5746
    cost : 0.5706
    cost : 0.5675
    cost : 0.5651
epoch: 2
    cost : 0.5632
    cost : 0.5616
    cost : 0.5604
    cost : 0.5594
    cost : 0.5586
    cost : 0.5579
    cost : 0.5573
    cost : 0.5568
    cost : 0.5564
    cost : 0.5560
epoch: 3
    cost : 0.5557
    cost : 0.5555
    cost : 0.5552
    cost : 0.5550
    cost : 0.5549
    cost : 0.5547
    cost : 0.5546
    cost : 0.5544
    cost : 0.5543
    cost : 0.5542
epoch: 4
    cost : 0.5542
    cost : 0.5541
    cost : 0.5540
    cost : 0.5539
    cost : 0.5539
    cost : 0.5538
    cost : 0.5538
    cost : 0.5537
    cost : 0.5537
    cost : 0.5536
epoch: 5
    cost : 0.5536
    cost : 0.5536
    cost :