### Split train and test set

In [1]:
# 필요한 라이브러리 호출
import torch
from torch.optim import Adam
from torch.nn import Linear, MSELoss, Sequential, Module, Softmax, CrossEntropyLoss
import torch.nn.functional as F
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix
from sklearn.model_selection import train_test_split

In [2]:
# iris 데이터 불러오기
iris = load_iris(as_frame=True)
df = iris['frame']
df

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,2
146,6.3,2.5,5.0,1.9,2
147,6.5,3.0,5.2,2.0,2
148,6.2,3.4,5.4,2.3,2


In [8]:
# feature와 label 가져오기
x_data = df.iloc[:, :4].values
y_data = df.iloc[:, -1].values

In [9]:
# train 7 : test 3 비율로 나누기
x_train, x_test, y_train, y_test = train_test_split(x_data, y_data, test_size=0.3, stratify=y_data)

In [10]:
# 데이터 수 확인
print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)

(105, 4) (45, 4) (105,) (45,)


In [11]:
# 학습용 x, y 데이터를 텐서로 생성
xt = torch.FloatTensor(x_train)
yt = torch.LongTensor(y_train)

In [12]:
# 학습
model = Sequential()
model.add_module('nn', Linear(4, 3))
model.add_module('softmax', Softmax(dim=1))
loss_fn = CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr=0.01)

for epoch in range(2000):
    optimizer.zero_grad()
    hx = model(xt)
    cost = loss_fn(hx, yt)
    cost.backward()
    optimizer.step()
    print(f'cost : {cost.item()}')

cost : 1.2014758586883545
cost : 1.1980900764465332
cost : 1.1941744089126587
cost : 1.1897304058074951
cost : 1.184822678565979
cost : 1.1795612573623657
cost : 1.1740609407424927
cost : 1.1682312488555908
cost : 1.1618624925613403
cost : 1.154862642288208
cost : 1.1472411155700684
cost : 1.1390560865402222
cost : 1.1303917169570923
cost : 1.121343970298767
cost : 1.1120049953460693
cost : 1.1024409532546997
cost : 1.0926679372787476
cost : 1.0826328992843628
cost : 1.0722131729125977
cost : 1.0612372159957886
cost : 1.0495338439941406
cost : 1.036982774734497
cost : 1.0235539674758911
cost : 1.0093488693237305
cost : 0.9946497678756714
cost : 0.9799934029579163
cost : 0.9662383794784546
cost : 0.9544985890388489
cost : 0.9457886219024658
cost : 0.9404091238975525
cost : 0.9375646710395813
cost : 0.9357851147651672
cost : 0.9337859153747559
cost : 0.9309006929397583
cost : 0.9270068407058716
cost : 0.9223073124885559
cost : 0.9171533584594727
cost : 0.9119309782981873
cost : 0.9069871

In [None]:
predt = model(xt)
prob, idx = predt.max(dim=1)
print(idx)

tensor([2, 0, 2, 0, 0, 0, 0, 1, 2, 1, 2, 2, 2, 0, 1, 1, 1, 1, 1, 2, 1, 2, 2, 1,
        2, 2, 1, 2, 2, 0, 0, 0, 2, 0, 0, 2, 0, 2, 1, 0, 0, 2, 1, 1, 1, 0, 2, 2,
        0, 0, 0, 2, 1, 0, 2, 1, 0, 0, 2, 1, 1, 1, 0, 0, 2, 2, 2, 2, 0, 0, 1, 1,
        1, 1, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 1, 2, 1, 0, 1, 2, 1, 1, 2, 2, 2, 2,
        1, 1, 1, 1, 1, 1, 2, 0, 2])


In [14]:
yt

tensor([2, 0, 2, 0, 0, 0, 0, 1, 2, 1, 2, 2, 2, 0, 1, 1, 1, 1, 1, 2, 1, 2, 2, 1,
        2, 2, 1, 2, 2, 0, 0, 0, 2, 0, 0, 2, 0, 2, 1, 0, 0, 2, 1, 1, 1, 0, 2, 2,
        0, 0, 0, 2, 1, 0, 2, 1, 0, 0, 2, 1, 1, 1, 0, 0, 2, 2, 2, 2, 0, 0, 1, 1,
        1, 1, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 1, 2, 1, 0, 1, 2, 1, 1, 2, 2, 2, 1,
        1, 1, 1, 1, 1, 1, 2, 0, 2])

In [None]:
# evaluation of train set
print(f'accuracy : {accuracy_score(yt.numpy(), idx.numpy()) * 100}%')

accuracy : 99.04761904761905%


In [None]:
# correlation matrix
df.corr()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
sepal length (cm),1.0,-0.11757,0.871754,0.817941,0.782561
sepal width (cm),-0.11757,1.0,-0.42844,-0.366126,-0.426658
petal length (cm),0.871754,-0.42844,1.0,0.962865,0.949035
petal width (cm),0.817941,-0.366126,0.962865,1.0,0.956547
target,0.782561,-0.426658,0.949035,0.956547,1.0


---

### Test set

In [24]:
# test용 x, y 데이터를 텐서로 생성
xtt = torch.FloatTensor(x_test)
ytt = torch.LongTensor(y_test)

In [25]:
predt = model(xtt)
prob, idx = predt.max(dim=1)
print(idx)

tensor([1, 0, 2, 2, 0, 2, 2, 0, 2, 1, 0, 0, 0, 2, 1, 1, 0, 0, 1, 2, 2, 0, 0, 0,
        1, 1, 2, 2, 0, 2, 1, 1, 0, 1, 2, 1, 1, 1, 2, 2, 0, 1, 1, 0, 2])


In [26]:
# evaluation of test set
print(f'accuracy : {accuracy_score(ytt.numpy(), idx.numpy()) * 100}%')

accuracy : 95.55555555555556%
