In [1]:
# %autosave 0
import warnings
warnings.filterwarnings(action='ignore')

import numpy as np
import pandas as pd

from tensorflow.keras.utils import to_categorical  # one-hot 엔코딩
from sklearn.preprocessing import LabelEncoder     # 명목형 척도 정수화 
from tensorflow.keras.models import load_model     # model 사용

In [2]:
# Restore the trained classifier from the HDF5 file stored beside this notebook.
model = load_model(filepath='./Iris.h5')

In [3]:
# Read the iris samples to classify; row 0 of the CSV supplies the column names.
df = pd.read_csv(
    '../../data/iris/iris_use.csv',
    header=[0],
)
# Preview the first five rows.
print(df.head(n=5))

   sepal.length  sepal.width  petal.length  petal.width      species
0           5.0          3.5           1.3          0.3  Iris-setosa
1           4.5          2.3           1.3          0.3  Iris-setosa
2           4.4          3.2           1.3          0.2  Iris-setosa
3           5.0          3.5           1.6          0.6  Iris-setosa
4           5.1          3.8           1.9          0.4  Iris-setosa


In [4]:
# Pull the raw values out of the DataFrame as a single NumPy array.
data = df.values
print(type(data), len(data), sep='\n')
# Columns 0-3 hold the four measurements (independent variables); cast to float.
X = data[:, :4].astype(np.float64)
# Column 4 holds the species name (dependent variable), kept as strings.
Y_str = data[:, 4]
print(Y_str)

<class 'numpy.ndarray'>
30
['Iris-setosa' 'Iris-setosa' 'Iris-setosa' 'Iris-setosa' 'Iris-setosa'
 'Iris-setosa' 'Iris-setosa' 'Iris-setosa' 'Iris-setosa' 'Iris-setosa'
 'Iris-versicolor' 'Iris-versicolor' 'Iris-versicolor' 'Iris-versicolor'
 'Iris-versicolor' 'Iris-versicolor' 'Iris-versicolor' 'Iris-versicolor'
 'Iris-versicolor' 'Iris-versicolor' 'Iris-virginica' 'Iris-virginica'
 'Iris-virginica' 'Iris-virginica' 'Iris-virginica' 'Iris-virginica'
 'Iris-virginica' 'Iris-virginica' 'Iris-virginica' 'Iris-virginica']


In [5]:
# Turn the species strings into integer class ids: the distinct labels are
# sorted alphabetically and numbered starting from 0.
encoder = LabelEncoder().fit(Y_str)
Y = encoder.transform(Y_str)
print(Y)

# One-hot encode the class ids: only the position of the class id is set to 1.
Y_encoded = to_categorical(Y)
# Show the encoded rows 0, 10 and 20.
print(Y_encoded[0], Y_encoded[10], Y_encoded[20], sep='\n')

# Recommendation: fit LabelEncoder on the labels of ALL data sets combined.
#   training CSV: A, B, C      test CSV: A, B, D
#                 0  1  2                0  1  ?   <- D was not seen when fitting
#   Merge the CSVs (A, B, C, D) -> apply the encoder -> split into train / test
#                   0  1  2  3

[0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2]
[1. 0. 0.]
[0. 1. 0.]
[0. 0. 1.]


In [6]:
# Map the class ids 0, 1, 2 back to their species names to confirm the encoding.
print(encoder.inverse_transform(np.arange(3)))

['Iris-setosa' 'Iris-versicolor' 'Iris-virginica']


In [7]:
# Class ids: Iris-setosa: 0, Iris-versicolor: 1, Iris-virginica: 2
# Run the loaded model on every row of X (the test samples).
p = model.predict(x=X)
print('p.shape:', np.shape(p))
print('데이터:', X[0, :])  # first sample row

p.shape: (30, 3)
데이터: [5.  3.5 1.3 0.3]


In [8]:
# Inspect the prediction for the first sample; each output is a probability
# between 0 and 1.
print('예측값:', p[0])
# The same three values as percentages with two decimals.
print('예측값: {0:.2f}% {1:.2f}% {2:.2f}%'.format(*[prob * 100 for prob in p[0]]))
# Ground truth for the same sample: integer class id, then its one-hot row.
print('LabelEncoder: ', Y[0])
print('One-hot-encoding: ', Y_encoded[0])
# Index of the largest output, followed by the species name of the first sample.
print(p[0].argmax())
print(Y_str[0])

예측값: [0.4704605  0.32327145 0.20626804]
예측값: 47.05% 32.33% 20.63%
LabelEncoder:  0
One-hot-encoding:  [1. 0. 0.]
0
Iris-setosa


In [9]:
# Walk the 2-D prediction array one row at a time.
for item in p:
    print(item)
    # Largest value in the row and the index at which it occurs.
    print('최대값:', item.max(), '-> index:', item.argmax())

[0.4704605  0.32327145 0.20626804]
최대값: 0.4704605 -> index: 0
[0.43805328 0.34259284 0.21935396]
최대값: 0.43805328 -> index: 0
[0.4704605  0.32327145 0.20626804]
최대값: 0.4704605 -> index: 0
[0.4704605  0.32327145 0.20626804]
최대값: 0.4704605 -> index: 0
[0.4704605  0.32327145 0.20626804]
최대값: 0.4704605 -> index: 0
[0.4704605  0.32327145 0.20626804]
최대값: 0.4704605 -> index: 0
[0.4704605  0.32327145 0.20626804]
최대값: 0.4704605 -> index: 0
[0.4704605  0.32327145 0.20626804]
최대값: 0.4704605 -> index: 0
[0.4704605  0.32327145 0.20626804]
최대값: 0.4704605 -> index: 0
[0.4704605  0.32327145 0.20626804]
최대값: 0.4704605 -> index: 0
[0.02899355 0.571861   0.3991454 ]
최대값: 0.571861 -> index: 1
[0.03518805 0.56944776 0.39536414]
최대값: 0.56944776 -> index: 1
[0.02293045 0.5701026  0.406967  ]
최대값: 0.5701026 -> index: 1
[0.03738485 0.5177055  0.44490966]
최대값: 0.5177055 -> index: 1
[0.02399477 0.5362787  0.4397265 ]
최대값: 0.5362787 -> index: 1
[0.03837029 0.54091406 0.42071566]
최대값: 0.54091406 -> index: 1
[0.031

In [10]:
# Check that every row of softmax outputs sums to 1.
# Floating-point arithmetic is inexact, so a sum can differ from 1 by a tiny
# rounding error. Print six decimals instead of rounding to a whole number:
# a '.0f' format shows "1" for any sum strictly between 0.5 and 1.5 and would
# therefore hide a real problem.
for item in p:
    print(item, ' 합계: {0:.6f}'.format(np.sum(item)))

# Fail loudly if any row is off by more than rounding noise.
assert np.allclose(np.sum(p, axis=1), 1.0, atol=1e-5), 'softmax rows do not sum to 1'

[0.4704605  0.32327145 0.20626804]  합계: 1
[0.43805328 0.34259284 0.21935396]  합계: 1
[0.4704605  0.32327145 0.20626804]  합계: 1
[0.4704605  0.32327145 0.20626804]  합계: 1
[0.4704605  0.32327145 0.20626804]  합계: 1
[0.4704605  0.32327145 0.20626804]  합계: 1
[0.4704605  0.32327145 0.20626804]  합계: 1
[0.4704605  0.32327145 0.20626804]  합계: 1
[0.4704605  0.32327145 0.20626804]  합계: 1
[0.4704605  0.32327145 0.20626804]  합계: 1
[0.02899355 0.571861   0.3991454 ]  합계: 1
[0.03518805 0.56944776 0.39536414]  합계: 1
[0.02293045 0.5701026  0.406967  ]  합계: 1
[0.03738485 0.5177055  0.44490966]  합계: 1
[0.02399477 0.5362787  0.4397265 ]  합계: 1
[0.03837029 0.54091406 0.42071566]  합계: 1
[0.03163068 0.5642007  0.40416858]  합계: 1
[0.09309509 0.54125774 0.36564723]  합계: 1
[0.03083309 0.571171   0.39799586]  합계: 1
[0.05301544 0.54710674 0.39987788]  합계: 1
[0.00334995 0.19217865 0.80447143]  합계: 1
[0.00983193 0.30333653 0.6868315 ]  합계: 1
[0.00413861 0.276962   0.71889937]  합계: 1
[0.00837195 0.3195786  0.6720495 ]

In [11]:
# Class ids: Iris-setosa: 0, Iris-versicolor: 1, Iris-virginica: 2
def decode(data, labels=('Iris-setosa', 'Iris-versicolor', 'Iris-virginica')):
    """Return the species name for one row of model outputs.

    Args:
        data: 1-D array-like of class scores, i.e. one row of the 2-D array
            that ``model.predict`` returns.
        labels: Class names ordered by class id. Defaults to the three iris
            species in the id order given in the comment above this function.

    Returns:
        The entry of ``labels`` at the position of the largest value in ``data``.

    Raises:
        ValueError: If the position of the largest value has no entry in
            ``labels`` (for example when ``data`` holds more values than there
            are labels). ``np.argmax`` also raises ValueError for empty input.
    """
    index = int(np.argmax(data))  # position of the largest score
    # Previously an index outside 0-2 left the result unassigned and surfaced
    # as an UnboundLocalError; report the actual problem instead.
    if index >= len(labels):
        raise ValueError(
            'argmax index {0} has no label (only {1} labels given)'.format(index, len(labels))
        )
    return labels[index]

In [12]:
# Compare the true species with the decoded prediction for every sample.
print(len(p))
for i, probs in enumerate(p):
    print('Target(실제값):', Y_str[i], ' Y hat(예측값):', decode(probs))

30
Target(실제값): Iris-setosa  Y hat(예측값): Iris-setosa
Target(실제값): Iris-setosa  Y hat(예측값): Iris-setosa
Target(실제값): Iris-setosa  Y hat(예측값): Iris-setosa
Target(실제값): Iris-setosa  Y hat(예측값): Iris-setosa
Target(실제값): Iris-setosa  Y hat(예측값): Iris-setosa
Target(실제값): Iris-setosa  Y hat(예측값): Iris-setosa
Target(실제값): Iris-setosa  Y hat(예측값): Iris-setosa
Target(실제값): Iris-setosa  Y hat(예측값): Iris-setosa
Target(실제값): Iris-setosa  Y hat(예측값): Iris-setosa
Target(실제값): Iris-setosa  Y hat(예측값): Iris-setosa
Target(실제값): Iris-versicolor  Y hat(예측값): Iris-versicolor
Target(실제값): Iris-versicolor  Y hat(예측값): Iris-versicolor
Target(실제값): Iris-versicolor  Y hat(예측값): Iris-versicolor
Target(실제값): Iris-versicolor  Y hat(예측값): Iris-versicolor
Target(실제값): Iris-versicolor  Y hat(예측값): Iris-versicolor
Target(실제값): Iris-versicolor  Y hat(예측값): Iris-versicolor
Target(실제값): Iris-versicolor  Y hat(예측값): Iris-versicolor
Target(실제값): Iris-versicolor  Y hat(예측값): Iris-versicolor
Target(실제값): Iris-versicolor  Y h

In [13]:
# Read one sample from the keyboard with input().
# Example of the expected text: 5.1,3.5,1.4,0.2
src = input('꽃받침 길이, 꽃받침 넓이, 꽃잎 길이, 꽃잎 넓이를 입력하세요.')
# Show the type of the value input() returned, then the raw text itself.
print(type(src), src, sep='\n')

<class 'str'>



In [16]:
# Split the typed text on commas and convert the pieces to a 1-D float array.
data = np.asarray(src.split(','), dtype=np.float64)
print(data.shape, data, sep='\n')

(4,)
[5.1 3.5 1.4 0.2]


In [17]:
# Wrap the single sample in a leading axis so predict() receives a 2-D array.
X = np.stack([data])
print(X.shape, X, sep='\n')
p = model.predict(x=X)
print('p:', p)

# Decode every row of p into its species name.
for probs in p:
    print('예측값:', decode(probs))

(1, 4)
[[5.1 3.5 1.4 0.2]]
p: [[0.95041513 0.00509432 0.04449051]]
예측값: Iris-setosa
