In [1]:
# 1. fish 데이터 불러오기

import pandas as pd

fish = pd.read_csv('https://bit.ly/fish_csv_data')
fish.head()

Unnamed: 0,Species,Weight,Length,Diagonal,Height,Width
0,Bream,242.0,25.4,30.0,11.52,4.02
1,Bream,290.0,26.3,31.2,12.48,4.3056
2,Bream,340.0,26.5,31.1,12.3778,4.6961
3,Bream,363.0,29.0,33.5,12.73,4.4555
4,Bream,430.0,29.0,34.0,12.444,5.134


In [2]:
# 2. target을 정수 인덱스로 변경
# ['Bream' 'Roach' 'Whitefish' 'Parkki' 'Perch' 'Pike' 'Smelt'] → [0~6]

import numpy as np
import pandas as pd

fish_target = fish['Species'].to_numpy()

classes = pd.unique(fish['Species'])
print(classes)

for i in range(len(fish_target)):
    for j in range(len(classes)):
        if fish_target[i] == classes[j]:
            fish_target[i] = j
            break

print(fish_target[:10])

['Bream' 'Roach' 'Whitefish' 'Parkki' 'Perch' 'Pike' 'Smelt']
[0 0 0 0 0 0 0 0 0 0]


In [4]:
#3. train/test 데이터 나누고, train/val(10%)로 나누기

from sklearn.model_selection import train_test_split

fish_input = fish[['Weight', 'Length', 'Diagonal', 'Height', 'Width']].to_numpy()

train_input, test_input, train_target, test_target = train_test_split(fish_input, fish_target, test_size=0.2, random_state=42)

train_input, val_input, train_target, val_target = train_test_split(train_input, train_target, test_size=0.1, random_state=42)

print(train_input.shape, test_input.shape, val_input.shape)

(114, 5) (32, 5) (13, 5)


In [5]:
# 4. StandardScaler로 표준점수 변환

from sklearn.preprocessing import StandardScaler

ss = StandardScaler()
ss.fit(train_input)

train_scaled = ss.transform(train_input)
val_scaled = ss.transform(val_input)
test_scaled = ss.transform(test_input)

print(train_scaled.shape, val_scaled.shape, test_scaled.shape)

(114, 5) (32, 5) (13, 5)


In [8]:
# 5. Dense 생성

from tensorflow import keras

dense = keras.layers.Dense(7, activation='softmax', input_shape=(5,))

In [9]:
# 6. 신경망 모델 생성

model = keras.Sequential([dense])

In [10]:
# 7. train/val/test 타깃을 one-hot-coding으로 변경

oh_train_target = keras.utils.to_categorical(train_target)
oh_val_target = keras.utils.to_categorical(val_target)
oh_test_target = keras.utils.to_categorical(test_target)

print(oh_train_target[:5])
print(oh_val_target[:5])
print(oh_test_target[:5])

[[1. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0.]
 [1. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 1.]]
[[0. 1. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0.]]
[[0. 0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 1. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0.]]


In [11]:
# 8. 신경망 모델의 훈련방법 정의

model.compile(loss='categorical_crossentropy', metrics=['accuracy'])

In [14]:
# 9. 모델 훈련

# batch_size=1의 의미: 한 번의 가중치 업데이트마다 1개의 샘플만 사용한다는 뜻

model.fit(train_scaled, oh_train_target, batch_size=1, epochs=70)

Epoch 1/70
[1m114/114[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.6992 - loss: 0.8631
Epoch 2/70
[1m114/114[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7800 - loss: 0.7787
Epoch 3/70
[1m114/114[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.6828 - loss: 0.9274
Epoch 4/70
[1m114/114[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7481 - loss: 0.8461
Epoch 5/70
[1m114/114[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8614 - loss: 0.6711
Epoch 6/70
[1m114/114[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.7586 - loss: 0.8590
Epoch 7/70
[1m114/114[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.7671 - loss: 0.8490
Epoch 8/70
[1m114/114[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.7540 - loss: 0.7615
Epoch 9/70
[1m114/114[0m [32m━━━━━━━━

<keras.src.callbacks.history.History at 0x7ae9305cb140>

In [15]:
# 10. 검증데이터와 테스트데이터의 점수 출력

model.evaluate(val_scaled, oh_val_target)
model.evaluate(test_scaled, oh_test_target)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 458ms/step - accuracy: 0.7692 - loss: 0.5737
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step - accuracy: 0.8750 - loss: 0.7012


[0.7011867761611938, 0.875]

In [16]:
# 11. 테스트데이터 전체 분류결과와 타깃 출력

pred = model.predict(test_scaled[0:1])
pred = np.round(pred, decimals=5)
print(pred)
print(np.argmax(pred))
print(test_target[0])

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 96ms/step
[[0.0055  0.27776 0.00627 0.07789 0.50225 0.00358 0.12675]]
4
4
