In [24]:
# Data 불러오기
import pandas as pd

csv = pd.read_csv("../Data/bmi.csv")
csv.head()

Unnamed: 0,height,weight,label
0,187,35,thin
1,144,68,fat
2,172,66,normal
3,156,54,normal
4,128,54,fat


In [25]:
# Data갯수
csv.count()

height    20000
weight    20000
label     20000
dtype: int64

In [3]:
# 기본 통계량 확인
csv.describe()

Unnamed: 0,height,weight
count,20000.0,20000.0
mean,159.62635,57.41915
std,23.292565,13.193164
min,120.0,35.0
25%,139.0,46.0
50%,160.0,57.0
75%,180.0,69.0
max,200.0,80.0


In [26]:
# label 종류
csv['label'].unique()

array(['thin', 'fat', 'normal'], dtype=object)

> label의 type이 object이므로 변환 작업이 필요하다.

In [27]:
# 결측치
csv.isnull().sum()

height    0
weight    0
label     0
dtype: int64

> 결측치 없음

#### Data 정규화 하기

In [29]:
# weight, height
csv['weight'] = csv['weight'] / csv['weight'].max()
csv['height'] = csv['height'] / csv['height'].max()
csv.head()

Unnamed: 0,height,weight,label
0,0.935,0.4375,thin
1,0.72,0.85,fat
2,0.86,0.825,normal
3,0.78,0.675,normal
4,0.64,0.675,fat


In [31]:
# label 
bclass = {
    'thin':[1,0,0],
    'normal':[0,1,0],
    'fat':[0,0,1]
}

y = []

for i, v in enumerate(csv['label']):
    y.append(bclass[v])

y

[[1, 0, 0],
 [0, 0, 1],
 [0, 1, 0],
 [0, 1, 0],
 [0, 0, 1],
 [1, 0, 0],
 [1, 0, 0],
 [1, 0, 0],
 [1, 0, 0],
 [1, 0, 0],
 [1, 0, 0],
 [0, 0, 1],
 [0, 0, 1],
 [1, 0, 0],
 [1, 0, 0],
 [0, 1, 0],
 [1, 0, 0],
 [1, 0, 0],
 [0, 1, 0],
 [0, 1, 0],
 [1, 0, 0],
 [0, 1, 0],
 [0, 0, 1],
 [1, 0, 0],
 [1, 0, 0],
 [0, 0, 1],
 [1, 0, 0],
 [0, 0, 1],
 [0, 1, 0],
 [0, 0, 1],
 [0, 1, 0],
 [0, 0, 1],
 [0, 1, 0],
 [0, 1, 0],
 [0, 1, 0],
 [0, 0, 1],
 [0, 0, 1],
 [0, 0, 1],
 [0, 0, 1],
 [0, 0, 1],
 [0, 0, 1],
 [0, 0, 1],
 [1, 0, 0],
 [1, 0, 0],
 [0, 1, 0],
 [1, 0, 0],
 [0, 0, 1],
 [0, 0, 1],
 [0, 1, 0],
 [0, 0, 1],
 [0, 0, 1],
 [1, 0, 0],
 [0, 0, 1],
 [0, 0, 1],
 [1, 0, 0],
 [0, 0, 1],
 [1, 0, 0],
 [0, 0, 1],
 [0, 0, 1],
 [0, 1, 0],
 [0, 1, 0],
 [0, 1, 0],
 [0, 0, 1],
 [0, 1, 0],
 [0, 0, 1],
 [0, 0, 1],
 [0, 0, 1],
 [0, 0, 1],
 [0, 0, 1],
 [0, 0, 1],
 [0, 1, 0],
 [1, 0, 0],
 [1, 0, 0],
 [1, 0, 0],
 [0, 1, 0],
 [1, 0, 0],
 [0, 1, 0],
 [0, 0, 1],
 [1, 0, 0],
 [0, 0, 1],
 [0, 0, 1],
 [1, 0, 0],
 [1, 0, 0],
 [1,

In [8]:
bclass['thin']

[1, 0, 0]

----
##### 훈련용과 테스트 데이터 나누기

In [32]:
# Train
X_train = csv[['height','weight']][:15000]
y_train = y[:15000]

In [33]:
X_train.tail()

Unnamed: 0,height,weight
14995,0.825,0.5
14996,0.79,0.8875
14997,0.7,0.7375
14998,0.975,0.7
14999,0.78,0.5


In [34]:
len(X_train)

15000

In [35]:
y_train[:5]

[[1, 0, 0], [0, 0, 1], [0, 1, 0], [0, 1, 0], [0, 0, 1]]

In [36]:
len(y_train)

15000

In [37]:
# Test
X_test = csv[['height','weight']][15000:]
y_test = y[15000:]

In [38]:
X_test.head()

Unnamed: 0,height,weight
15000,0.87,0.85
15001,0.875,0.775
15002,0.975,0.8375
15003,0.79,0.8875
15004,0.76,0.675


In [39]:
print(len(X_test))
print(len(y_test))

5000
5000


----
#### Deep Learning Model
- 입력층
- 은닉층 : 512개의 노드 , activation='relu
- 출력층 

##### 모델 만들기

In [40]:
from tensorflow import keras
from tensorflow.keras.layers import Input

In [41]:
model = keras.Sequential()
# 입력층
model.add(
    Input(shape=(2,))
)

# 은닉층
model.add(
    keras.layers.Dense(512, activation='relu')
)

# 출력층
model.add(
    keras.layers.Dense(3, activation='softmax')
)

#### 손실함수
- optimizer = 'rmsprop'

In [42]:
model.compile(
        loss='categorical_crossentropy',
        metrics=['accuracy'],
        optimizer='rmsprop'
)

#### 데이터 훈련하기
- epochs = 50

In [43]:
import numpy as np

model.fit(
    np.array(X_train),
    np.array(y_train),
    epochs = 49
)

Epoch 1/49
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.6482 - loss: 0.8683
Epoch 2/49
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9113 - loss: 0.3600
Epoch 3/49
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9594 - loss: 0.2242
Epoch 4/49
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9636 - loss: 0.1705
Epoch 5/49
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9707 - loss: 0.1371
Epoch 6/49
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9736 - loss: 0.1196
Epoch 7/49
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9743 - loss: 0.1071
Epoch 8/49
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9736 - loss: 0.0998
Epoch 9/49
[1m469/469[0m [32m━━━━━━━━

<keras.src.callbacks.history.History at 0x21abcf3c8d0>

#### 테스트 데이터로 평가하기

In [21]:
score = model.evaluate(
            np.array(X_test),
            np.array(y_test)
)

[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9677 - loss: 0.0586


In [22]:
print('accuracy =', score[1])
print('loss=', score[0])

accuracy = 0.9706000089645386
loss= 0.05695221573114395


In [44]:
score

[0.05695221573114395, 0.9706000089645386]