In [1]:
#OS 단위에서의 작업 가능
import os

In [2]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow.keras import layers, models, Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import ModelCheckpoint,EarlyStopping
from tensorflow.keras.models import load_model

from sklearn.metrics import classification_report

import warnings
warnings.filterwarnings("ignore")

In [14]:
data = pd.read_csv("titanic.csv")
data.drop(columns=['PassengerId', 'Name', 'Ticket', 'Cabin'], inplace=True)
data.head()

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked
0,0,3,male,22.0,1,0,7.25,S
1,1,1,female,38.0,1,0,71.2833,C
2,1,3,female,26.0,0,0,7.925,S
3,1,1,female,35.0,1,0,53.1,S
4,0,3,male,35.0,0,0,8.05,S


In [15]:
data = data[['Sex', 'Age', 'Fare', 'Survived']]
data.head()

Unnamed: 0,Sex,Age,Fare,Survived
0,male,22.0,7.25,0
1,female,38.0,71.2833,1
2,female,26.0,7.925,1
3,female,35.0,53.1,1
4,male,35.0,8.05,0


In [16]:
data['Age'] = data['Age'].fillna(data.Age.mean())       #결측치 제거
data['Sex'] = data['Sex'].apply(lambda x : 0 if x=='female' else 1)     #라벨인코딩
data.isnull().sum()

Sex         0
Age         0
Fare        0
Survived    0
dtype: int64

In [17]:
data.describe()

Unnamed: 0,Sex,Age,Fare,Survived
count,891.0,891.0,891.0,891.0
mean,0.647587,29.699118,32.204208,0.383838
std,0.47799,13.002015,49.693429,0.486592
min,0.0,0.42,0.0,0.0
25%,0.0,22.0,7.9104,0.0
50%,1.0,29.699118,14.4542,0.0
75%,1.0,35.0,31.0,1.0
max,1.0,80.0,512.3292,1.0


In [19]:
#독립, 종속변수 분리, 타입변경
X = data.drop(columns=['Survived'])
y = data['Survived']
y = y.astype('int')
X = X.astype('int')

In [20]:
#훈련용, 테스트 분리
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

In [21]:
tf.keras.backend.clear_session()

model = Sequential() #모델 정의

#입력층
model.add(Dense(4, #뉴런의 개수
                input_dim=3, #입력 차원 수, 칼럼 수에 맞춤
                activation='relu' #활성화 함수
                ))
#히든 레이어
model.add(Dense(4, activation='relu'))

#최종 활성화 함수, 여기서 분류값이 결정됨
model.add(Dense(1, activation='sigmoid')) 
#sigmoid는 shape (,1)일 경우, 즉 0,1로만 종속변수가 구성될 때 사용
#softmax는 shape (,2개 이상)일 경우 사용

In [22]:
adam = tf.keras.optimizers.Adam(
         learning_rate=0.0001, beta_1=0.9, beta_2=0.999, epsilon=1e-07, amsgrad=False,
         name='Adam')   

In [23]:
#모델 설정
#crossentropy 실제값과 예측값 사이의 분포 차이
model.compile(loss='binary_crossentropy', #이진 분류 loss 함수, softmax는 categorical_crossentropy
          optimizer=adam, #최적화 기법 설정
          metrics=['accuracy']) #epoch마다 어떤 지표를 보여주는 지 설정

In [24]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 4)                 16        
                                                                 
 dense_1 (Dense)             (None, 4)                 20        
                                                                 
 dense_2 (Dense)             (None, 1)                 5         
                                                                 
Total params: 41
Trainable params: 41
Non-trainable params: 0
_________________________________________________________________


In [25]:
checkpointer = ModelCheckpoint(filepath="model/first_titanic.h5", #코드 복붙 시 디렉토리 설정 바꿔야 함
                               monitor='val_loss', 
                               verbose=1,
                               save_best_only=True)

early_stopping_callback = EarlyStopping(monitor='val_loss',
                                        patience=200)

In [27]:
model.fit(x_train, y_train, 
          epochs=100, 
          batch_size=8,
          validation_split=1/8,
          callbacks=[early_stopping_callback,checkpointer])

Epoch 1/100
Epoch 00001: val_loss did not improve from 0.69157
Epoch 2/100
Epoch 00002: val_loss did not improve from 0.69157
Epoch 3/100
Epoch 00003: val_loss did not improve from 0.69157
Epoch 4/100
Epoch 00004: val_loss did not improve from 0.69157
Epoch 5/100
Epoch 00005: val_loss did not improve from 0.69157
Epoch 6/100
Epoch 00006: val_loss did not improve from 0.69157
Epoch 7/100
Epoch 00007: val_loss did not improve from 0.69157
Epoch 8/100
Epoch 00008: val_loss did not improve from 0.69157
Epoch 9/100
Epoch 00009: val_loss did not improve from 0.69157
Epoch 10/100
Epoch 00010: val_loss did not improve from 0.69157
Epoch 11/100
Epoch 00011: val_loss did not improve from 0.69157
Epoch 12/100
Epoch 00012: val_loss did not improve from 0.69157
Epoch 13/100
Epoch 00013: val_loss did not improve from 0.69157
Epoch 14/100
Epoch 00014: val_loss did not improve from 0.69157
Epoch 15/100
Epoch 00015: val_loss did not improve from 0.69157
Epoch 16/100
Epoch 00016: val_loss did not improv

<keras.callbacks.History at 0x2594eb693a0>

In [28]:
model = load_model("model/first_titanic.h5") #저장된 모델 로드

In [29]:
model.evaluate(x = x_test, y = y_test, batch_size=2) #간단하게 accuracy 측정



[0.681861937046051, 0.6145251393318176]

In [30]:
pred = model.predict(x_test) #X_test데이터로 예측해볼 수 있음
pd.DataFrame(classification_report(y_test, pred.round(), output_dict=True))

Unnamed: 0,0,1,accuracy,macro avg,weighted avg
precision,0.614525,0.0,0.614525,0.307263,0.377641
recall,1.0,0.0,0.614525,0.5,0.614525
f1-score,0.761246,0.0,0.614525,0.380623,0.467805
support,110.0,69.0,0.614525,179.0,179.0


In [32]:
x_test

array([[30, 48, 10],
       [ 1, 32, 70],
       [ 5, 20, 45],
       [20, 30,  5],
       [30, 51,  2],
       [89, 45, 18],
       [40, 55, 80],
       [56, 15, 65]])

In [38]:
int_features=[30, 48, 10]
final=np.array([int_features])

In [48]:
model.predict(final)

array([[0.9744547]], dtype=float32)

In [19]:
#tf.saved_model.save(model, "serving_model/1/")