In [None]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler


from tensorflow.keras.models import Sequential              
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import SGD, Adam           # 최적화 모델
# from tensorflow.keras.callbacks import EarlyStopping        # 모델 사전 종료 라이브러리 (모델 분석 최적화를 위함)

In [5]:
df = pd.read_csv("./data/titanic.csv")

# delete useless_data column
df = df.drop(columns=['PassengerId', 'Name', 'Ticket', 'Cabin'])

# x_data, y_data 분리
y_data = df['Survived'].astype(float)
x_data = df.drop(columns=['Survived'])

In [6]:
# x_data에 대한 결측치 처리 (평균 처리)
x_data['Age'] = x_data['Age'].fillna(np.mean(x_data['Age']))

# 이진 분류 데이터 0, 1로 casting (male: 0, female: 1)
x_data['Sex'] = (x_data['Sex'] == "female").astype(float)

# One-Hot-Encoding (column: Pclass, Embarked)
x_data = pd.get_dummies(x_data, columns=['Pclass', 'Embarked']).astype(float)

# 전처리 결과 확인 (column)
x_data

Unnamed: 0,Sex,Age,SibSp,Parch,Fare,Pclass_1,Pclass_2,Pclass_3,Embarked_C,Embarked_Q,Embarked_S
0,0.0,34.50000,0.0,0.0,7.8292,0.0,0.0,1.0,0.0,1.0,0.0
1,1.0,47.00000,1.0,0.0,7.0000,0.0,0.0,1.0,0.0,0.0,1.0
2,0.0,62.00000,0.0,0.0,9.6875,0.0,1.0,0.0,0.0,1.0,0.0
3,0.0,27.00000,0.0,0.0,8.6625,0.0,0.0,1.0,0.0,0.0,1.0
4,1.0,22.00000,1.0,1.0,12.2875,0.0,0.0,1.0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...
413,0.0,30.27259,0.0,0.0,8.0500,0.0,0.0,1.0,0.0,0.0,1.0
414,1.0,39.00000,0.0,0.0,108.9000,1.0,0.0,0.0,1.0,0.0,0.0
415,0.0,38.50000,0.0,0.0,7.2500,0.0,0.0,1.0,0.0,0.0,1.0
416,0.0,30.27259,0.0,0.0,8.0500,0.0,0.0,1.0,0.0,0.0,1.0


In [7]:
# 데이터 정규화

# 정규화 함수 정의
scaler = MinMaxScaler(feature_range=(0, 1))

# x_data 정규화 (y_data는 0과 1이니까, 정규화 할 필요가 없지 않을까?)
x_data = scaler.fit_transform(x_data)

In [8]:
# 데이터 분할 (train, test)
x_train, x_test, y_train, y_test = train_test_split(
    x_data,
    y_data,
    test_size=0.2,
    random_state=42,
)

# train_data 데이터 분할 (train, validation)
x_train, x_val, y_train, y_val = train_test_split(
    x_train,
    y_train,
    test_size=0.125,
    random_state=42,
)

In [9]:
model = Sequential()

# 은닉층: Relu, 출력층: sigmoid
model.add(Dense(8, input_dim=11, activation='relu'))  # input 11
model.add(Dense(4, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

# 최적화 모델 설정
optimizer = Adam()

model.summary()

model.compile(
    loss='mse',
    optimizer=optimizer,
    metrics=['accuracy']
)

history = model.fit(
    x_data,
    y_data,
    epochs=500,
    batch_size=8,
    validation_data=(x_val, y_val),
    verbose=1
)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
2025-04-19 18:39:05.166209: E external/local_xla/xla/stream_executor/cuda/cuda_platform.cc:51] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)


Epoch 1/500
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.6553 - loss: 0.2635 - val_accuracy: 0.6905 - val_loss: 0.2487
Epoch 2/500
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6459 - loss: 0.2477 - val_accuracy: 0.6905 - val_loss: 0.2416
Epoch 3/500
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6064 - loss: 0.2448 - val_accuracy: 0.6905 - val_loss: 0.2369
Epoch 4/500
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6270 - loss: 0.2409 - val_accuracy: 0.6905 - val_loss: 0.2326
Epoch 5/500
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6569 - loss: 0.2357 - val_accuracy: 0.6905 - val_loss: 0.2297
Epoch 6/500
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6426 - loss: 0.2355 - val_accuracy: 0.6905 - val_loss: 0.2272
Epoch 7/500
[1m53/53[0m [32m━━━

In [10]:
# model 평가
score = model.evaluate(x_test, y_test)
print(score)

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.5906 - loss: 0.2440 
[0.24272401630878448, 0.5952380895614624]
