In [16]:
# 1️⃣ 데이터 분리 (X, y)
# 2️⃣ 결측치 최소 처리 (drop or fillna)
# 3️⃣ 인코딩 (문자열 → 숫자)
# 4️⃣ 기본 모델 학습 (Logistic Regression or Neural Net)
# 5️⃣ 성능 확인 (train/test split)

import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow import keras
from tensorflow.keras import layers

In [17]:
pd.set_option('display.max_columns', None)  # 전체 컬럼 출력
pd.set_option('display.max_rows', None) 
keras.utils.set_random_seed(42)  

In [18]:
train = pd.read_csv('train.csv')

In [19]:
X = train.drop(columns=['Survived','Name','Cabin','Ticket'], axis=1)
y = train['Survived']

X_train,X_val,y_train,y_val = train_test_split(X,y,test_size=0.2,random_state=42)

In [20]:
X_train = X_train.fillna(0)
X_val = X_val.fillna(0)
X_train = pd.get_dummies(X_train, columns=['Sex','Embarked'])
X_val = pd.get_dummies(X_val, columns=['Sex','Embarked'])

X_train, X_val = X_train.align(X_val, join='left', axis=1, fill_value=0)

In [21]:
print(type(X_train))
print(type(y_train))
print(X_train.shape)
print(y_train.shape)


<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.series.Series'>
(712, 12)
(712,)


In [22]:
model_batch32 = keras.Sequential([
    keras.Input(shape=(X_train.shape[1],)),
    layers.Dense(64, activation='relu', name='layer1'),
    layers.Dense(32, activation='relu', name='layer2'),
    layers.Dense(1, activation='sigmoid', name='layer3')
])
model_batch32.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model_batch32.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_val, y_val))


Epoch 1/20
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5112 - loss: 5.2016 - val_accuracy: 0.5698 - val_loss: 1.8293
Epoch 2/20
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.5253 - loss: 1.1312 - val_accuracy: 0.6257 - val_loss: 0.9504
Epoch 3/20
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.5744 - loss: 0.7943 - val_accuracy: 0.6536 - val_loss: 0.6068
Epoch 4/20
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.6250 - loss: 0.6590 - val_accuracy: 0.6927 - val_loss: 0.5668
Epoch 5/20
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.6685 - loss: 0.6197 - val_accuracy: 0.7318 - val_loss: 0.5347
Epoch 6/20
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7037 - loss: 0.5967 - val_accuracy: 0.7821 - val_loss: 0.5300
Epoch 7/20
[1m23/23[0m [32m━━━━━━━━━━

<keras.src.callbacks.history.History at 0x3162a3ed0>

In [23]:
val_loss32, val_acc32 = model_batch32.evaluate(X_val, y_val)

[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7486 - loss: 0.4747 


In [24]:
model_batch64 = keras.Sequential([
    keras.Input(shape=(X_train.shape[1],)),
    layers.Dense(64, activation='relu', name='layer1'),
    layers.Dense(32, activation='relu', name='layer2'),
    layers.Dense(1, activation='sigmoid', name='layer3')
])
model_batch64.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model_batch64.fit(X_train, y_train, epochs=20, batch_size=64, validation_data=(X_val, y_val))


Epoch 1/20
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.4087 - loss: 23.9791 - val_accuracy: 0.5866 - val_loss: 5.8990
Epoch 2/20
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5730 - loss: 7.3428 - val_accuracy: 0.5587 - val_loss: 3.5120
Epoch 3/20
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.4719 - loss: 2.5797 - val_accuracy: 0.5084 - val_loss: 2.2724
Epoch 4/20
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.4902 - loss: 1.9094 - val_accuracy: 0.4916 - val_loss: 1.3318
Epoch 5/20
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.4874 - loss: 1.2082 - val_accuracy: 0.5251 - val_loss: 1.0422
Epoch 6/20
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5112 - loss: 0.9202 - val_accuracy: 0.4581 - val_loss: 0.8352
Epoch 7/20
[1m12/12[0m [32m━━━━━━━━━

<keras.src.callbacks.history.History at 0x316546950>

In [25]:
val_loss64, val_acc64 = model_batch64.evaluate(X_val, y_val)

[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7207 - loss: 0.5225 


In [26]:
model_batch128 = keras.Sequential([
    keras.Input(shape=(X_train.shape[1],)),
    layers.Dense(64, activation='relu', name='layer1'),
    layers.Dense(32, activation='relu', name='layer2'),
    layers.Dense(1, activation='sigmoid', name='layer3')
])
model_batch128.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model_batch128.fit(X_train, y_train, epochs=20, batch_size=128, validation_data=(X_val, y_val))


Epoch 1/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.4340 - loss: 14.3519 - val_accuracy: 0.5866 - val_loss: 4.7047
Epoch 2/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6236 - loss: 6.6216 - val_accuracy: 0.5922 - val_loss: 5.6238
Epoch 3/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6011 - loss: 3.2636 - val_accuracy: 0.4358 - val_loss: 3.1730
Epoch 4/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.5042 - loss: 2.3994 - val_accuracy: 0.6145 - val_loss: 1.9171
Epoch 5/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6503 - loss: 2.0769 - val_accuracy: 0.6760 - val_loss: 0.7348
Epoch 6/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.5815 - loss: 1.2151 - val_accuracy: 0.6760 - val_loss: 0.8727
Epoch 7/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[

<keras.src.callbacks.history.History at 0x316558350>

In [27]:
val_loss128, val_acc128 = model_batch128.evaluate(X_val, y_val)

[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7039 - loss: 0.5549 


In [28]:
print({
    "bs32":  {"val_acc": float(val_acc32),  "val_loss": float(val_loss32)},
    "bs64":  {"val_acc": float(val_acc64),  "val_loss": float(val_loss64)},
    "bs128": {"val_acc": float(val_acc128), "val_loss": float(val_loss128)},
})

{'bs32': {'val_acc': 0.748603343963623, 'val_loss': 0.4746743440628052}, 'bs64': {'val_acc': 0.7206704020500183, 'val_loss': 0.5225072503089905}, 'bs128': {'val_acc': 0.7039105892181396, 'val_loss': 0.5548739433288574}}


In [29]:
test = pd.read_csv('test.csv')

In [30]:
test = test.fillna(0)
test = pd.get_dummies(test)
X_train, test = X_train.align(test, join='left', axis=1, fill_value=0)

In [31]:
pred = model_batch64.predict(test)

[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 


In [32]:
submission = pd.DataFrame({
    'PassengerId': pd.read_csv('test.csv')['PassengerId'],
    'Survived': (pred > 0.5).astype(int).flatten()
})

In [33]:
submission.to_csv('submission/submission_batch_evaluation_64.csv', index=False)