In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix, classification_report
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam

In [None]:
# Load the preprocessed train and test tables (train first, then test) from
# CSV files located in the current working directory.
train_data, test_data = (
    pd.read_csv(csv_path)
    for csv_path in ("processed_train_data.csv", "processed_test_data.csv")
)

In [None]:
# 'Survived' is the target column; every other training column is a feature.
y_train = train_data['Survived']
x_train = train_data.drop('Survived', axis=1)
# The test table is used as features unchanged; work on an independent copy.
# NOTE(review): assumes its columns match x_train's columns and order — confirm.
x_test = test_data.copy(deep=True)

In [None]:
# Fit the scaler on the training features only, then apply that same fitted
# transformation to both the training and the test features.
scaler = StandardScaler().fit(x_train)
x_train_scaled = scaler.transform(x_train)
x_test_scaled = scaler.transform(x_test)


In [None]:
# Feed-forward binary classifier: three ReLU hidden layers (64, 32, 16 units),
# 50% dropout after each of the first two, and a single sigmoid output unit.
# The input width is taken from the number of scaled training columns.
model = Sequential([
    Dense(64, input_dim=x_train_scaled.shape[1], activation='relu'),
    Dropout(0.5),
    Dense(32, activation='relu'),
    Dropout(0.5),
    Dense(16, activation='relu'),
    Dense(1, activation='sigmoid'),
])

In [None]:
# Adam with a small learning rate; binary cross-entropy is the loss and
# accuracy is tracked as the reported metric.
model.compile(
    loss='binary_crossentropy',
    optimizer=Adam(learning_rate=0.0001),
    metrics=['accuracy'],
)

In [None]:
# Train for a fixed 2400 epochs in mini-batches of 32, holding out 20% of the
# training rows for validation; verbose=2 prints one summary line per epoch.
# NOTE(review): in the recorded log the validation loss stays near 0.50 while
# the training loss is near 0.33 — consider early stopping / fewer epochs.
history = model.fit(
    x_train_scaled,
    y_train,
    batch_size=32,
    epochs=2400,
    validation_split=0.2,
    verbose=2,
)

Epoch 1/2400
23/23 - 1s - loss: 0.3343 - accuracy: 0.8497 - val_loss: 0.5020 - val_accuracy: 0.8603 - 1s/epoch - 53ms/step
Epoch 2/2400
23/23 - 0s - loss: 0.3378 - accuracy: 0.8511 - val_loss: 0.5020 - val_accuracy: 0.8603 - 90ms/epoch - 4ms/step
Epoch 3/2400
23/23 - 0s - loss: 0.3173 - accuracy: 0.8624 - val_loss: 0.5023 - val_accuracy: 0.8603 - 93ms/epoch - 4ms/step
Epoch 4/2400
23/23 - 0s - loss: 0.3230 - accuracy: 0.8624 - val_loss: 0.5022 - val_accuracy: 0.8603 - 89ms/epoch - 4ms/step
Epoch 5/2400
23/23 - 0s - loss: 0.3409 - accuracy: 0.8441 - val_loss: 0.5015 - val_accuracy: 0.8603 - 102ms/epoch - 4ms/step
Epoch 6/2400
23/23 - 0s - loss: 0.3343 - accuracy: 0.8525 - val_loss: 0.5012 - val_accuracy: 0.8603 - 89ms/epoch - 4ms/step
Epoch 7/2400
23/23 - 0s - loss: 0.3526 - accuracy: 0.8455 - val_loss: 0.5010 - val_accuracy: 0.8603 - 91ms/epoch - 4ms/step
Epoch 8/2400
23/23 - 0s - loss: 0.3462 - accuracy: 0.8497 - val_loss: 0.5009 - val_accuracy: 0.8603 - 83ms/epoch - 4ms/step
Epoch 9/

In [None]:
# Evaluate the model on training data
# Keep the raw sigmoid outputs separately from the hard labels: ROC AUC is a
# ranking metric and has to be scored on the predicted probabilities. Feeding
# it thresholded 0/1 labels collapses the ROC curve to a single operating
# point and misreports the score.
train_probabilities = model.predict(x_train_scaled).flatten()
train_predictions = (train_probabilities > 0.5).astype(int)

# Threshold-dependent metrics use the 0/1 labels (cut-off at 0.5).
train_accuracy = accuracy_score(y_train, train_predictions)
train_precision = precision_score(y_train, train_predictions)
train_recall = recall_score(y_train, train_predictions)
train_f1 = f1_score(y_train, train_predictions)
# Threshold-independent metric uses the probabilities.
train_roc_auc = roc_auc_score(y_train, train_probabilities)

print("Training Data Evaluation:")
print(f"Accuracy: {train_accuracy:.4f}")
print(f"Precision: {train_precision:.4f}")
print(f"Recall: {train_recall:.4f}")
print(f"F1 Score: {train_f1:.4f}")
print(f"ROC AUC Score: {train_roc_auc:.4f}")

Training Data Evaluation:
Accuracy: 0.8642
Precision: 0.9266
Recall: 0.7018
F1 Score: 0.7987
ROC AUC Score: 0.8336


In [None]:
# Turn the model's outputs for the test features into a flat array of 0/1
# labels using a 0.5 cut-off.
predictions = (
    (model.predict(x_test_scaled) > 0.5)
    .astype(int)
    .flatten()
)



In [None]:
# Passenger identifiers are read from test.csv to label the submission rows.
pids = pd.read_csv("test.csv").loc[:, 'PassengerId']

In [None]:
# Two-column submission table: passenger id alongside the predicted label.
submission = pd.DataFrame(dict(PassengerId=pids, Survived=predictions))

In [None]:
# Write the submission to disk without the DataFrame index column.
submission.to_csv(
    path_or_buf='neural-network-submission-5.csv',
    index=False,
)

In [None]:
# Show the submission table as this cell's output for a quick visual check.
submission

Unnamed: 0,PassengerId,Survived
0,892,0
1,893,0
2,894,0
3,895,0
4,896,0
...,...,...
413,1305,0
414,1306,1
415,1307,0
416,1308,0
