In [65]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from sklearn.preprocessing import StandardScaler

# Load CSV files
train_data = pd.read_csv("train.csv")
test_data = pd.read_csv("test.csv")

# Encode categorical variable 'Sex'
train_data.replace({'male': 0, 'female': 1}, inplace=True)
test_data.replace({'male': 0, 'female': 1}, inplace=True)

# Fill missing values
train_data["Age"].fillna(train_data["Age"].mean(), inplace=True)
train_data["Fare"].fillna(train_data["Fare"].mean(), inplace=True)
train_data["Embarked"].fillna(train_data["Embarked"].mode()[0], inplace=True)

test_data["Age"].fillna(test_data["Age"].mean(), inplace=True)
test_data["Fare"].fillna(test_data["Fare"].mean(), inplace=True)
test_data["Embarked"].fillna(test_data["Embarked"].mode()[0], inplace=True)

# Drop PassengerId and Name (not useful for prediction)
train_data.drop(columns=["PassengerId", "Name", "Ticket", "Cabin"], inplace=True)
test_data.drop(columns=["PassengerId", "Name", "Ticket", "Cabin"], inplace=True)

# Convert categorical 'Embarked' to numerical
train_data = pd.get_dummies(train_data, columns=["Embarked"], drop_first=True)
test_data = pd.get_dummies(test_data, columns=["Embarked"], drop_first=True)

# Define features and target
X_train = train_data.drop(columns=["Survived"]).values
y_train = train_data["Survived"].values

X_test = test_data.values  # No Survived column in test data

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Build a deeper model
model = Sequential([
    Dense(32, activation='relu', input_shape=(X_train.shape[1],)),
    Dropout(0.2),  # Helps prevent overfitting
    Dense(16, activation='relu'),
    Dropout(0.2),
    Dense(8, activation='relu'),
    Dense(1, activation='sigmoid')
])

# Compile model
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Train model
model.fit(X_train, y_train, epochs=100, batch_size=32, validation_split=0.2)


  train_data.replace({'male': 0, 'female': 1}, inplace=True)
  test_data.replace({'male': 0, 'female': 1}, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  train_data["Age"].fillna(train_data["Age"].mean(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  train_data["Fare"].fillna(train_data["Fare"].m

Epoch 1/100
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 134ms/step - accuracy: 0.4539 - loss: 0.7198 - val_accuracy: 0.6369 - val_loss: 0.6668
Epoch 2/100
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.6673 - loss: 0.6569 - val_accuracy: 0.7486 - val_loss: 0.6102
Epoch 3/100
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step - accuracy: 0.6973 - loss: 0.6285 - val_accuracy: 0.8101 - val_loss: 0.5602
Epoch 4/100
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.7328 - loss: 0.5965 - val_accuracy: 0.8324 - val_loss: 0.5050
Epoch 5/100
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.7666 - loss: 0.5550 - val_accuracy: 0.8436 - val_loss: 0.4562
Epoch 6/100
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.7567 - loss: 0.5221 - val_accuracy: 0.8324 - val_loss: 0.4228
Epoch 7/100
[1m23/23[0m 

<keras.src.callbacks.history.History at 0x7fecc1497050>

In [66]:
# Load test data again to get PassengerId
test_data = pd.read_csv("test.csv")

# Make predictions
predictions = (model.predict(X_test) > 0.5).astype(int)

# Save predictions with PassengerId
output = pd.DataFrame({"PassengerId": test_data["PassengerId"], "Survived": predictions.flatten()})
output.to_csv("predictions.csv", index=False)

print("Predictions saved to predictions.csv")


[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 31ms/step
Predictions saved to predictions.csv
