In [1]:
# At the top of your notebook, run:
!python -m pip install --upgrade pip

# Then install TensorFlow:
!python -m pip install tensorflow



In [2]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder

import tensorflow as tf
from tensorflow.keras import Input, Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

# ---------------------------
# Step 1: Load the data
# ---------------------------
train = pd.read_csv('train.csv')
test  = pd.read_csv('test.csv')

In [3]:
# ---------------------------
# Step 2: Feature engineering
# ---------------------------
def extract_deck(cabin):
    if pd.isnull(cabin):
        return "Missing"
    return cabin.split('/')[0]

for df in (train, test):
    # 2a) Deck from Cabin
    df['Deck'] = df['Cabin'].apply(extract_deck)

    # 2b) CryoSleep & VIP → pandas nullable Boolean → fillna → int(0/1)
    for col in ['CryoSleep', 'VIP']:
        df[col] = (
            df[col]
              .replace({'True': True, 'False': False})
              .astype('boolean')    # pandas’ nullable Boolean
              .fillna(False)        # fills <NA> with False
              .astype(int)          # True→1, False→0
        )

In [4]:
# ---------------------------
# Step 3: Define X, y
# ---------------------------
# Target: Transported → 1 if True, 0 if False
y = train['Transported'].map({True:1, False:0})

# Drop cols we won’t feed into the model
drop_cols = ['PassengerId','Name','Cabin','Transported']
X = train.drop(drop_cols, axis=1)
X_test = test.drop(['PassengerId','Name','Cabin'], axis=1)

In [5]:
# ---------------------------
# Step 4: Preprocessing pipeline
# ---------------------------
numeric_features = ['Age','RoomService','FoodCourt','ShoppingMall','Spa','VRDeck']
numeric_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler',   StandardScaler())
])

categorical_features = ['HomePlanet','Destination','Deck']
categorical_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='constant', fill_value='Missing')),
    ('onehot',  OneHotEncoder(handle_unknown='ignore'))
])

preprocessor = ColumnTransformer([
    ('num', numeric_pipeline,     numeric_features),
    ('cat', categorical_pipeline, categorical_features)
])

# Fit the preprocessor on X, transform X and X_test
X_proc = preprocessor.fit_transform(X)
X_test_proc = preprocessor.transform(X_test)

In [6]:
# ---------------------------
# Step 5: Train/validation split
# ---------------------------
X_train, X_val, y_train, y_val = train_test_split(
    X_proc, y, test_size=0.2, random_state=42, stratify=y
)

In [7]:
# ---------------------------
# Step 6: Build the neural network
# ---------------------------
input_dim = X_train.shape[1]

model = Sequential([
    # 1) Explicit Input layer
    Input(shape=(input_dim,)),        

    # 2) Hidden layers
    Dense(64, activation='relu'),
    Dropout(0.3),
    Dense(32, activation='relu'),
    Dropout(0.3),

    # 3) Output layer
    Dense(1, activation='sigmoid')
])

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)

# Early stopping on validation loss
es = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

In [8]:
# ---------------------------
# Step 7: Train
# ---------------------------
history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=100,
    batch_size=32,
    callbacks=[es],
    verbose=2
)

Epoch 1/100
218/218 - 1s - 7ms/step - accuracy: 0.6898 - loss: 0.5722 - val_accuracy: 0.7780 - val_loss: 0.4566
Epoch 2/100
218/218 - 0s - 2ms/step - accuracy: 0.7660 - loss: 0.4768 - val_accuracy: 0.7849 - val_loss: 0.4401
Epoch 3/100
218/218 - 0s - 2ms/step - accuracy: 0.7748 - loss: 0.4617 - val_accuracy: 0.7964 - val_loss: 0.4276
Epoch 4/100
218/218 - 0s - 2ms/step - accuracy: 0.7785 - loss: 0.4526 - val_accuracy: 0.7918 - val_loss: 0.4236
Epoch 5/100
218/218 - 0s - 2ms/step - accuracy: 0.7813 - loss: 0.4475 - val_accuracy: 0.8010 - val_loss: 0.4198
Epoch 6/100
218/218 - 0s - 1ms/step - accuracy: 0.7852 - loss: 0.4408 - val_accuracy: 0.8016 - val_loss: 0.4154
Epoch 7/100
218/218 - 0s - 1ms/step - accuracy: 0.7900 - loss: 0.4367 - val_accuracy: 0.7982 - val_loss: 0.4149
Epoch 8/100
218/218 - 0s - 1ms/step - accuracy: 0.7866 - loss: 0.4305 - val_accuracy: 0.7987 - val_loss: 0.4132
Epoch 9/100
218/218 - 0s - 1ms/step - accuracy: 0.7869 - loss: 0.4278 - val_accuracy: 0.7987 - val_loss:

In [9]:
# ---------------------------
# Step 8: Predict & submit
# ---------------------------
probs = model.predict(X_test_proc).ravel()
preds = (probs >= 0.5).astype(bool)

submission = pd.DataFrame({
    'PassengerId': test['PassengerId'],
    'Transported': preds
})
submission.to_csv('submission_neural_network.csv', index=False)
print("Saved submission_neural_network.csv")

[1m134/134[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 729us/step
Saved submission_neural_network.csv
