In [577]:
import pandas as pd
import numpy as np
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam


In [578]:
# Read the labelled training table and the unlabelled test table from the
# current working directory.
data, test_data = (pd.read_csv(csv_path) for csv_path in ('train.csv', 'test.csv'))

In [579]:
# Columns excluded from the feature set. Declared once so the training and
# test frames are always stripped of exactly the same columns.
DROPPED_COLUMNS = ['PassengerId', 'Name', 'Ticket', 'Cabin']

# Rebind instead of mutating in place: the cell then produces the same result
# every time it runs. errors='ignore' keeps a re-run from raising KeyError once
# the columns have already been removed.
data = data.drop(columns=DROPPED_COLUMNS, errors='ignore')
test_data = test_data.drop(columns=DROPPED_COLUMNS, errors='ignore')


In [580]:
# One stratified 80/20 shuffle split. random_state pins the shuffle so the
# train/validation partition (and every metric computed from it) is the same
# on each Restart & Run All.
splitter = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)

In [581]:
# The splitter yields a single (train, test) pair of positional index arrays
# (n_splits=1), so pull that pair directly rather than looping over it.
train_index, test_index = next(splitter.split(data, data['Survived']))
train_data = data.iloc[train_index]
validation_data = data.iloc[test_index]

In [582]:
# Separate the 'Survived' target column from the feature columns for the
# training and validation partitions.
X_train, y_train = train_data.drop(columns=['Survived']), train_data['Survived']
X_validation, y_validation = (
    validation_data.drop(columns=['Survived']),
    validation_data['Survived'],
)


In [583]:
# Assign each feature column to a preprocessing branch based on its dtype:
# int64/float64 columns go to the numeric branch; category/bool/object columns
# go to the categorical branch.
numerical_features = list(
    X_train.select_dtypes(include=['int64', 'float64']).columns
)
categorical_features = list(
    X_train.select_dtypes(include=['category', 'bool', 'object']).columns
)

In [584]:
# Numeric branch: fill missing values with the column median, then standardize.
numerical_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler()),
])

# Categorical branch: fill missing values with the most frequent category, then
# one-hot encode. handle_unknown='ignore' means categories not seen during fit
# do not raise at transform time.
categorical_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore')),
])

In [585]:
# Apply the numeric pipeline to the numeric columns and the categorical
# pipeline to the categorical columns; the outputs are concatenated in this
# order.
preprocessor = ColumnTransformer([
    ('num', numerical_pipeline, numerical_features),
    ('cat', categorical_pipeline, categorical_features),
])

In [586]:
# Fit the preprocessing on the training features only, then reuse the fitted
# transformer for the validation and test features.
# NOTE: X_train and X_validation are rebound to their transformed versions, so
# this cell cannot simply be re-run without re-running the cells above it.
X_train = preprocessor.fit_transform(X_train)
X_validation, X_test = (
    preprocessor.transform(frame) for frame in (X_validation, test_data)
)

In [587]:
# Seed Python's `random`, NumPy and TensorFlow together. tf.random.set_seed
# alone only covers TensorFlow's own generator, which leaves any randomness
# drawn from the other two sources unseeded.
tf.keras.utils.set_random_seed(42)

In [588]:
# Create an empty Sequential model container.
model = Sequential()

In [589]:
# Declare the input width with an explicit Input layer instead of passing
# `input_shape` to the first Dense layer, which Keras warns against for
# Sequential models.
model.add(tf.keras.Input(shape=(X_train.shape[1],)))

# First hidden layer (no dropout after it).
model.add(Dense(units=256, activation='relu'))

# Progressively narrower hidden layers, each followed by 30% dropout.
for hidden_units in (128, 64, 32, 16):
    model.add(Dense(units=hidden_units, activation='relu'))
    model.add(Dropout(0.3))

# Single sigmoid unit: outputs a value in (0, 1) for the binary target.
model.add(Dense(units=1, activation='sigmoid'))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [590]:
# Configure training: Adam with its default settings, binary cross-entropy
# loss, and accuracy reported as the tracked metric.
model.compile(
    optimizer=Adam(),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [591]:
# Print the layer-by-layer summary of the model.
model.summary()

In [592]:
# Train for 30 epochs in mini-batches of 32, evaluating on the held-out
# validation partition after every epoch. verbose=2 prints one line per epoch.
history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_validation, y_validation),
    batch_size=32,
    epochs=30,
    verbose=2,
)

Epoch 1/30
23/23 - 1s - 31ms/step - accuracy: 0.6208 - loss: 0.6555 - val_accuracy: 0.7207 - val_loss: 0.5908
Epoch 2/30
23/23 - 0s - 2ms/step - accuracy: 0.7205 - loss: 0.5616 - val_accuracy: 0.7765 - val_loss: 0.5219
Epoch 3/30
23/23 - 0s - 2ms/step - accuracy: 0.7767 - loss: 0.5085 - val_accuracy: 0.7709 - val_loss: 0.5048
Epoch 4/30
23/23 - 0s - 2ms/step - accuracy: 0.8076 - loss: 0.4822 - val_accuracy: 0.7654 - val_loss: 0.5080
Epoch 5/30
23/23 - 0s - 2ms/step - accuracy: 0.8146 - loss: 0.4636 - val_accuracy: 0.7654 - val_loss: 0.4768
Epoch 6/30
23/23 - 0s - 2ms/step - accuracy: 0.8287 - loss: 0.4288 - val_accuracy: 0.7877 - val_loss: 0.4867
Epoch 7/30
23/23 - 0s - 5ms/step - accuracy: 0.8258 - loss: 0.4540 - val_accuracy: 0.7989 - val_loss: 0.4587
Epoch 8/30
23/23 - 0s - 2ms/step - accuracy: 0.8371 - loss: 0.4242 - val_accuracy: 0.7989 - val_loss: 0.4679
Epoch 9/30
23/23 - 0s - 2ms/step - accuracy: 0.8385 - loss: 0.4314 - val_accuracy: 0.7933 - val_loss: 0.4599
Epoch 10/30
23/23 

In [593]:
# Final loss and accuracy on the validation partition (order follows the
# compiled loss followed by the compiled metrics).
val_loss, val_accuracy = model.evaluate(x=X_validation, y=y_validation)

6/6 ━━━━━━━━━━━━━━━━━━━━ 0s 865us/step - accuracy: 0.7875 - loss: 0.4657


In [594]:
# Run the trained model on the preprocessed test features; the sigmoid output
# layer yields one score per row.
y_pred = model.predict(X_test)

14/14 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step


In [595]:
# Turn scores into hard 0/1 labels: strictly greater than 0.5 maps to 1.
y_pred = np.where(y_pred > 0.5, 1, 0)

In [596]:
# Take the passenger ids from the test file itself rather than assuming they
# start at 892 and are contiguous. Only that one column is re-read, because the
# in-memory test frame no longer carries it.
passenger_ids = pd.read_csv('test.csv', usecols=['PassengerId'])['PassengerId']

# One row per test passenger; DataFrame construction raises if the id column
# and the predictions differ in length.
result = pd.DataFrame(
    {
        "PassengerId": passenger_ids.to_numpy(),
        "Survived": y_pred.flatten(),
    }
)
result.to_csv("submission.csv", index=False)