In [1]:
# Imports

import pandas as pd
from sklearn.model_selection import GridSearchCV
from sklearn.neural_network import MLPClassifier

In [2]:
# Load data

# The raw training file is read only to obtain the target column.
train = pd.read_csv('inputs/train.csv')

# Target

y = train['Survived']

# Feature matrices written by an earlier step (their contents are not
# visible from this notebook).
X_train, X_test = map(
    pd.read_csv,
    ['intermediary_outputs/X_train.csv', 'intermediary_outputs/X_test.csv'],
)

In [3]:
# Neural network

# Base estimator handed to the grid search; everything except the seed is
# left at the library defaults.
mlp = MLPClassifier(
    random_state=188,
)

In [4]:
# Hyperparameter tuning

# Search space: 4 single-hidden-layer widths x 2 activations x 3 alpha
# values = 24 candidate configurations.
tuned_parameters = {
    'hidden_layer_sizes': [(width,) for width in (16, 32, 64, 128)],
    'activation': ['relu', 'tanh'],
    'alpha': [0.001, 0.01, 0.1]
}

# 5-fold cross-validated grid search; fit() returns the fitted search object,
# so `model` is the same object it would be with a separate fit call.
model = GridSearchCV(
    estimator=mlp,
    param_grid=tuned_parameters,
    cv=5,
    verbose=1,
).fit(X_train, y)

# Per-candidate mean and spread of the cross-validation score.
scores = model.cv_results_['mean_test_score']
scores_std = model.cv_results_['std_test_score']

print('Average scores:', scores.round(4))
print('Score standard deviations:', scores_std.round(3))
print('Best parameters:', model.best_params_)
print('Best score:', round(model.best_score_, 4))

Fitting 5 folds for each of 24 candidates, totalling 120 fits




Average scores: [0.8025 0.8216 0.826  0.8193 0.8081 0.8216 0.8294 0.826  0.8092 0.8171
 0.8249 0.826  0.835  0.8272 0.8249 0.8182 0.8305 0.8227 0.8305 0.8182
 0.8305 0.8272 0.826  0.8205]
Score standard deviations: [0.012 0.023 0.02  0.023 0.013 0.026 0.02  0.024 0.013 0.019 0.015 0.02
 0.02  0.015 0.023 0.026 0.02  0.019 0.021 0.024 0.019 0.015 0.02  0.024]
Best parameters: {'activation': 'tanh', 'alpha': 0.001, 'hidden_layer_sizes': (16,)}
Best score: 0.835




In [5]:
# Make predictions

predictions = model.predict(X_test)
probabilities = model.predict_proba(X_test)

# Assemble the submission frame in a single step.
# NOTE(review): ids are generated as 892, 893, ... in row order rather than
# read from a file -- presumably the first PassengerId of the test set;
# confirm against the test data.
submission = pd.DataFrame({
    'PassengerId': range(892, 892 + len(predictions)),
    'Survived': predictions,
    # Second predict_proba column, used here as the survival probability.
    'SurvivalProbability': probabilities[:, 1].round(2),
})

submission.head(10)

Unnamed: 0,PassengerId,Survived,SurvivalProbability
0,892,0,0.06
1,893,0,0.27
2,894,0,0.18
3,895,0,0.06
4,896,0,0.46
5,897,0,0.07
6,898,0,0.34
7,899,0,0.32
8,900,1,0.83
9,901,0,0.06


In [6]:
# Export to csv

# Only the id and the predicted label are written; the probability column
# stays in the notebook. Column labels are case-sensitive ('PassengerId',
# not 'PassengerID').
submission.to_csv(
    'outputs/submission_nn.csv',
    columns=['PassengerId', 'Survived'],
    index=False,
)

KeyError: "['PassengerID'] not in index"