<a href="https://colab.research.google.com/github/Jiablero/notebooks/blob/master/titatic_kaggle_contest_v2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import pandas as pd

import numpy as np

from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split

from tensorflow import keras

In [0]:
train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')

In [0]:
def prepare_titanic_data(data):
  X = data.drop(['PassengerId', 'Name', 'Fare', 'Ticket', 'Embarked', 'Cabin'], axis = 1)
  if 'Survived' in data.columns : 
    X = X.drop('Survived', axis = 1) # в тестовых данных нет Survived
  X = X.fillna({'Age': X.Age.median()})
  X = pd.get_dummies(X).drop('Sex_male', axis=1)
  return(X)

In [0]:
X, test_X = train_test_split(train, test_size = 0.1)

In [0]:
y = X.Survived
X = prepare_titanic_data(X)
test_y = test_X.Survived
test_X = prepare_titanic_data(test_X)

In [64]:
model = keras.Sequential(
    [
        keras.layers.Dense(
            256, activation="relu", input_shape=(X.shape[-1],)
        ),
        keras.layers.Dense(256, activation="relu"),
        keras.layers.Dropout(0.3),
        keras.layers.Dense(256, activation="relu"),
        keras.layers.Dropout(0.3),
        keras.layers.Dense(1, activation="sigmoid"),
    ]
)
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_8 (Dense)              (None, 256)               1536      
_________________________________________________________________
dense_9 (Dense)              (None, 256)               65792     
_________________________________________________________________
dropout_4 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_10 (Dense)             (None, 256)               65792     
_________________________________________________________________
dropout_5 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_11 (Dense)             (None, 1)                 257       
Total params: 133,377
Trainable params: 133,377
Non-trainable params: 0
________________________________________________

In [0]:
metrics = [
    keras.metrics.FalseNegatives(name="fn"),
    keras.metrics.FalsePositives(name="fp"),
    keras.metrics.TrueNegatives(name="tn"),
    keras.metrics.TruePositives(name="tp"),
    keras.metrics.Precision(name="precision"),
    keras.metrics.Recall(name="recall"),
]

model.compile(
    optimizer=keras.optimizers.Adam(1e-2), loss="binary_crossentropy", metrics=metrics
)

In [0]:
weight_for_0 = 1.0 / y[y == 0].count()
weight_for_1 = 1.0 / y[y == 1].count()

In [67]:
weight_for_1

0.003236245954692557

In [68]:
callbacks = [keras.callbacks.ModelCheckpoint("titanic_model_at_epoch_{epoch}.h5")]
class_weight = {0: weight_for_0, 1: weight_for_1}

model.fit(
    X,
    y,
    batch_size=2048,
    epochs=30,
    verbose=2,
    callbacks=callbacks,
    validation_data=(test_X, test_y),
    class_weight=class_weight,
)

Epoch 1/30
1/1 - 1s - loss: 0.0023 - fn: 144.0000 - fp: 239.0000 - tn: 253.0000 - tp: 165.0000 - precision: 0.4084 - recall: 0.5340 - val_loss: 1.2143 - val_fn: 0.0000e+00 - val_fp: 56.0000 - val_tn: 1.0000 - val_tp: 33.0000 - val_precision: 0.3708 - val_recall: 1.0000
Epoch 2/30
1/1 - 0s - loss: 0.0029 - fn: 37.0000 - fp: 450.0000 - tn: 42.0000 - tp: 272.0000 - precision: 0.3767 - recall: 0.8803 - val_loss: 3.9263 - val_fn: 33.0000 - val_fp: 0.0000e+00 - val_tn: 57.0000 - val_tp: 0.0000e+00 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 3/30
1/1 - 0s - loss: 0.0140 - fn: 309.0000 - fp: 0.0000e+00 - tn: 492.0000 - tp: 0.0000e+00 - precision: 0.0000e+00 - recall: 0.0000e+00 - val_loss: 1.4473 - val_fn: 33.0000 - val_fp: 0.0000e+00 - val_tn: 57.0000 - val_tp: 0.0000e+00 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 4/30
1/1 - 0s - loss: 0.0053 - fn: 308.0000 - fp: 1.0000 - tn: 491.0000 - tp: 1.0000 - precision: 0.5000 - recall: 0.0032 - val_loss: 0.7652 - val_fn:

<tensorflow.python.keras.callbacks.History at 0x7fd9462626a0>

In [0]:
result1 = pd.DataFrame(model.predict(test_X).round())

In [0]:
result1['Survived'] = test_y.values
result1['result'] = np.where(result1[0] == result1.Survived, 1, 0)

In [71]:
result1.result.mean()

0.7