## Settings

In [1]:
# 一つ上の階層からモジュールを参照できるようにする
import sys
sys.path.append('..')

In [2]:
# ロギング用
import os
from datetime import datetime, timezone, timedelta
from logs.logger import create_logger, get_logger
# モデル作成補助用
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from features.funcs import load_feather, load_feather_ohe
from evals.funcs import get_pred_result, get_acc_and_logloss, print_conf_matrix
from inputs.funcs import load_data
# モデル作成用
from models.keras import ModelKeras
from keras.layers import Dense, Dropout

In [3]:
# モジュールの変更を自動的に反映する
%load_ext autoreload
%autoreload 2

In [4]:
# loggingの設定を行う
today = datetime.now(timezone(timedelta(hours=9))).strftime('%Y%m%d')
exp_version = today
os.environ['exp_version'] = exp_version
create_logger(exp_version)

In [5]:
os.listdir('../features/nn')

['fare_test_std_te.feather',
 'age_train_std_te.feather',
 'fare_train_std_te.feather',
 'age_test_std_mean.feather',
 'cabin_train_exist.feather',
 'cabin_train_B-F.feather',
 'test_ohe.feather',
 'train_ohe_2_z.feather',
 'age_train_std_mean.feather',
 'fare_test_<10.feather',
 'age_train_30s.feather',
 'age_test_std_te.feather',
 'fare_test_std_mean.feather',
 'age_train_<10.feather',
 'cabin_test_B-F.feather',
 'cabin_test_exist.feather',
 'train_ohe.feather',
 'fare_train_std_mean.feather',
 'test_ohe_2_z.feather',
 'fare_train_>50.feather',
 'fare_test_>50.feather',
 'age_test_30s.feather',
 'fare_train_<10.feather',
 'age_test_<10.feather']

In [6]:
def save_submission(submission_df):
    file_name = 'keras_submission_{}'.format(today)
    file_name_suffix = input(f'Enter File Suffix: {file_name}_...')
    if file_name_suffix != str(0):
        submission_file_path = f'submissions/{today}/{file_name}_{file_name_suffix}.csv'
        submission_df.to_csv(submission_file_path, index=False)

## Keras

### Base

In [116]:
train_x, train_y, test_x = load_data()

In [117]:
train_x = train_x.drop(['PassengerId', 'Name', 'Cabin', 'Ticket', 'Sex', 'Embarked'], axis=1)
train_x = train_x.dropna()
train_y = train_y.iloc[train_x.index]

In [34]:
model = ModelKeras(logging=True)
layer1 = Dense(1024, activation='relu', input_shape=(train_x.shape[1],))
layer2 = Dense(1024, activation='relu')
layer3 = Dense(1024, activation='relu')
layer3 = Dense(1024, activation='relu')
layer4 = Dense(1, activation='sigmoid')
model.add_layers(layer1, layer2, layer3, layer4)
model.compile()
model.set_fit_params(epochs=5, verbose=1)

[INFO] 2021-02-14 14:46:28,380 >>	=== NN KERAS MODEL ===


In [35]:
pred_train = get_pred_result(model, train_x, train_y)
get_acc_and_logloss(pred_train, train_y, logging=True)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


[INFO] 2021-02-14 14:46:32,555 >>	ACCURACY: 0.6820728291316527
[INFO] 2021-02-14 14:46:32,555 >>	LOGLOSS: 0.6367720671554686
[INFO] 2021-02-14 14:46:32,556 >>	data size: 714
[INFO] 2021-02-14 14:46:32,557 >>	correct predictions: 487
[INFO] 2021-02-14 14:46:32,558 >>	


In [36]:
age_mean = train_x['Age'].mean()
fare_mean = train_x['Fare'].mean()
passenger_id = test_x['PassengerId']
test_x = test_x.drop(['PassengerId', 'Name', 'Cabin', 'Ticket', 'Sex', 'Embarked'], axis=1)
test_x.loc[test_x['Age'].isna(), 'Age'] = age_mean
test_x.loc[test_x['Fare'].isna(), 'Fare'] = fare_mean
preds = model.predict(test_x)
preds = [round(pred[0]) for pred in preds]

In [42]:
submission = pd.DataFrame({'PassengerId': passenger_id, 'Survived': preds})
save_submission(submission)

Enter File Suffix: keras_submission_20210214_... 0


## Age standardized and target encoding

In [194]:
train_x, train_y, test_x = load_data()

In [195]:
base_path = '../features/nn/'
cols_to_rplc_train = {
    'Age': base_path + 'age_train_std_te.feather',
    'Fare': base_path + 'fare_train_std_te.feather',
    'SibSp': base_path + 'sibsp_train_std.feather',
    'Parch': base_path + 'parch_train_std.feather'
}
train_x = load_feather(train_x, cols_to_rplc_train)
train_x = load_feather_ohe(train_x, base_path + 'train_ohe.feather')
train_x = load_feather_ohe(train_x, base_path + 'train_ohe_2_z.feather')

cols_train = {
    'Age_<10': base_path + 'age_train_<10.feather',
    'Age_30s': base_path + 'age_train_30s.feather',
    'Fare_>50': base_path + 'fare_train_>50.feather',
    'Fare_<10': base_path + 'fare_train_<10.feather',
    'Cabin_exist': base_path + 'cabin_train_exist.feather',
    'Cabin_B-F': base_path + 'cabin_train_B-F.feather',
    'SibSp_=1': base_path + 'sibsp_train_=1.feather',
    'Parch_>1': base_path + 'parch_train_>1.feather'
}
train_x = load_feather(train_x, cols_train)

In [196]:
train_x = train_x.drop(['PassengerId', 'Name', 'Cabin', 'Ticket', 'Sex', 'Embarked', 'Pclass'], axis=1)
train_x = train_x.drop([col for col in train_x.columns if 'Cabin' in col], axis=1)
train_x = train_x.drop([col for col in train_x.columns if 'Ticket' in col], axis=1)
train_x = train_x.drop(['Embarked_Q', 'Embarked_S'], axis=1)
train_x = train_x.drop(['Pclass_1', 'Pclass_2'], axis=1)
train_x = train_x.drop(['Fare'], axis=1)
# train_x = train_x.drop(['Age'], axis=1)
train_x = train_x.drop(['SibSp'], axis=1)
train_x = train_x.drop(['Parch'], axis=1)

In [197]:
train_x.head(3)

Unnamed: 0,Age,Pclass_3,Sex_female,Sex_male,Embarked_C,Age_<10,Age_30s,Fare_>50,Fare_<10,SibSp_=1,Parch_>1
0,-0.596004,1,0,1,0,1,0,0,1,1,0
1,0.634445,0,1,0,1,1,0,1,0,1,0
2,-0.288392,1,1,0,0,1,0,0,1,0,0


In [198]:
model = ModelKeras(logging=True, model_name='with_Embarked_C, with_Pclass_1, dropout:0.2, epochs: 10, learning_rate:0.001')
layer_d = Dropout(0.2)
layer1 = Dense(2048, activation='relu', input_shape=(train_x.shape[1],))
layer2 = Dense(1024, activation='relu')
layer3 = Dense(1024, activation='relu')
layer3 = Dense(1024, activation='relu')
layer4 = Dense(1, activation='sigmoid')
model.add_layers(layer1, layer_d, layer4)
model.compile(learning_rate=0.001)
model.set_fit_params(epochs=20, verbose=1)

[INFO] 2021-02-21 22:06:18,464 >>	=== NN KERAS MODEL ===
[INFO] 2021-02-21 22:06:18,465 >>	PARAMS: {'model_name': 'with_Embarked_C, with_Pclass_1, dropout:0.2, epochs: 10, learning_rate:0.001'}


In [199]:
pred_train = get_pred_result(model, train_x, train_y)
get_acc_and_logloss(pred_train, train_y, logging=True)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


[INFO] 2021-02-21 22:06:23,511 >>	ACCURACY: 0.8024691358024691
[INFO] 2021-02-21 22:06:23,512 >>	LOGLOSS: 0.4456393865525605
[INFO] 2021-02-21 22:06:23,513 >>	data size: 891
[INFO] 2021-02-21 22:06:23,513 >>	correct predictions: 715
[INFO] 2021-02-21 22:06:23,514 >>	


In [200]:
model.fit(tr_x=train_x, tr_y=train_y, va_x=train_x, va_y=train_y)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [201]:
passenger_id = test_x['PassengerId']

In [202]:
base_path = '../features/nn/'
cols_to_rplc_train = {
    'Age': base_path + 'age_test_std_te.feather',
    'Fare': base_path + 'fare_test_std_te.feather',
    'SibSp': base_path + 'sibsp_test_std.feather',
    'Parch': base_path + 'parch_test_std.feather'
}
test_x = load_feather(test_x, cols_to_rplc_train)
test_x = load_feather_ohe(test_x, base_path + 'test_ohe.feather')
test_x = load_feather_ohe(test_x, base_path + 'test_ohe_2_z.feather')

cols_test = {
    'Age_<10': base_path + 'age_test_<10.feather',
    'Age_30s': base_path + 'age_test_30s.feather',
    'Fare_>50': base_path + 'fare_test_>50.feather',
    'Fare_<10': base_path + 'fare_test_<10.feather',
    'Cabin_exist': base_path + 'cabin_test_exist.feather',
    'Cabin_B-F': base_path + 'cabin_test_B-F.feather',
    'SibSp_=1': base_path + 'sibsp_test_=1.feather',
    'Parch_>1': base_path + 'parch_test_>1.feather'
}
test_x = load_feather(test_x, cols_test)

In [203]:
test_x = test_x.drop(['PassengerId', 'Name', 'Cabin', 'Ticket', 'Sex', 'Embarked', 'Pclass'], axis=1)
test_x = test_x.drop([col for col in test_x.columns if 'Cabin' in col], axis=1)
test_x = test_x.drop([col for col in test_x.columns if 'Ticket' in col], axis=1)
test_x = test_x.drop(['Embarked_Q', 'Embarked_S'], axis=1)
test_x = test_x.drop(['Pclass_1', 'Pclass_2'], axis=1)
test_x = test_x.drop(['Fare'], axis=1)
# test_x = test_x.drop(['Age'], axis=1)
test_x = test_x.drop(['SibSp'], axis=1)
test_x = test_x.drop(['Parch'], axis=1)

In [204]:
prediction = model.predict(test_x).ravel()
prediction = [round(pred) for pred in prediction]

In [205]:
submission = pd.DataFrame({'PassengerId': passenger_id, 'Survived': prediction})
save_submission(submission)

Enter File Suffix: keras_submission_20210221_... 5


## Age standardized and mean

In [173]:
train_x, train_y, test_x = load_data()

In [174]:
base_path = '../features/nn/'
cols_to_rplc_train = {
    'Age': base_path + 'age_train_std_mean.feather',
    'Fare': base_path + 'fare_train_std_mean.feather'
}
train_x = load_feather(train_x, cols_to_rplc_train)
train_x = load_feather_ohe(train_x, base_path + 'train_ohe.feather')
train_x = load_feather_ohe(train_x, base_path + 'train_ohe_2_z.feather')

In [175]:
train_x = train_x.drop(['PassengerId', 'Name', 'Cabin', 'Ticket', 'Sex', 'Embarked', 'Pclass'], axis=1)
train_x = train_x.drop([col for col in train_x.columns if 'Cabin' in col ], axis=1)
train_x = train_x.drop([col for col in train_x.columns if 'Ticket' in col ], axis=1)
train_x = train_x.drop(['Embarked_Q', 'Embarked_S'], axis=1)

In [176]:
model = ModelKeras(logging=True, model_name='with_Embarked_C, dropout:0.2, epochs: 10, learning_rate:0.001')
layer_d1 = Dropout(0.2)
layer_d2 = Dropout(0.3)
layer1 = Dense(2048, activation='relu', input_shape=(train_x.shape[1],))
layer2 = Dense(512, activation='relu')
layer3 = Dense(1024, activation='relu')
layer3 = Dense(1024, activation='relu')
layer4 = Dense(1, activation='sigmoid')
model.add_layers(layer1, layer_d, layer2, layer_d2, layer4)
model.compile(learning_rate=0.001)
model.set_fit_params(epochs=10, verbose=1)

[INFO] 2021-02-14 15:42:27,920 >>	=== NN KERAS MODEL ===
[INFO] 2021-02-14 15:42:27,921 >>	PARAMS: {'model_name': 'with_Embarked_C, dropout:0.2, epochs: 10, learning_rate:0.001'}


In [177]:
pred_train = get_pred_result(model, train_x, train_y)
get_acc_and_logloss(pred_train, train_y, logging=True)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


[INFO] 2021-02-14 15:42:37,899 >>	ACCURACY: 0.8047138047138047
[INFO] 2021-02-14 15:42:37,900 >>	LOGLOSS: 0.4505417245775857
[INFO] 2021-02-14 15:42:37,900 >>	data size: 891
[INFO] 2021-02-14 15:42:37,901 >>	correct predictions: 717
[INFO] 2021-02-14 15:42:37,902 >>	


In [178]:
model.fit(tr_x=train_x, tr_y=train_y, va_x=train_x, va_y=train_y)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [179]:
passenger_id = test_x['PassengerId']

In [180]:
base_path = '../features/nn/'
cols_to_rplc_train = {
    'Age': base_path + 'age_test_std_mean.feather',
    'Fare': base_path + 'fare_test_std_mean.feather'
}
test_x = load_feather(test_x, cols_to_rplc_train)
test_x = load_feather_ohe(test_x, base_path + 'test_ohe.feather')
test_x = load_feather_ohe(test_x, base_path + 'test_ohe_2_z.feather')

In [181]:
test_x = test_x.drop(['PassengerId', 'Name', 'Cabin', 'Ticket', 'Sex', 'Embarked', 'Pclass'], axis=1)
test_x = test_x.drop([col for col in test_x.columns if 'Cabin' in col ], axis=1)
test_x = test_x.drop([col for col in test_x.columns if 'Ticket' in col ], axis=1)
test_x = test_x.drop(['Embarked_Q', 'Embarked_S'], axis=1)

In [182]:
prediction = model.predict(test_x).ravel()
prediction = [round(pred) for pred in prediction]

In [183]:
submission = pd.DataFrame({'PassengerId': passenger_id, 'Survived': prediction})
save_submission(submission)

Enter File Suffix: keras_submission_20210214_... 4
