## Settings

In [1]:
# 一つ上の階層からモジュールを参照できるようにする
import sys
sys.path.append('..')

In [2]:
# ロギング用
import os
from datetime import datetime, timezone, timedelta
from logs.logger import create_logger, get_logger
# モデル作成補助用
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from features.funcs import load_feather, load_feather_ohe
from evals.funcs import get_pred_result, get_acc_and_logloss, print_conf_matrix
from inputs.funcs import load_data
# モデル作成用
from models.keras import ModelKeras
from keras.layers import Dense, Dropout

In [3]:
# モジュールの変更を自動的に反映する
%load_ext autoreload
%autoreload 2

In [4]:
# loggingの設定を行う
today = datetime.now(timezone(timedelta(hours=9)))
exp_version = today.strftime('%Y%m%d')
os.environ['exp_version'] = exp_version
create_logger(exp_version)

In [5]:
os.listdir('../features/nn')

['fare_test_std_te.feather',
 'age_train_std_te.feather',
 'fare_train_std_te.feather',
 'age_test_std_mean.feather',
 'test_ohe.feather',
 'train_ohe_2_z.feather',
 'age_train_std_mean.feather',
 'age_test_std_te.feather',
 'fare_test_std_mean.feather',
 'train_ohe.feather',
 'fare_train_std_mean.feather',
 'test_ohe_2_z.feather']

## Keras

### Base

In [6]:
train_x, train_y, _ = load_data()

In [7]:
train_x = train_x.drop(['PassengerId', 'Name', 'Cabin', 'Ticket', 'Sex', 'Embarked'], axis=1)
train_x = train_x.dropna()
train_y = train_y.iloc[train_x.index]

In [8]:
model = ModelKeras(logging=True)
layer1 = Dense(1024, activation='relu', input_shape=(train_x.shape[1],))
layer2 = Dense(1024, activation='relu')
layer3 = Dense(1024, activation='relu')
layer3 = Dense(1024, activation='relu')
layer4 = Dense(1, activation='sigmoid')
model.add_layers(layer1, layer2, layer3, layer4)
model.compile()
model.set_fit_params(epochs=10, verbose=0)

[INFO] 2021-02-13 16:08:01,101 >>	=== NN KERAS MODEL ===


In [9]:
pred_train = get_pred_result(model, train_x, train_y)
get_acc_and_logloss(pred_train, train_y, logging=True)

[INFO] 2021-02-13 16:08:08,712 >>	ACCURACY: 0.7128851540616247
[INFO] 2021-02-13 16:08:08,713 >>	LOGLOSS: 0.5943284557133793
[INFO] 2021-02-13 16:08:08,714 >>	data size: 714
[INFO] 2021-02-13 16:08:08,715 >>	correct predictions: 509
[INFO] 2021-02-13 16:08:08,716 >>	


#### Age standardized and target encoding

In [10]:
train_x, train_y, test_x = load_data()

In [11]:
base_path = '../features/nn/'
cols_to_rplc_train = {
    'Age': base_path + 'age_train_std_te.feather',
    'Fare': base_path + 'fare_train_std_te.feather'
}
train_x = load_feather(train_x, cols_to_rplc_train)
train_x = load_feather_ohe(train_x, base_path + 'train_ohe.feather')
train_x = load_feather_ohe(train_x, '../features/nn/train_ohe_2_z.feather')

In [12]:
train_x = train_x.drop(['PassengerId', 'Name', 'Cabin', 'Ticket', 'Sex', 'Embarked'], axis=1)
# train_x = train_x.dropna()
# train_y = train_y.iloc[train_x.index]

In [19]:
model = ModelKeras(logging=True)
layer1 = Dense(1024, activation='relu', input_shape=(train_x.shape[1],))
layer2 = Dense(1024, activation='relu')
layer3 = Dense(1024, activation='relu')
layer3 = Dense(1024, activation='relu')
layer4 = Dense(1, activation='sigmoid')
model.add_layers(layer1, layer2, layer3, layer4)
model.compile()
model.set_fit_params(epochs=10, verbose=1)

[INFO] 2021-02-13 16:36:00,583 >>	=== NN KERAS MODEL ===


In [17]:
pred_train = get_pred_result(model, train_x, train_y)
get_acc_and_logloss(pred_train, train_y, logging=False)

Epoch 1/3
Epoch 2/3
Epoch 3/3
Epoch 1/3
Epoch 2/3
Epoch 3/3
Epoch 1/3
Epoch 2/3
Epoch 3/3
Epoch 1/3
Epoch 2/3
Epoch 3/3
data size: 891
correct predictions: 781
accuracy: 0.8765432099
logloss: 0.2947481360


In [134]:
model = ModelKeras(logging=True)
layer1 = Dense(1024, activation='relu', input_shape=(train_x.shape[1],))
layer2 = Dense(1024, activation='relu')
layer3 = Dense(1024, activation='relu')
layer3 = Dense(1024, activation='relu')
layer4 = Dense(1, activation='sigmoid')
model.add_layers(layer1, layer2, layer3, layer4)
model.compile()
model.set_fit_params(epochs=10, verbose=0)

[INFO] 2021-02-13 00:14:34,668 >>	=== NN KERAS MODEL ===


In [135]:
model.fit(tr_x=train_x, tr_y=train_y, va_x=train_x, va_y=train_y)

In [136]:
passenger_id = test_x['PassengerId']

In [137]:
base_path = '../features/nn/'
cols_to_rplc_train = {
    'Age': base_path + 'age_test_std_te.feather',
    'Fare': base_path + 'fare_test_std_te.feather'
}
test_x = load_feather(test_x, cols_to_rplc_train)
test_x = load_feather_ohe(test_x, base_path + 'test_ohe.feather')
test_x = load_feather_ohe(test_x, '../features/nn/test_ohe_2_z.feather')

In [138]:
test_x = test_x.drop(['PassengerId', 'Name', 'Cabin', 'Ticket', 'Sex', 'Embarked'], axis=1)

In [143]:
prediction = model.model.predict(test_x).ravel()
prediction = [int(pred) for pred in prediction]

In [144]:
submission = pd.DataFrame({'PassengerId': passenger_id, 'Survived': prediction})

In [145]:
submission_file_path = 'submissions/keras_submission_{}.csv'.format(today.strftime('%Y%m%d'))
submission.to_csv(submission_file_path, index=False)

#### Age standardized and mean

In [14]:
train_x, train_y, _ = load_data()

In [15]:
base_path = '../features/nn/'
cols_to_rplc_train = {
    'Age': base_path + 'age_train_std_mean.feather',
    'Fare': base_path + 'fare_train_std_mean.feather'
}
train_x = load_feather(train_x, cols_to_rplc_train)

In [16]:
train_x = train_x.drop(['PassengerId', 'Name', 'Cabin', 'Ticket', 'Sex', 'Embarked'], axis=1)
train_x = train_x.dropna()
train_y = train_y.iloc[train_x.index]

In [17]:
pred_train = get_pred_result(model, train_x, train_y)
get_acc_and_logloss(pred_train, train_y, logging=False)

data size: 891
correct predictions: 653
accuracy: 0.7328843996
logloss: 0.5741233327
