# 사전 작업

## 모듈 로드

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import datetime
import seaborn as sns
import gc

In [8]:
import lightgbm as lgb
from sklearn.metrics import mean_squared_error
from sklearn.metrics import log_loss
from sklearn.model_selection import KFold, StratifiedKFold

In [9]:
import warnings
# Silence every warning for the rest of the session. This also hides
# deprecation notices raised by the libraries used below, so comment it out
# when debugging or upgrading dependencies.
warnings.filterwarnings('ignore')

In [10]:
# Let wide tables show up to 400 columns before pandas truncates the display.
pd.set_option('display.max_columns', 400)

# 데이터 로드

In [13]:
# Directory (relative path) that holds the input CSV files read in the next cell.
path = './data/'

In [14]:
# Load the train and test tables. Later cells read `card_id`, `target` and
# `outliers` from `train`, and score `test` on the same feature columns.
train = pd.read_csv(path + 'train_v3.csv')
test = pd.read_csv(path + 'test_v3.csv')

# 데이터 구분

In [7]:
# Row masks, computed once and reused for each subset / complement pair.
target_is_negative = train['target'] < 0
target_is_non_negative = train['target'] >= 0
outlier_flag_is_zero = train['outliers'] == 0
outlier_flag_is_nonzero = train['outliers'] != 0

# Rows with a negative target, and the remaining (non-negative) rows.
train_neg = train.loc[target_is_negative]
train_neg_not = train.loc[target_is_non_negative]

# Rows with a non-negative target, and the remaining (negative) rows.
train_pos = train.loc[target_is_non_negative]
train_pos_not = train.loc[target_is_negative]

# Rows whose `outliers` flag is 0, and the rows where it is anything else.
train_without_outliers = train.loc[outlier_flag_is_zero]
train_outliers = train.loc[outlier_flag_is_nonzero]

In [8]:
# One-column frame of card_id in the row order of `train`. The per-subset
# predictions are left-merged onto it before being written to disk.
templete = train.card_id.to_frame()

# Layer 1

## 제거할 피처

In [8]:
# Columns kept out of the Layer 1 feature list: the card identifier, the label
# and the outlier flag, plus what look like raw date columns (judging by their
# names - confirm against the feature-engineering step).
FEATS_EXCLUDED = ['first_active', 'card_id', 'target', 'outliers',
                  'hist_purchase_date_max', 'hist_purchase_date_min', 
                  'new_purchase_date_max', 'new_purchase_date_min']

In [9]:
# LightGBM hyper-parameters shared by every Layer 1 model.
#
# NOTE: `min_child_samples` is a documented alias of `min_data_in_leaf`. This
# dict used to set both (30 and 20). When the canonical name and an alias are
# both present, LightGBM keeps the canonical `min_data_in_leaf` value and
# ignores the alias, so the ignored `min_child_samples: 20` entry has been
# removed. The effective setting (30) is unchanged, just no longer ambiguous.
param = {'num_leaves': 31,
         'min_data_in_leaf': 30,
         'objective': 'regression',
         'max_depth': -1,           # -1 = no depth limit
         'learning_rate': 0.01,
         "boosting": "gbdt",
         "feature_fraction": 0.9,
         "bagging_freq": 1,         # re-sample the bagging subset every iteration
         "bagging_fraction": 0.9,
         "bagging_seed": 11,
         "metric": 'rmse',
         "lambda_l1": 0.1,
         "verbosity": -1,
         "nthread": 4,
         "random_state": 4590}

### base

In [28]:
# Layer 1 "base" model: 5-fold LightGBM regression trained on every training row.
# Folds are stratified on the `outliers` flag so that each fold receives a
# similar share of flagged rows.
folds = StratifiedKFold(n_splits=5, shuffle=True, random_state=15)

oof_lgb = np.zeros(len(train))          # out-of-fold prediction for each training row
predictions_lgb = np.zeros(len(test))   # test prediction averaged over the folds
fold_importances = []                   # one importance frame per fold, concatenated once below

train_columns = [f for f in train.columns if f not in FEATS_EXCLUDED]

# Upper bound on boosting rounds; early stopping decides the actual count per fold.
num_round = 10000

for fold_, (trn_idx, val_idx) in enumerate(folds.split(train, train['outliers'].values)):
    print("fold n°{}".format(fold_))
    trn_data = lgb.Dataset(train.iloc[trn_idx][train_columns], label=train.iloc[trn_idx]['target'])
    val_data = lgb.Dataset(train.iloc[val_idx][train_columns], label=train.iloc[val_idx]['target'])

    clf = lgb.train(param, trn_data, num_round, valid_sets = [trn_data, val_data], verbose_eval=100, early_stopping_rounds = 200)
    # Score only the held-out rows, using the best iteration found by early stopping.
    oof_lgb[val_idx] = clf.predict(train.iloc[val_idx][train_columns], num_iteration=clf.best_iteration)

    # Each fold contributes 1/n_splits of the final test prediction.
    predictions_lgb += clf.predict(test[train_columns], num_iteration=clf.best_iteration) / folds.n_splits

    fold_importance = pd.DataFrame()
    fold_importance["Feature"] = train_columns
    fold_importance["importance"] = clf.feature_importance()
    fold_importance["fold"] = fold_ + 1
    fold_importances.append(fold_importance)

# Build the combined table with a single concat instead of growing a DataFrame
# (starting from an empty one) inside the loop.
feature_importance = pd.concat(fold_importances, axis=0)

print("CV score: {:<8.5f}".format(mean_squared_error(train.target.values, oof_lgb)**0.5))

fold n°0
Training until validation scores don't improve for 200 rounds.
[100]	training's rmse: 3.65093	valid_1's rmse: 3.71886
[200]	training's rmse: 3.56623	valid_1's rmse: 3.68747
[300]	training's rmse: 3.50999	valid_1's rmse: 3.67526
[400]	training's rmse: 3.4668	valid_1's rmse: 3.66859
[500]	training's rmse: 3.4312	valid_1's rmse: 3.66532
[600]	training's rmse: 3.40089	valid_1's rmse: 3.66367
[700]	training's rmse: 3.37362	valid_1's rmse: 3.66194
[800]	training's rmse: 3.35011	valid_1's rmse: 3.66162
[900]	training's rmse: 3.32715	valid_1's rmse: 3.66089
[1000]	training's rmse: 3.30606	valid_1's rmse: 3.66029
[1100]	training's rmse: 3.28622	valid_1's rmse: 3.65951
[1200]	training's rmse: 3.26596	valid_1's rmse: 3.65943
[1300]	training's rmse: 3.24718	valid_1's rmse: 3.6596
[1400]	training's rmse: 3.22819	valid_1's rmse: 3.65974
Early stopping, best iteration is:
[1248]	training's rmse: 3.25659	valid_1's rmse: 3.65928
fold n°1
Training until validation scores don't improve for 200 r

In [33]:
# Save the out-of-fold base predictions keyed by card_id; the Layer 2 section reads this file back.
pd.DataFrame({'card_id':train.card_id, 'target':oof_lgb}).to_csv('./data_modeling/base.csv', index=False)

### negative

In [11]:
# Layer 1 "negative" model: 5-fold LightGBM regression trained only on the rows
# of `train_neg` (target < 0). The complementary rows (`train_neg_not`) are
# never used for fitting; they receive the average prediction of the five fold
# models, exactly like the test set.
folds = StratifiedKFold(n_splits=5, shuffle=True, random_state=15)

oof_lgb_neg = np.zeros(len(train_neg))           # out-of-fold prediction for the training subset
oof_lgb_neg_not = np.zeros(len(train_neg_not))   # fold-averaged prediction for the complement
predictions_lgb_neg = np.zeros(len(test))        # fold-averaged prediction for the test set
fold_importances = []                            # one importance frame per fold, concatenated once below

train_columns = [f for f in train_neg.columns if f not in FEATS_EXCLUDED]

# Upper bound on boosting rounds; early stopping decides the actual count per fold.
num_round = 10000

for fold_, (trn_idx, val_idx) in enumerate(folds.split(train_neg, train_neg['outliers'].values)):
    print("fold n°{}".format(fold_))
    trn_data = lgb.Dataset(train_neg.iloc[trn_idx][train_columns], label=train_neg.iloc[trn_idx]['target'])
    val_data = lgb.Dataset(train_neg.iloc[val_idx][train_columns], label=train_neg.iloc[val_idx]['target'])

    clf = lgb.train(param, trn_data, num_round, valid_sets = [trn_data, val_data], verbose_eval=100, early_stopping_rounds = 200)
    oof_lgb_neg[val_idx] = clf.predict(train_neg.iloc[val_idx][train_columns], num_iteration=clf.best_iteration)

    # Each fold contributes 1/n_splits of the complement and test predictions.
    oof_lgb_neg_not += clf.predict(train_neg_not[train_columns], num_iteration=clf.best_iteration) / folds.n_splits
    predictions_lgb_neg += clf.predict(test[train_columns], num_iteration=clf.best_iteration) / folds.n_splits

    fold_importance = pd.DataFrame()
    fold_importance["Feature"] = train_columns
    fold_importance["importance"] = clf.feature_importance()
    fold_importance["fold"] = fold_ + 1
    fold_importances.append(fold_importance)

# Build the combined table with a single concat instead of growing a DataFrame
# (starting from an empty one) inside the loop.
feature_importance = pd.concat(fold_importances, axis=0)

print("CV score: {:<8.5f}".format(mean_squared_error(train_neg.target.values, oof_lgb_neg)**0.5))

fold n°0
Training until validation scores don't improve for 200 rounds.
[100]	training's rmse: 4.37729	valid_1's rmse: 4.48336
[200]	training's rmse: 4.20132	valid_1's rmse: 4.41881
[300]	training's rmse: 4.09083	valid_1's rmse: 4.40427
[400]	training's rmse: 4.01414	valid_1's rmse: 4.39889
[500]	training's rmse: 3.95348	valid_1's rmse: 4.39641
[600]	training's rmse: 3.90102	valid_1's rmse: 4.39408
[700]	training's rmse: 3.85493	valid_1's rmse: 4.3923
[800]	training's rmse: 3.81237	valid_1's rmse: 4.39143
[900]	training's rmse: 3.77291	valid_1's rmse: 4.39225
Early stopping, best iteration is:
[793]	training's rmse: 3.81512	valid_1's rmse: 4.39137
fold n°1
Training until validation scores don't improve for 200 rounds.
[100]	training's rmse: 4.37377	valid_1's rmse: 4.50167
[200]	training's rmse: 4.19686	valid_1's rmse: 4.42827
[300]	training's rmse: 4.08735	valid_1's rmse: 4.40599
[400]	training's rmse: 4.01152	valid_1's rmse: 4.39848
[500]	training's rmse: 3.95058	valid_1's rmse: 4.395

In [34]:
# Stack the predictions for both halves of the training set (complement rows
# first, then the rows the model was trained on), re-order them to the original
# `train` row order via a left merge on card_id, and save them for Layer 2.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
templete.merge(
    pd.concat([
        pd.DataFrame({'card_id': train_neg_not.card_id, 'target': oof_lgb_neg_not}),
        pd.DataFrame({'card_id': train_neg.card_id, 'target': oof_lgb_neg}),
    ]),
    on='card_id',  # the only shared column, now stated explicitly
    how='left',
).to_csv('./data_modeling/neg.csv', index=False)

### positive

In [35]:
# Layer 1 "positive" model: 5-fold LightGBM regression trained only on the rows
# of `train_pos` (target >= 0). The complementary rows (`train_pos_not`) are
# never used for fitting; they receive the average prediction of the five fold
# models, exactly like the test set.
# NOTE(review): the folds are stratified on `outliers`; presumably that flag is
# constant within this subset (Layer 2 treats target < -30 as the outlier
# rule), in which case the stratification has no effect - confirm.
folds = StratifiedKFold(n_splits=5, shuffle=True, random_state=15)

oof_lgb_pos = np.zeros(len(train_pos))           # out-of-fold prediction for the training subset
oof_lgb_pos_not = np.zeros(len(train_pos_not))   # fold-averaged prediction for the complement
predictions_lgb_pos = np.zeros(len(test))        # fold-averaged prediction for the test set
fold_importances = []                            # one importance frame per fold, concatenated once below

train_columns = [f for f in train_pos.columns if f not in FEATS_EXCLUDED]

# Upper bound on boosting rounds; early stopping decides the actual count per fold.
num_round = 10000

for fold_, (trn_idx, val_idx) in enumerate(folds.split(train_pos, train_pos['outliers'].values)):
    print("fold n°{}".format(fold_))
    trn_data = lgb.Dataset(train_pos.iloc[trn_idx][train_columns], label=train_pos.iloc[trn_idx]['target'])
    val_data = lgb.Dataset(train_pos.iloc[val_idx][train_columns], label=train_pos.iloc[val_idx]['target'])

    clf = lgb.train(param, trn_data, num_round, valid_sets = [trn_data, val_data], verbose_eval=100, early_stopping_rounds = 200)
    oof_lgb_pos[val_idx] = clf.predict(train_pos.iloc[val_idx][train_columns], num_iteration=clf.best_iteration)

    # Each fold contributes 1/n_splits of the complement and test predictions.
    oof_lgb_pos_not += clf.predict(train_pos_not[train_columns], num_iteration=clf.best_iteration) / folds.n_splits
    predictions_lgb_pos += clf.predict(test[train_columns], num_iteration=clf.best_iteration) / folds.n_splits

    fold_importance = pd.DataFrame()
    fold_importance["Feature"] = train_columns
    fold_importance["importance"] = clf.feature_importance()
    fold_importance["fold"] = fold_ + 1
    fold_importances.append(fold_importance)

# Build the combined table with a single concat instead of growing a DataFrame
# (starting from an empty one) inside the loop.
feature_importance = pd.concat(fold_importances, axis=0)

print("CV score: {:<8.5f}".format(mean_squared_error(train_pos.target.values, oof_lgb_pos)**0.5))

fold n°0
Training until validation scores don't improve for 200 rounds.
[100]	training's rmse: 1.09163	valid_1's rmse: 1.12028
[200]	training's rmse: 1.05349	valid_1's rmse: 1.0887
[300]	training's rmse: 1.03405	valid_1's rmse: 1.07696
[400]	training's rmse: 1.02055	valid_1's rmse: 1.07118
[500]	training's rmse: 1.00977	valid_1's rmse: 1.06757
[600]	training's rmse: 1.00037	valid_1's rmse: 1.06554
[700]	training's rmse: 0.991945	valid_1's rmse: 1.06445
[800]	training's rmse: 0.984353	valid_1's rmse: 1.06328
[900]	training's rmse: 0.977305	valid_1's rmse: 1.06244
[1000]	training's rmse: 0.970732	valid_1's rmse: 1.06198
[1100]	training's rmse: 0.964622	valid_1's rmse: 1.06162
[1200]	training's rmse: 0.958608	valid_1's rmse: 1.06127
[1300]	training's rmse: 0.9528	valid_1's rmse: 1.06094
[1400]	training's rmse: 0.947226	valid_1's rmse: 1.06065
[1500]	training's rmse: 0.941975	valid_1's rmse: 1.0606
[1600]	training's rmse: 0.936464	valid_1's rmse: 1.06038
[1700]	training's rmse: 0.931153	va

In [36]:
# Stack the predictions for both halves of the training set (complement rows
# first, then the rows the model was trained on), re-order them to the original
# `train` row order via a left merge on card_id, and save them for Layer 2.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
templete.merge(
    pd.concat([
        pd.DataFrame({'card_id': train_pos_not.card_id, 'target': oof_lgb_pos_not}),
        pd.DataFrame({'card_id': train_pos.card_id, 'target': oof_lgb_pos}),
    ]),
    on='card_id',  # the only shared column, now stated explicitly
    how='left',
).to_csv('./data_modeling/pos.csv', index=False)

### without_outliers

In [37]:
# Layer 1 "without outliers" model: 5-fold LightGBM regression trained only on
# `train_without_outliers`. The rows of `train_outliers` are never used for
# fitting; they receive the average prediction of the five fold models, exactly
# like the test set.
# NOTE: `train_without_outliers` is built as train[train.outliers == 0], so the
# `outliers` column used for stratification is constant here and does not
# balance anything. The call is kept unchanged so fold assignment stays as it was.
folds = StratifiedKFold(n_splits=5, shuffle=True, random_state=15)

oof_lgb_without_outliers = np.zeros(len(train_without_outliers))   # out-of-fold prediction for the training subset
oof_lgb_outliers = np.zeros(len(train_outliers))                   # fold-averaged prediction for the outlier rows
predictions_lgb_without_outliers = np.zeros(len(test))             # fold-averaged prediction for the test set
fold_importances = []                                              # one importance frame per fold, concatenated once below

train_columns = [f for f in train_without_outliers.columns if f not in FEATS_EXCLUDED]

# Upper bound on boosting rounds; early stopping decides the actual count per fold.
num_round = 10000

for fold_, (trn_idx, val_idx) in enumerate(folds.split(train_without_outliers, train_without_outliers['outliers'].values)):
    print("fold n°{}".format(fold_))
    trn_data = lgb.Dataset(train_without_outliers.iloc[trn_idx][train_columns], label=train_without_outliers.iloc[trn_idx]['target'])
    val_data = lgb.Dataset(train_without_outliers.iloc[val_idx][train_columns], label=train_without_outliers.iloc[val_idx]['target'])

    clf = lgb.train(param, trn_data, num_round, valid_sets = [trn_data, val_data], verbose_eval=100, early_stopping_rounds = 200)
    oof_lgb_without_outliers[val_idx] = clf.predict(train_without_outliers.iloc[val_idx][train_columns], num_iteration=clf.best_iteration)

    # Each fold contributes 1/n_splits of the outlier-row and test predictions.
    oof_lgb_outliers += clf.predict(train_outliers[train_columns], num_iteration=clf.best_iteration) / folds.n_splits
    predictions_lgb_without_outliers += clf.predict(test[train_columns], num_iteration=clf.best_iteration) / folds.n_splits

    fold_importance = pd.DataFrame()
    fold_importance["Feature"] = train_columns
    fold_importance["importance"] = clf.feature_importance()
    fold_importance["fold"] = fold_ + 1
    fold_importances.append(fold_importance)

# Build the combined table with a single concat instead of growing a DataFrame
# (starting from an empty one) inside the loop.
feature_importance = pd.concat(fold_importances, axis=0)

print("CV score: {:<8.5f}".format(mean_squared_error(train_without_outliers.target.values, oof_lgb_without_outliers)**0.5))

fold n°0
Training until validation scores don't improve for 200 rounds.
[100]	training's rmse: 1.60508	valid_1's rmse: 1.61676
[200]	training's rmse: 1.57258	valid_1's rmse: 1.58947
[300]	training's rmse: 1.5569	valid_1's rmse: 1.57843
[400]	training's rmse: 1.54652	valid_1's rmse: 1.57278
[500]	training's rmse: 1.53831	valid_1's rmse: 1.56901
[600]	training's rmse: 1.53133	valid_1's rmse: 1.56669
[700]	training's rmse: 1.52511	valid_1's rmse: 1.56515
[800]	training's rmse: 1.51955	valid_1's rmse: 1.56427
[900]	training's rmse: 1.51443	valid_1's rmse: 1.5636
[1000]	training's rmse: 1.50958	valid_1's rmse: 1.56307
[1100]	training's rmse: 1.50494	valid_1's rmse: 1.56258
[1200]	training's rmse: 1.50054	valid_1's rmse: 1.56232
[1300]	training's rmse: 1.49619	valid_1's rmse: 1.56206
[1400]	training's rmse: 1.492	valid_1's rmse: 1.5618
[1500]	training's rmse: 1.4879	valid_1's rmse: 1.56166
[1600]	training's rmse: 1.48388	valid_1's rmse: 1.56144
[1700]	training's rmse: 1.47992	valid_1's rmse:

In [38]:
# Stack the predictions for both halves of the training set (outlier rows
# first, then the rows the model was trained on), re-order them to the original
# `train` row order via a left merge on card_id, and save them for Layer 2.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
templete.merge(
    pd.concat([
        pd.DataFrame({'card_id': train_outliers.card_id, 'target': oof_lgb_outliers}),
        pd.DataFrame({'card_id': train_without_outliers.card_id, 'target': oof_lgb_without_outliers}),
    ]),
    on='card_id',  # the only shared column, now stated explicitly
    how='left',
).to_csv('./data_modeling/outliers.csv', index=False)

# Layer 2

In [11]:
# Reload the four Layer 1 prediction files written by the cells above; each
# one holds a `card_id` column and that model's prediction in `target`.
base = pd.read_csv('./data_modeling/base.csv')
neg = pd.read_csv('./data_modeling/neg.csv')
pos = pd.read_csv('./data_modeling/pos.csv')
outliers = pd.read_csv('./data_modeling/outliers.csv')

In [15]:
# Assemble the Layer 2 design matrix: one column per Layer 1 model, keyed by card_id.
layer2_train = pd.DataFrame({
    'card_id': base.card_id,
    'base': base.target,
    'neg': neg.target,
    'pos': pos.target,
    'outliers': outliers.target,
})
# The true label is attached by row index, before card_id becomes the index.
layer2_train['target'] = train.target
layer2_train = layer2_train.set_index('card_id')

# Move the label into its own Series so it cannot end up among the features.
target = layer2_train['target'].copy()
layer2_train = layer2_train.drop(columns='target')

# 1 where the target is below -30, 0 otherwise; the fold splitters below
# stratify on this flag.
layer2_train['is_outliers'] = (target < -30).values.astype('int64')

In [17]:
# Preview only the first rows instead of rendering the whole frame.
layer2_train.head(10)

Unnamed: 0_level_0,base,neg,pos,outliers,is_outliers
card_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
C_ID_92a2005557,-0.281289,-0.789168,0.473972,-0.284566,0
C_ID_3d0044924f,-0.483703,-1.294766,0.992636,0.218978,0
C_ID_d639edf6cd,0.745200,-1.530998,1.623683,0.595680,0
C_ID_186d6a6901,0.136627,-1.043530,0.958335,0.197117,0
C_ID_cdbd2c0db2,-0.025510,-0.778105,0.594380,-0.128851,0
C_ID_0894217f2f,-1.380128,-2.565628,0.689279,-0.468703,0
C_ID_7e63323c00,-0.260393,-1.015265,0.632335,-0.326014,0
C_ID_dfa21fc124,0.103919,-1.539982,1.488595,-0.065392,0
C_ID_fe0fdac8ea,0.064726,-1.869386,1.096484,0.279655,0
C_ID_bf62c0b49d,-0.010490,-1.283262,1.098469,0.108912,0


In [54]:
# LightGBM hyper-parameters for the Layer 2 (stacking) model. This rebinds the
# `param` name used by the Layer 1 cells, with a smaller learning rate (0.001).
#
# NOTE: `min_child_samples` is a documented alias of `min_data_in_leaf`. This
# dict used to set both (30 and 20). When the canonical name and an alias are
# both present, LightGBM keeps the canonical `min_data_in_leaf` value and
# ignores the alias, so the ignored `min_child_samples: 20` entry has been
# removed. The effective setting (30) is unchanged, just no longer ambiguous.
param = {'num_leaves': 31,
         'min_data_in_leaf': 30,
         'objective': 'regression',
         'max_depth': -1,           # -1 = no depth limit
         'learning_rate': 0.001,
         "boosting": "gbdt",
         "feature_fraction": 0.9,
         "bagging_freq": 1,         # re-sample the bagging subset every iteration
         "bagging_fraction": 0.9,
         "bagging_seed": 11,
         "metric": 'rmse',
         "lambda_l1": 0.1,
         "verbosity": -1,
         "nthread": 4,
         "random_state": 4590}

In [55]:
# For Layer 2 only the stratification helper column is kept out of the features.
# NOTE(review): this rebinds the FEATS_EXCLUDED name that the Layer 1 cells
# rely on. Re-running a Layer 1 cell after this one would no longer exclude
# card_id, target, etc. from its feature list - re-run the Layer 1 definition
# (or restart the kernel) before going back.
FEATS_EXCLUDED = ['is_outliers']

In [57]:
# Layer 2 stacker (LightGBM): 5-fold CV over the Layer 1 predictions, with the
# folds stratified on the is_outliers flag.
folds = StratifiedKFold(n_splits=5, shuffle=True, random_state=15)
train_columns = [col for col in layer2_train.columns if col not in FEATS_EXCLUDED]
oof_train = np.zeros(len(layer2_train))

# Round cap; early stopping determines how many rounds each fold really uses.
num_round = 10000

for fold_, (trn_idx, val_idx) in enumerate(folds.split(layer2_train, layer2_train.is_outliers)):
    print("fold n°{}".format(fold_))

    # Feature rows for the fitting part and the held-out part of this fold.
    trn_features = layer2_train.iloc[trn_idx][train_columns]
    val_features = layer2_train.iloc[val_idx][train_columns]

    trn_data = lgb.Dataset(trn_features, label=target.iloc[trn_idx])
    val_data = lgb.Dataset(val_features, label=target.iloc[val_idx])

    clf = lgb.train(
        param,
        trn_data,
        num_round,
        valid_sets=[trn_data, val_data],
        verbose_eval=100,
        early_stopping_rounds=200,
    )

    # Only the held-out rows are scored, at the best iteration found.
    oof_train[val_idx] = clf.predict(val_features, num_iteration=clf.best_iteration)

print("CV score: {:<8.5f}".format(mean_squared_error(target.values, oof_train)**0.5))

fold n°0
Training until validation scores don't improve for 200 rounds.
[100]	training's rmse: 3.81218	valid_1's rmse: 3.81847
[200]	training's rmse: 3.78065	valid_1's rmse: 3.78953
[300]	training's rmse: 3.75419	valid_1's rmse: 3.76559
[400]	training's rmse: 3.73195	valid_1's rmse: 3.74591
[500]	training's rmse: 3.71312	valid_1's rmse: 3.72952
[600]	training's rmse: 3.69724	valid_1's rmse: 3.71607
[700]	training's rmse: 3.68376	valid_1's rmse: 3.70489
[800]	training's rmse: 3.67232	valid_1's rmse: 3.69585
[900]	training's rmse: 3.66256	valid_1's rmse: 3.68838
[1000]	training's rmse: 3.65421	valid_1's rmse: 3.68224
[1100]	training's rmse: 3.64695	valid_1's rmse: 3.67729
[1200]	training's rmse: 3.64067	valid_1's rmse: 3.67325
[1300]	training's rmse: 3.63518	valid_1's rmse: 3.66986
[1400]	training's rmse: 3.63037	valid_1's rmse: 3.66713
[1500]	training's rmse: 3.62605	valid_1's rmse: 3.66487
[1600]	training's rmse: 3.62226	valid_1's rmse: 3.66309
[1700]	training's rmse: 3.61879	valid_1's

[2000]	training's rmse: 3.61077	valid_1's rmse: 3.6532
[2100]	training's rmse: 3.60826	valid_1's rmse: 3.65266
[2200]	training's rmse: 3.60595	valid_1's rmse: 3.65218
[2300]	training's rmse: 3.60374	valid_1's rmse: 3.65191
[2400]	training's rmse: 3.60164	valid_1's rmse: 3.65171
[2500]	training's rmse: 3.59963	valid_1's rmse: 3.65161
[2600]	training's rmse: 3.59772	valid_1's rmse: 3.65145
[2700]	training's rmse: 3.59585	valid_1's rmse: 3.65151
Early stopping, best iteration is:
[2598]	training's rmse: 3.59776	valid_1's rmse: 3.65145
CV score: 3.65259 


In [58]:
from sklearn.ensemble import RandomForestRegressor

In [69]:
# Layer 2 stacker (random forest): 5-fold CV over the Layer 1 predictions, with
# the folds stratified on the is_outliers flag.
# NOTE: `oof_train` is re-created here, replacing the array filled by the
# LightGBM stacker cell.
folds = StratifiedKFold(n_splits=5, shuffle=True, random_state=15)

oof_train = np.zeros(len(layer2_train))

train_columns = [f for f in layer2_train.columns if f not in FEATS_EXCLUDED]

for fold_, (trn_idx, val_idx) in enumerate(folds.split(layer2_train, layer2_train.is_outliers)):
    # A fresh, identically seeded forest is fitted for every fold.
    rf = RandomForestRegressor(n_estimators=400, n_jobs=-1, max_depth=4, random_state = 5, bootstrap=True)
    print("fold n°{}".format(fold_))
    x_trn_data = layer2_train.iloc[trn_idx][train_columns]
    y_trn_data = target.iloc[trn_idx]
    x_val_data = layer2_train.iloc[val_idx][train_columns]
    y_val_data = target.iloc[val_idx]

    rf.fit(x_trn_data, y_trn_data)

    # Predict the held-out rows once and reuse the result for both the
    # per-fold score and the out-of-fold array (it was computed twice before).
    val_pred = rf.predict(x_val_data)
    print("cv: ", mean_squared_error(y_val_data, val_pred)**0.5)
    oof_train[val_idx] = val_pred

print("CV score: {:<8.5f}".format(mean_squared_error(target.values, oof_train)**0.5))

fold n°0
cv:  3.6540617427495166
fold n°1
cv:  3.659229209052886
fold n°2
cv:  3.6411249591634394
fold n°3
cv:  3.6470496971875668
fold n°4
cv:  3.6470064974884675
CV score: 3.64970 


In [98]:
import keras
from keras.layers import Input, Dense, LeakyReLU, Dropout, Lambda, subtract, ReLU
from keras.models import Model
from keras import backend as K

In [77]:
# Remove the stratification helper so only the Layer 1 prediction columns
# (base, neg, pos, outliers) are fed to the neural network.
# Rebinding via drop(..., errors='ignore') keeps this cell re-runnable: a second
# execution is a no-op, whereas `del layer2_train['is_outliers']` raised KeyError.
# NOTE: the K-fold cells above stratify on is_outliers, so rebuild layer2_train
# before re-running them.
layer2_train = layer2_train.drop(columns=['is_outliers'], errors='ignore')

In [88]:
from sklearn.model_selection import train_test_split

# Single 70/30 hold-out split with a fixed seed for the neural network. Unlike
# the K-fold cells above, no `stratify` argument is passed, so this is a plain
# random split.
train_x, val_x, train_y, val_y = train_test_split(layer2_train, target, test_size=0.3, random_state=0)

In [121]:
def build_embedding(shape, dimensions,
                    hidden_units=(512, 256, 128, 64, 32, 16, 8, 4),
                    dropout_rate=0.25):
    """Build an uncompiled fully connected Keras model.

    The network is a stack of ``Dense -> ReLU -> Dropout`` stages, one per
    entry of ``hidden_units``, followed by a final ``Dense(dimensions)`` layer
    with no explicit activation.

    Args:
        shape: Input shape passed to ``keras.layers.Input`` (without the batch
            axis), e.g. ``(4,)``.
        dimensions: Number of units in the output layer.
        hidden_units: Width of each hidden stage, in order. The default
            reproduces the original hard-coded 512 -> ... -> 4 funnel.
        dropout_rate: Dropout rate applied after every hidden stage. The
            default (0.25) matches the original hard-coded value.

    Returns:
        A ``keras.models.Model`` mapping the input tensor to the output layer.
        The caller is responsible for compiling it.
    """
    inp = Input(shape=shape)
    x = inp

    # One Dense/ReLU/Dropout stage per requested width (previously eight
    # copy-pasted stanzas).
    for units in hidden_units:
        x = Dense(units)(x)
        x = ReLU()(x)
        x = Dropout(dropout_rate)(x)

    out = Dense(dimensions)(x)

    return Model(inputs=inp, outputs=out)

In [122]:
# Four inputs and one regression output. The (4,) assumes layer2_train holds
# exactly the base/neg/pos/outliers columns, i.e. that is_outliers has already
# been removed - update it if the feature set changes.
model = build_embedding((4,), 1)

In [123]:
# optimizer
opt = keras.optimizers.Adam(lr=0.01, decay=0, amsgrad=False)
model.compile(optimizer=opt, loss="mean_squared_error")

In [124]:
# Number of single-epoch fit() calls made by the training loop below.
epochs = 100

In [125]:
# Train one epoch per fit() call so that RMSE can be reported after each epoch.
for epoch in range(epochs):
    res = model.fit(
        x=train_x,
        y=train_y,
        validation_data=(val_x, val_y),
        batch_size=128,
        shuffle=True,
        verbose=1,
        # Running from `epoch` to `epoch + 1` makes Keras execute exactly one epoch.
        initial_epoch=epoch,
        epochs=epoch + 1,
    )

    # The history of a one-epoch fit has a single entry per metric (MSE here).
    train_loss = res.history['loss'][0]
    val_loss = res.history['val_loss'][0]
    print("val rmse: {}, train rmse: {}".format(val_loss ** 0.5, train_loss ** 0.5))

Train on 141341 samples, validate on 60576 samples
Epoch 1/1
val rmse: 3.8012766652264265, train rmse: 3.8288602959272144
Train on 141341 samples, validate on 60576 samples
Epoch 2/2
val rmse: 3.809855205756602, train rmse: 3.8335137983740117
Train on 141341 samples, validate on 60576 samples
Epoch 3/3
val rmse: 3.843912984444872, train rmse: 3.8468272233135625
Train on 141341 samples, validate on 60576 samples
Epoch 4/4
val rmse: 3.8438395090083737, train rmse: 3.8539806260136884
Train on 141341 samples, validate on 60576 samples
Epoch 5/5
val rmse: 3.8435262398392447, train rmse: 3.8539270376876993
Train on 141341 samples, validate on 60576 samples
Epoch 6/6
val rmse: 3.8438045309808433, train rmse: 3.853755803379949
Train on 141341 samples, validate on 60576 samples
Epoch 7/7
val rmse: 3.8438033368106987, train rmse: 3.853683185289836
Train on 141341 samples, validate on 60576 samples
Epoch 8/8
val rmse: 3.844199257789153, train rmse: 3.8537588107510894
Train on 141341 samples, vali

KeyboardInterrupt: 