In [1]:
import pandas as pd
import numpy as np
import pickle as pkl
import tensorflow as tf
import os, math
import config

from model_functions import build_model, run_session

os.makedirs(config.dir_ckpt , exist_ok=True)

raw_df = pd.read_csv('./data/dataset_kor/교통사망사고정보/Kor_Train_교통사망사고정보(12.1~17.6).csv', engine='python')
raw_df = raw_df[config.test_cols]

for key in config.kv_map.keys():
    raw_df[key] = raw_df[key].apply(lambda x: config.kv_map[key][x])

for mdl_num in range(1,4):
    if mdl_num == 1:
        np.random.seed(seed=1000)
        shuffle_idx = np.random.rand(len(raw_df)) 
        train_df = raw_df[shuffle_idx<= 0.85]
        valid_df = raw_df[shuffle_idx > 0.85]

    elif mdl_num == 2:
        np.random.seed(seed=2000)
        shuffle_idx = np.random.rand(len(raw_df)) 
        train_df = raw_df[shuffle_idx<= 0.85]
        valid_df = raw_df[shuffle_idx > 0.85]

    else:
        train_df = raw_df[:config.train_size]
        valid_df = raw_df[config.train_size:]
    
    
    config.path_ckpt = os.path.join(config.dir_ckpt, 'best{}.ckpt'.format(mdl_num))

    train_df_cate = train_df.loc[:,config.cate_cols]
    train_df_cont = train_df.loc[:,config.cont_cols]

    valid_df_cate = valid_df.loc[:,config.cate_cols]
    valid_df_cont = valid_df.loc[:,config.cont_cols]

    np.random.seed(seed=9)
    config.vaild_drop_mask = np.random.rand(valid_df_cate.values.shape[0], valid_df_cate.values.shape[1])>(1- config.keep_prop)
    config.vaild_cont_mask = np.random.rand(valid_df_cont.values.shape[0], valid_df_cont.values.shape[1])<(1- config.keep_prop)

    np.random.seed(seed=19)
    config.vaild_test_drop_mask = np.random.rand(valid_df_cate.values.shape[0], valid_df_cate.values.shape[1])>(1- config.keep_prop)
    config.vaild_test_cont_mask = np.random.rand(valid_df_cont.values.shape[0], valid_df_cont.values.shape[1])<(1- config.keep_prop)

    train_step = math.ceil(len(train_df)/ config.batch_size)
    valid_step = math.ceil(len(valid_df)/ config.batch_size)
    print("Data is Ready...")

    mdl = build_model(config)
    print("Model {} is built...".format(mdl_num))

    saver = tf.train.Saver(max_to_keep=5)
    init = tf.global_variables_initializer()

    # run session
    with tf.Session() as sess:
        tf.set_random_seed(seed=1991)
        np.random.seed(seed=1991)
        sess.run(init)
    #     saver.restore(sess, config.path_ckpt)
        min_val_loss = 9999
        print("Model {} is training".format(mdl_num))
        for epoch in range(1, config.epochs+1):
            #train
            trn_total_loss_ = run_session(sess, train_step, [train_df_cate, train_df_cont], config, mdl, mode=1)
            
            #valid
            val_total_loss_ = run_session(sess, valid_step, [valid_df_cate, valid_df_cont], config, mdl, mode=2)
            
            if config.verbose:
                print("Epoch : {}".format(epoch), end='\t')
                print("Train_loss : {:.6f} / {:.6f} / {:.6f}".format(trn_total_loss_[0], trn_total_loss_[1], trn_total_loss_[2]), end = '\t')
                print("Valid_loss : {:.6f} / {:.6f} / {:.6f}".format(val_total_loss_[0], val_total_loss_[1], val_total_loss_[2]), end = '\t')
            
            #monitor
            if val_total_loss_[0] < min_val_loss:
                saver.save(sess, config.path_ckpt)
                min_val_loss = val_total_loss_[0]
                if config.verbose:
                    print("Saved")
            else:
                if config.verbose:
                    print("No Saved")
        print("Training Model {} of 3 is Completed...".format(mdl_num))
        
print("Train is Ended, Do Test")

  from ._conv import register_converters as _register_converters


Data is Ready...
Model 1 is built...
Model 1 is training
Epoch : 1	Train_loss : 25.045007 / 22.924843 / 1.060082	Valid_loss : 19.254429 / 18.062612 / 0.595909	Saved
Epoch : 2	Train_loss : 17.289235 / 16.340595 / 0.474320	Valid_loss : 14.467536 / 13.733345 / 0.367096	Saved
Epoch : 3	Train_loss : 13.402817 / 12.769708 / 0.316554	Valid_loss : 11.276690 / 10.707379 / 0.284655	Saved
Epoch : 4	Train_loss : 10.860660 / 10.354470 / 0.253095	Valid_loss : 9.186281 / 8.720769 / 0.232756	Saved
Epoch : 5	Train_loss : 9.206586 / 8.781720 / 0.212433	Valid_loss : 7.810295 / 7.413648 / 0.198323	Saved
Epoch : 6	Train_loss : 8.225099 / 7.714998 / 0.255051	Valid_loss : 6.910857 / 6.539106 / 0.185875	Saved
Epoch : 7	Train_loss : 7.326221 / 6.959979 / 0.183121	Valid_loss : 6.306452 / 5.944329 / 0.181062	Saved
Epoch : 8	Train_loss : 6.768245 / 6.448703 / 0.159771	Valid_loss : 5.861177 / 5.515543 / 0.172817	Saved
Epoch : 9	Train_loss : 6.396270 / 6.075906 / 0.160182	Valid_loss : 5.542404 / 5.191767 / 0.175318

# End

In [4]:
def eval_score(preds_cate_, preds_cont_):
    #out
    print("Exproting...")
    #cliping
    preds_cont_[preds_cont_<0] = 0.0

    # Categorical Vals Restore
    pred_args = []
    for p_ in preds_cate_:
        start_idx = 0
        pred_arg = []
        for kl in config.cate_lens:
            pred_arg.append(np.argmax(p_[start_idx: start_idx+kl]))
            start_idx += kl
        pred_args.append(pred_arg)
    pred_args_np = np.array(pred_args)   


    total_cell = ((config.vaild_test_drop_mask - 1) * -1)

    pred_cells = (pred_args_np * total_cell)

    true_cells = valid_df_cate.values
     
    cate_score = (pred_cells == true_cells).sum() / total_cell.sum()
    nume_score = (np.exp(-np.square(preds_cont_ - valid_df_cont.values)) * config.vaild_test_cont_mask).sum() / config.vaild_test_cont_mask.sum()
    print('categorical_score:', cate_score)
    print('numeric_score:', nume_score)
    print('total_score', cate_score + nume_score )

In [5]:
# test
print("Predicting...")
with tf.Session() as sess:
    saver.restore(sess, '.\\ckpt\\best1.ckpt')
    preds_cate_, preds_cont_ = run_session(sess, valid_step, [valid_df_cate, valid_df_cont], config, mdl, mode=3)

    preds_cate_1 = np.concatenate(preds_cate_)
    preds_cont_1 = np.concatenate(preds_cont_)
    eval_score(preds_cate_1, preds_cont_1)

    saver.restore(sess, '.\\ckpt\\best2.ckpt')
    preds_cate_, preds_cont_ = run_session(sess, valid_step, [valid_df_cate, valid_df_cont], config, mdl, mode=3)

    preds_cate_2 = np.concatenate(preds_cate_)
    preds_cont_2 = np.concatenate(preds_cont_)
    eval_score(preds_cate_2, preds_cont_2)

    saver.restore(sess, '.\\ckpt\\best3.ckpt')
    preds_cate_, preds_cont_ = run_session(sess, valid_step, [valid_df_cate, valid_df_cont], config, mdl, mode=3)

    preds_cate_3 = np.concatenate(preds_cate_)
    preds_cont_3 = np.concatenate(preds_cont_)
    eval_score(preds_cate_3, preds_cont_3)


Predicting...
INFO:tensorflow:Restoring parameters from .\ckpt\best1.ckpt
Exproting...
categorical_score: 0.6416770064650951
numeric_score: 0.947972197004974
total_score 1.589649203470069
INFO:tensorflow:Restoring parameters from .\ckpt\best2.ckpt
Exproting...
categorical_score: 0.6469666296610723
numeric_score: 0.9467986515393735
total_score 1.593765281200446
INFO:tensorflow:Restoring parameters from .\ckpt\best3.ckpt
Exproting...
categorical_score: 0.610396395219748
numeric_score: 0.944045069936653
total_score 1.5544414651564011
