In [1]:
import pandas as pd
import numpy as np
import pickle as pkl
import tensorflow as tf
import os, math
import config

from model_functions import build_model, run_session

os.makedirs(config.dir_ckpt , exist_ok=True)

raw_df = pd.read_csv('./data/dataset_kor/교통사망사고정보/Kor_Train_교통사망사고정보(12.1~17.6).csv', engine='python')
raw_df = raw_df[config.test_cols]

for key in config.kv_map.keys():
    raw_df[key] = raw_df[key].apply(lambda x: config.kv_map[key][x])

for mdl_num in range(1,4):
    if mdl_num == 1:
        np.random.seed(seed=1000)
        shuffle_idx = np.random.rand(len(raw_df)) 
        train_df = raw_df[shuffle_idx<= 0.85]
        valid_df = raw_df[shuffle_idx > 0.85]

    elif mdl_num == 2:
        np.random.seed(seed=2000)
        shuffle_idx = np.random.rand(len(raw_df)) 
        train_df = raw_df[shuffle_idx<= 0.85]
        valid_df = raw_df[shuffle_idx > 0.85]

    else:
        train_df = raw_df[:config.train_size]
        valid_df = raw_df[config.train_size:]
    
    
    config.path_ckpt = os.path.join(config.dir_ckpt, 'best{}.ckpt'.format(mdl_num))

    train_df_cate = train_df.loc[:,config.cate_cols]
    train_df_cont = train_df.loc[:,config.cont_cols]

    valid_df_cate = valid_df.loc[:,config.cate_cols]
    valid_df_cont = valid_df.loc[:,config.cont_cols]

    np.random.seed(seed=9)
    config.vaild_drop_mask = np.random.rand(valid_df_cate.values.shape[0], valid_df_cate.values.shape[1])>(1- config.keep_prop)
    config.vaild_cont_mask = np.random.rand(valid_df_cont.values.shape[0], valid_df_cont.values.shape[1])<(1- config.keep_prop)

    np.random.seed(seed=19)
    config.vaild_test_drop_mask = np.random.rand(valid_df_cate.values.shape[0], valid_df_cate.values.shape[1])>(1- config.keep_prop)
    config.vaild_test_cont_mask = np.random.rand(valid_df_cont.values.shape[0], valid_df_cont.values.shape[1])<(1- config.keep_prop)

    train_step = math.ceil(len(train_df)/ config.batch_size)
    valid_step = math.ceil(len(valid_df)/ config.batch_size)
    print("Data is Ready...")

    mdl = build_model(config)
    print("Model {} is built...".format(mdl_num))

    saver = tf.train.Saver(max_to_keep=5)
    init = tf.global_variables_initializer()

    # run session
    with tf.Session() as sess:
        tf.set_random_seed(seed=1991)
        np.random.seed(seed=1991)
        sess.run(init)
    #     saver.restore(sess, config.path_ckpt)
        min_val_loss = 9999
        print("Model {} is training".format(mdl_num))
        for epoch in range(1, config.epochs+1):
            #train
            trn_total_loss_ = run_session(sess, train_step, [train_df_cate, train_df_cont], config, mdl, mode=1)
            
            #valid
            val_total_loss_ = run_session(sess, valid_step, [valid_df_cate, valid_df_cont], config, mdl, mode=2)
            
            if config.verbose:
                print("Epoch : {}".format(epoch), end='\t')
                print("Train_loss : {:.6f} / {:.6f} / {:.6f}".format(trn_total_loss_[0], trn_total_loss_[1], trn_total_loss_[2]), end = '\t')
                print("Valid_loss : {:.6f} / {:.6f} / {:.6f}".format(val_total_loss_[0], val_total_loss_[1], val_total_loss_[2]), end = '\t')
            
            #monitor
            if val_total_loss_[0] < min_val_loss:
                saver.save(sess, config.path_ckpt)
                min_val_loss = val_total_loss_[0]
                if config.verbose:
                    print("Saved")
            else:
                if config.verbose:
                    print("No Saved")
        print("Training Model {} of 3 is Completed...".format(mdl_num))
        
print("Train is Ended, Do Test")

  from ._conv import register_converters as _register_converters


Data is Ready...
Model 1 is built...
Model 1 is training
Epoch : 1	Train_loss : 23.640390 / 21.692763 / 0.973814	Valid_loss : 17.843201 / 16.809255 / 0.516973	Saved
Epoch : 2	Train_loss : 15.038313 / 14.129243 / 0.454535	Valid_loss : 12.243634 / 11.492975 / 0.375330	Saved
Epoch : 3	Train_loss : 10.414303 / 9.812088 / 0.301108	Valid_loss : 8.911087 / 8.341046 / 0.285021	Saved
Epoch : 4	Train_loss : 7.828258 / 7.358059 / 0.235099	Valid_loss : 7.076513 / 6.593978 / 0.241267	Saved
Epoch : 5	Train_loss : 6.399501 / 5.980609 / 0.209446	Valid_loss : 6.020309 / 5.610040 / 0.205134	Saved
Epoch : 6	Train_loss : 5.679853 / 5.187026 / 0.246414	Valid_loss : 5.411513 / 5.014804 / 0.198355	Saved
Epoch : 7	Train_loss : 5.046168 / 4.698516 / 0.173826	Valid_loss : 5.045877 / 4.674028 / 0.185924	Saved
Epoch : 8	Train_loss : 4.694209 / 4.403048 / 0.145580	Valid_loss : 4.807809 / 4.443518 / 0.182145	Saved
Epoch : 9	Train_loss : 4.517829 / 4.214944 / 0.151442	Valid_loss : 4.676309 / 4.312336 / 0.181986	Save

# End

In [127]:
def eval_score(preds_cate_, preds_cont_):
    #out
    print("Exproting...")
    #cliping
    preds_cont_[preds_cont_<0] = 0.0

    # Categorical Vals Restore
    pred_args = []
    for p_ in preds_cate_:
        start_idx = 0
        pred_arg = []
        for kl in config.cate_lens:
            pred_arg.append(np.argmax(p_[start_idx: start_idx+kl]))
            start_idx += kl
        pred_args.append(pred_arg)
    pred_args_np = np.array(pred_args)   


    total_cell = ((config.vaild_test_drop_mask - 1) * -1)

    pred_cells = (pred_args_np * total_cell)

    true_cells = valid_df_cate.values
     
    cate_score = (pred_cells == true_cells).sum() / total_cell.sum()
    nume_score = (np.exp(-np.square(preds_cont_ - valid_df_cont.values)) * config.vaild_test_cont_mask).sum() / config.vaild_test_cont_mask.sum()
    print('categorical_score:', cate_score)
    print('numeric_score:', nume_score)
    print('total_score', cate_score + nume_score )

In [128]:
# test
print("Predicting...")
with tf.Session() as sess:
    saver.restore(sess, '.\\ckpt\\best1.ckpt')
    preds_cate_, preds_cont_ = run_session(sess, valid_step, [valid_df_cate, valid_df_cont], config, mdl, mode=3)

    preds_cate_1 = np.concatenate(preds_cate_)
    preds_cont_1 = np.concatenate(preds_cont_)
    eval_score(preds_cate_1, preds_cont_1)

    saver.restore(sess, '.\\ckpt\\best2.ckpt')
    preds_cate_, preds_cont_ = run_session(sess, valid_step, [valid_df_cate, valid_df_cont], config, mdl, mode=3)

    preds_cate_2 = np.concatenate(preds_cate_)
    preds_cont_2 = np.concatenate(preds_cont_)
    eval_score(preds_cate_2, preds_cont_2)

    saver.restore(sess, '.\\ckpt\\best3.ckpt')
    preds_cate_, preds_cont_ = run_session(sess, valid_step, [valid_df_cate, valid_df_cont], config, mdl, mode=3)

    preds_cate_3 = np.concatenate(preds_cate_)
    preds_cont_3 = np.concatenate(preds_cont_)
    eval_score(preds_cate_3, preds_cont_3)


Predicting...
INFO:tensorflow:Restoring parameters from .\ckpt\best1.ckpt
Exproting...
categorical_score: 0.6256122249069418
numeric_score: 0.940713424046426
total_score 1.566325648953368
INFO:tensorflow:Restoring parameters from .\ckpt\best2.ckpt
Exproting...
categorical_score: 0.6329915757852805
numeric_score: 0.939144471430407
total_score 1.5721360472156876
INFO:tensorflow:Restoring parameters from .\ckpt\best3.ckpt
Exproting...
categorical_score: 0.5981192450858748
numeric_score: 0.9414339235909605
total_score 1.5395531686768353
