In [None]:
import pickle
import numpy as np
import pandas as pd
import mxnet as mx
import matplotlib.pyplot as plt
import seaborn as sns
from mxnet import ndarray as nd
from sklearn.preprocessing import MinMaxScaler

import auto_encoder
import deep_model
import dataset
import utils
import re_rank
import config

%load_ext autoreload
%autoreload 2

In [None]:
# Load the (X, Y) pair of every dataset through the project `dataset` module.
# X_* are the inputs scored by the model further down; Y_0612 / Y_0102 are
# handed to deep_model.evaluateAll at the end of the notebook.
X_0612, Y_0612 = dataset.get_0612()
X_0102, Y_0102 = dataset.get_0102()
# For both 03-05 variants the second return value is discarded (bound to `_`).
X_0305, _ = dataset.get_0305()
X_0305_v2, _ = dataset.get_0305(version='v2')

In [None]:
# Show the dimensions of the 06-12 inputs; the size of axis 2 is what the
# model-loading cell substitutes into the checkpoint path template.
X_0612.shape

In [None]:
# Instantiate the CNN and restore its parameters from the checkpoint file.
# config.params['model_path'] is a format template; it is filled in with the
# size of axis 2 of X_0612.
checkpoint_path = config.params['model_path'].format(X_0612.shape[2])
model = deep_model.CNN()
model.load_parameters(checkpoint_path, ctx=config.params['ctx'])

def inference(X, model, ctx=None):
    """Return the predictions of ``model`` for ``X``.

    Thin wrapper around ``deep_model.predictProb``.

    Parameters
    ----------
    X
        Inputs to score; forwarded unchanged to ``deep_model.predictProb``.
    model
        Network to evaluate.
    ctx : optional
        Device context to run on. Defaults to ``config.params['ctx']``, the
        same context the model parameters are loaded on and the one used by
        ``deep_model.evaluateAll``. Previously ``mx.gpu()`` was hard-coded,
        which disagrees with the configured context whenever that is not the
        default GPU (e.g. a CPU-only configuration).

    Returns
    -------
    Whatever ``deep_model.predictProb`` returns; the rest of the notebook
    treats it as the per-sample 'bad' probability.
    """
    if ctx is None:
        # Keep inference on the device the parameters were loaded onto.
        ctx = config.params['ctx']
    return deep_model.predictProb(config.params, model, ctx, X)

In [None]:
# Score every dataset with the loaded model, in the order
# 06-12, 01-02, 03-05, 03-05 (v2).
(refined_outputs_0612,
 refined_outputs_0102,
 refined_outputs_0305,
 refined_outputs_0305_v2) = [inference(features, model)
                             for features in (X_0612, X_0102, X_0305, X_0305_v2)]

In [None]:
# Write the model probabilities of every dataset to a CSV report.
# Each entry is (CSV holding the application ids, name of its id column,
#                bad-probabilities, destination report path).
# The v2 file names its id column 'APPLYCD'; the report column is always 'applyid'.
for source_csv, id_column, bad_prob, report_csv in (
    ('data/train_0612.csv', 'applyid', refined_outputs_0612, 'report/inference_06-12.csv'),
    ('data/test_0102.csv', 'applyid', refined_outputs_0102, 'report/inference_01-02.csv'),
    ('data/test_0305.csv', 'applyid', refined_outputs_0305, 'report/inference_03-05.csv'),
    ('data/test_0305_v2.csv', 'APPLYCD', refined_outputs_0305_v2, 'report/inference_03-05_v2.csv'),
):
    application_ids = pd.read_csv(source_csv, encoding='gbk')[id_column]
    report = pd.DataFrame({'applyid': application_ids,
                           'bad-prob': bad_prob,
                           'good-prob': 1 - bad_prob})
    report.to_csv(report_csv, index=False)

In [None]:
# Pass each set of model probabilities through re_rank.rerank, in the order
# 06-12, 01-02, 03-05, 03-05 (v2).
(reranked_0612,
 reranked_0102,
 reranked_0305,
 reranked_0305_v2) = [re_rank.rerank(probs)
                      for probs in (refined_outputs_0612,
                                    refined_outputs_0102,
                                    refined_outputs_0305,
                                    refined_outputs_0305_v2)]

In [None]:
# Write the reranked probabilities of every dataset to a CSV report.
# Each entry is (CSV holding the application ids, name of its id column,
#                reranked bad-probabilities, destination report path).
# The v2 file names its id column 'APPLYCD'; the report column is always 'applyid'.
#
# 'good-prob' is always derived from the same reranked array as 'bad-prob'.
# The previous copy-pasted version computed the v2 'good-prob' column from the
# un-reranked refined_outputs_0305_v2, so the two columns of
# inference_03-05_v2_reranked.csv came from different arrays.
for source_csv, id_column, bad_prob, report_csv in (
    ('data/train_0612.csv', 'applyid', reranked_0612, 'report/inference_06-12_reranked.csv'),
    ('data/test_0102.csv', 'applyid', reranked_0102, 'report/inference_01-02_reranked.csv'),
    ('data/test_0305.csv', 'applyid', reranked_0305, 'report/inference_03-05_reranked.csv'),
    ('data/test_0305_v2.csv', 'APPLYCD', reranked_0305_v2, 'report/inference_03-05_v2_reranked.csv'),
):
    application_ids = pd.read_csv(source_csv, encoding='gbk')[id_column]
    report = pd.DataFrame({'applyid': application_ids,
                           'bad-prob': bad_prob,
                           'good-prob': 1 - bad_prob})
    report.to_csv(report_csv, index=False)

In [None]:
# Figure with a 2x4 grid of distribution plots: one column per dataset,
# model probabilities on the top row and their reranked version underneath.
# NOTE(review): sns.distplot is deprecated in recent seaborn releases; it is
# kept here so the rendered figure does not change.
fig = plt.figure(figsize=(16, 8))
weight_ratio = config.params['bad_weight'] / config.params['good_weight']
plt.suptitle('Bad / Good Ratio {} / 1'.format(weight_ratio))

# (top title, top data, bottom title, bottom data) for each grid column.
panels = (
    ('06-12', refined_outputs_0612, '06-12 (reranked)', reranked_0612),
    ('01-02', refined_outputs_0102, '01-02 (reranked)', reranked_0102),
    ('03-05 (77k)', refined_outputs_0305, '03-05 (77k, reranked)', reranked_0305),
    ('03-05 (78k)', refined_outputs_0305_v2, '03-05 (78k, reranked)', reranked_0305_v2),
)
for column, (top_title, top_probs, bottom_title, bottom_probs) in enumerate(panels, start=1):
    plt.subplot(2, 4, column)
    plt.title(top_title)
    sns.distplot(top_probs)
    # The bottom-row slot sits four positions after the top-row one.
    plt.subplot(2, 4, column + 4)
    plt.title(bottom_title)
    sns.distplot(bottom_probs)

fig.savefig('report/probs_distribution.png', dpi=150)
plt.show()

In [None]:
# Persist every probability array with np.save. The destination path of each
# array is read from config.params under the key listed next to it.
for params_key, probs in (
    ('refined_0612', refined_outputs_0612),
    ('refined_0102', refined_outputs_0102),
    ('refined_0305', refined_outputs_0305),
    ('refined_0305_v2', refined_outputs_0305_v2),
    ('reranked_0612', reranked_0612),
    ('reranked_0102', reranked_0102),
    ('reranked_0305', reranked_0305),
    ('reranked_0305_v2', reranked_0305_v2),
):
    with open(config.params[params_key], 'wb') as out_file:
        np.save(out_file, probs)

In [None]:
# Evaluate the loaded model on the 06-12 data against its labels, on the
# configured context. The call is the last expression of the cell, so its
# return value (if any) is what the cell displays.
deep_model.evaluateAll(config.params, model, config.params['ctx'], X=X_0612, Y=Y_0612)

In [None]:
# Evaluate the loaded model on the 01-02 data against its labels, on the
# configured context. The call is the last expression of the cell, so its
# return value (if any) is what the cell displays.
deep_model.evaluateAll(config.params, model, config.params['ctx'], X=X_0102, Y=Y_0102)