In [1]:
import numpy as np
import pandas as pd
import pickle

In [2]:
# Import my modules.
import sys, os
from pathlib import Path

# Directory the kernel was started in; the `modules` package is expected one
# level above it.
current_dir = str(Path().resolve())
project_root = str(Path(current_dir).parent)
# Guarded so that re-running this cell does not keep appending the same
# entry to sys.path.
if project_root not in sys.path:
    sys.path.append(project_root)

from modules import utils
from modules import models
from modules import preprocess

# Reload the local modules so edits made on disk are picked up without
# restarting the kernel.
import importlib
for m in [utils, models, preprocess]:
    importlib.reload(m)

In [3]:
# Widen the pandas display limits so large frames are shown without truncation.
for option_name, option_value in (
    ('display.max_rows', 300),
    ('display.max_columns', 150),
):
    pd.set_option(option_name, option_value)

In [7]:
# Short alias for the 'microbusiness_density' column name
# (presumably the prediction target; not referenced in the cells shown here).
mbd = 'microbusiness_density'

# Load the three frames, using an earlier submission file as `subm`.
df_train, df_test, df_subm = utils.load_dataset(
    subm='../submission/submission_13732_trend.csv',
)

# Merge with every optional flag switched off except `merge41`.
# NOTE(review): the meaning of each flag lives in modules/utils.py -- confirm there.
df_all, df_census = utils.merge_dataset(
    df_train,
    df_test,
    pop=False,
    unemploy=False,
    census=False,
    co_est=False,
    coord=False,
    fix_pop=False,
    merge41=True,
    df_subm=df_subm,
)

In [5]:
# Settings passed to models.LgbmBaseline through its `params` argument.
params = {
    "act_thre": 140,
    "abs_thre": 0,
    "USE_LAG": 8,
    "USE_TREND": False,
    "blacklist": [],
    "blacklistcfips": [],
    "clip": (-0.0044, 0.0046),
    "model": 'lgb',
    "max_window": 12,
    "light": True,
    "start_max_scale": 41,
    "start_all_dict": 35,
    "smooth_method": 'v3',
    "save_output_dic": True,
}


def _trend_entry(n, lower_bound, upper_bound, v_regularize, method):
    """Build one trend configuration entry.

    Every entry uses the same value for 'n' and 'thre', a 'thre_r' of 0,
    regularisation switched on and an identical 'v_clip' range; only the
    bounds, the regularisation values and the method differ between groups.
    Fresh list objects are created on each call so entries never share state.
    """
    return {
        'params': {
            'n': n,
            'thre': n,
            'thre_r': 0,
            'lower_bound': lower_bound,
            'upper_bound': upper_bound,
            'use_regularize': True,
            'v_regularize': list(v_regularize),
            'v_clip': [0.999, 1.004],
        },
        'method': method,
    }


# (key, n) pairs shared by both groups: keys 1..3 map to n = 3..5.
# NOTE(review): the integer keys presumably index prediction steps -- confirm in modules.
_TREND_STEPS = ((1, 3), (2, 4), (3, 5))

# Trend settings; not referenced by any of the cells visible in this notebook.
trend_params = {
    "high_trend_params": {
        key: _trend_entry(n, 15000, 999999, (0.01, 0.008), 'mean')
        for key, n in _TREND_STEPS
    },
    "low_trend_params": {
        key: _trend_entry(n, 60, 140, (0.03, 0.02), 'replace')
        for key, n in _TREND_STEPS
    },
}

## バリデーション

### 前の予測結果を用いて予測していく

In [8]:
# Build the LightGBM baseline wrapper under the run name 'params_accum'
# and run its accumulated validation.
accum_pred = models.LgbmBaseline(
    'params_accum',
    df_subm,
    df_all,
    df_census,
    params=params,
)
# NOTE(review): m_len / max_pred_m semantics are defined in modules/models.py -- confirm there.
accum_pred.accum_validation(
    m_len=2,
    max_pred_m=1,
)

create df_all_dict[35] and df_all_dict_original[35]
add lag features: max_scale=35
smooth_outlier: max_scale=35
used method: v3
# of fixed cfips: 1159
# of fixed value: 2167
create df_all_dict[36] and df_all_dict_original[36]
add lag features: max_scale=36
smooth_outlier: max_scale=36
used method: v3
# of fixed cfips: 1159
# of fixed value: 2181
create df_all_dict[37] and df_all_dict_original[37]
add lag features: max_scale=37
smooth_outlier: max_scale=37
used method: v3
# of fixed cfips: 1168
# of fixed value: 2234
create df_all_dict[38] and df_all_dict_original[38]
add lag features: max_scale=38
smooth_outlier: max_scale=38
used method: v3
# of fixed cfips: 1186
# of fixed value: 2278
create df_all_dict[39] and df_all_dict_original[39]
add lag features: max_scale=39
smooth_outlier: max_scale=39
used method: v3
# of fixed cfips: 1189
# of fixed value: 2321
create df_all_dict[40] and df_all_dict_original[40]
add lag features: max_scale=40
smooth_outlier: max_scale=40
used method: v3
# 



valid_times:  39
pred_m:  1
train_times:  38
use df_all_dict[38]
use lgb.
valid_times:  38
pred_m:  1
train_times:  37
use df_all_dict[37]
use lgb.
valid_times:  37
pred_m:  1
train_times:  36
use df_all_dict[36]
use lgb.
valid_times:  36
pred_m:  1
train_times:  35
use df_all_dict[35]
use lgb.
saved output_dic[1].
saved params_accum_2023-03-06_21:58:08.pickle
saved output/params_accum_2023-03-06_21:58:08.csv


## Submissionの作成

### 予測結果を活用して予測

In [23]:
# Build a second baseline wrapper for the submission run.
# `start_all_dict` is passed explicitly as 40 here, while `params` also
# carries "start_all_dict": 35.
# NOTE(review): which of the two takes precedence is decided inside LgbmBaseline -- confirm.
test_subm = models.LgbmBaseline(
    'test_subm_accum',
    df_subm,
    df_all,
    df_census,
    start_all_dict=40,
    params=params,
)
# Predict for target scales 42 and 43 without saving.
test_subm.create_submission(
    target_scale=[42, 43],
    save=False,
)

add lag features: max_scale=40
created df_all_dict[40]
add lag features: max_scale=41
created df_all_dict[41]
valid_times:  42
pred_m:  1
train_times:  41
use df_all_dict[41]
['scale', 'state_i', 'select_rate1_lag1', 'select_rate1_lag2', 'select_rate1_lag3', 'select_rate1_lag4', 'select_rate1_lag5', 'select_rate1_lag6', 'select_rate1_lag7', 'select_rate1_rsum2', 'select_rate1_rsum4', 'select_rate1_rsum6', 'select_rate1_rsum8', 'select_rate1_rsum10', 'select_rate1_rsum12', 'select_active_lag1_diff1', 'select_active_lag1_diff2', 'select_active_lag1_diff3', 'select_active_lag1_diff4', 'select_active_lag1_diff5', 'select_active_lag1_diff6', 'select_active_lag1_diff7']
use lgb.
saved output_dic[1].
create df_all_dict[42].
add lag features: max_scale=42
success in updating df_all_dict
valid_times:  43
pred_m:  1
train_times:  42
use df_all_dict[42]
use lgb.
saved output_dic[2].


In [24]:
# Keep a handle on the submission frame produced by the run above.
df_submission = test_subm.df_submission

# Compare against the reference named 'submission_13818_trend'.
# NOTE(review): `df_subm` (not `df_submission`) is what gets compared here;
# confirm that this is intended and that `df_subm` holds the new predictions.
df_merged = utils.compare_submission(
    df_subm,
    'submission_13818_trend',
)

In [None]:
# Hand-recorded reference values kept as bare literals.
# NOTE(review): presumably results read off the submission comparison --
# confirm which metric these numbers represent.

# ensemble
0.01261112205424169

# with trend_multi
0.03200090765098272