In [94]:
# General imports
import warnings

warnings.filterwarnings("ignore")

# data
import pickle
#import openpyxl


from utils import *


def mixed_df(model_top, top_df, val_all_df_x, preds_all, num_top):
    """
    :objective: mix two models' outputs -- start from the "all" model's predictions and
                overwrite the rows belonging to the top group with `model_top`'s predictions
    :param model_top: fitted model exposing ``predict`` (an LGBMRegressor in this project)
    :param top_df: df sorted by "mean sales origin"; the index labels of its first
                   `num_top` rows define the top group
    :param val_all_df_x: full feature df; its rows are what `model_top` predicts on
    :param preds_all: predicted values for every row of `val_all_df_x`, in row order
    :param num_top: number of leading rows of `top_df` that count as "top"
    :return: pd.DataFrame -- a copy of `val_all_df_x` with an added TARGET column
    """
    top_labels = top_df.index[:num_top]
    # Boolean row mask over the full frame: True where the row's label is in the top group.
    is_top = val_all_df_x.index.isin(top_labels)

    val_copy = val_all_df_x.copy()
    val_copy[TARGET] = preds_all

    # Single .loc write instead of chained `val_copy[TARGET].loc[...] = ...`, which may
    # assign into a temporary (and never reaches val_copy under pandas Copy-on-Write).
    # All top rows are predicted in one call; the features come from `val_all_df_x`
    # so the freshly added TARGET column is never fed to the model.
    # Skipped when no row qualifies, so the model is not called on an empty frame.
    if is_top.any():
        val_copy.loc[is_top, TARGET] = model_top.predict(val_all_df_x.loc[is_top])

    return val_copy


def _load_pickled_model(file_name):
    """Unpickle one model file from MODELS_DIR and close the file handle afterwards."""
    # NOTE(security): pickle.load can execute arbitrary code embedded in the file --
    # only load model files that come from a trusted source.
    with open(MODELS_DIR + file_name, 'rb') as model_file:
        return pickle.load(model_file)


def _predict_split(model_all, model_top, origin_path, features_path, num_top):
    """Score one test split and return its origin frame with the mixed predictions in TARGET."""
    origin_df = load_df(origin_path)
    features = load_df(features_path).copy()
    features = features.drop(['index', 'show_id', TARGET], axis=1)
    features_sorted = features.sort_values('mean_sales_origin', ascending=False)
    # Predict all observations
    preds_all = model_all.predict(features)
    # Mixed DF: the first `num_top` rows of the sorted frame use the "top" model instead
    mixed = mixed_df(model_top, features_sorted, features, preds_all, num_top=num_top)
    # Column assignment aligns on index labels between the origin and the feature frame.
    origin_df[TARGET] = mixed[TARGET]
    return origin_df.drop(columns=['index'])


def predict():
    """
    :objective: run model on test data -- load the four pickled models, score the
                "wd" and "wk" test splits and attach the mixed predictions as TARGET
    :return: pd.DataFrame, pd.DataFrame (wd frame, wk frame; 'index' column dropped)
    """
    # NOTE(review): absolute, machine-specific data location -- consider a configurable
    # directory so the notebook can run elsewhere.
    data_dir = 'C:/Users/secre/Desktop/bigcon_fake/final/'

    # Load Models
    model_wd_all = _load_pickled_model('lgbm_finalmodel_wd_all.bin')
    model_wd_top = _load_pickled_model('lgbm_finalmodel_wd_top.bin')
    model_wk_all = _load_pickled_model('lgbm_finalmodel_wk_all.bin')
    model_wk_top = _load_pickled_model('lgbm_finalmodel_wk_top.bin')

    # wd (Top: 727 rows)
    test_wd_origin = _predict_split(
        model_wd_all,
        model_wd_top,
        data_dir + 'test_fin_wd_lag.pkl',
        data_dir + 'test_fin_wd_PP.pkl',
        num_top=727,
    )

    # wk (Top: 249 rows)
    test_wk_origin = _predict_split(
        model_wk_all,
        model_wk_top,
        data_dir + 'test_fin_wk_lag.pkl',
        data_dir + 'test_fin_wk_PP.pkl',
        num_top=249,
    )

    # two outputs
    return test_wd_origin, test_wk_origin


def _insert_rows_by_label(base, extra):
    """Insert the rows of `extra` into `base`, positioned by their integer index labels."""
    if extra.empty:
        return base

    n_base = len(base)
    labels = extra.index
    # `order` holds row positions into the stacked frame [base; extra]; building it in a
    # plain list replaces the repeated pd.concat of the whole frame inside the loop.
    order = list(range(n_base))
    offset = 0
    for label in labels:
        # Every row of `extra` carrying this label is inserted as one block.
        block = [n_base + int(pos) for pos in (labels == label).nonzero()[0]]
        insert_at = label + offset
        # List slice assignment clamps out-of-range positions the same way
        # `.iloc[:k]` / `.iloc[k:]` slicing does (past the end -> append).
        order[insert_at:insert_at] = block
        # NOTE(review): the `+ offset` shift is kept exactly as in the previous logic.
        # If the labels are meant to be final row positions, inserting in ascending
        # order needs no shift -- confirm against how zero.pkl was produced.
        offset += 1

    return pd.concat([base, extra], axis=0).iloc[order]


def submission(wd, wk, zero_path='C:/Users/secre/Desktop/bigcon_fake/final/zero.pkl'):
    """
    create submission file
    :param wd: pd.DataFrame
    :param wk: pd.DataFrame
    :param zero_path: path of the pickled frame whose rows are re-inserted into the
                      result at positions derived from their index labels
    :return: pd.DataFrame -- wd and wk rows sorted by '방송일시', plus the re-inserted rows
    """
    out_cols = ['방송일시', '노출(분)', '마더코드', '상품코드', '상품명', '상품군', '판매단가', TARGET]
    # Stack both frames, order by broadcast time and renumber the rows 0..n-1.
    test_final_full = (
        pd.concat([wd[out_cols], wk[out_cols]], axis=0)
        .sort_values(['방송일시'])
        .reset_index(drop=True)
    )

    ## Added section ##############################################
    test_zero = pd.read_pickle(zero_path)
    test_final_full = _insert_rows_by_label(test_final_full, test_zero)

#     test_final_full.to_excel('C:/Users/secre/Desktop/bigcon_fake/final/submission.xlsx', index=False)

    # Returned so callers can bind the result instead of relying on a leaked global.
    return test_final_full

In [99]:
# Score both test splits, then assemble the submission frame from them.
test_wd_origin, test_wk_origin = predict()
# NOTE(review): whatever submission() returns is discarded here, while a later cell
# displays `test_final_full`; bind that name at top level so Restart & Run All works.
submission(test_wd_origin, test_wk_origin)

In [104]:
# NOTE(review): `test_final_full` is not assigned at top level in any cell shown here
# (submission() only uses that name locally), so this display depends on leftover
# kernel state -- confirm it survives a fresh kernel.
test_final_full

Unnamed: 0,방송일시,노출(분),마더코드,상품코드,상품명,상품군,판매단가,취급액
0,2020-06-01 06:20:00,20.0,100650,201971,잭필드 남성 반팔셔츠 4종,의류,59800,4.384077e+06
1,2020-06-01 06:40:00,20.0,100650,201971,잭필드 남성 반팔셔츠 4종,의류,59800,9.512172e+06
2,2020-06-01 07:00:00,20.0,100650,201971,잭필드 남성 반팔셔츠 4종,의류,59800,7.773682e+06
3,2020-06-01 07:20:00,20.0,100445,202278,쿠미투니카 쿨 레이시 란쥬쉐이퍼&팬티,속옷,69900,1.831314e+07
4,2020-06-01 07:40:00,20.0,100445,202278,쿠미투니카 쿨 레이시 란쥬쉐이퍼&팬티,속옷,69900,3.306095e+07
...,...,...,...,...,...,...,...,...
2848,2020-06-30 18:40,20.0,100073,200195,삼성화재 행복한파트너 주택상해윤전자(올케어),무형,-,
2849,2020-06-30 19:00,20.0,100073,200195,삼성화재 행복한파트너 주택상해윤전자(올케어),무형,-,
2886,2020-07-01 0:20,20.0,100660,201989,쉴렉스 안마의자 렌탈서비스,무형,-,
2887,2020-07-01 0:40,20.0,100660,201989,쉴렉스 안마의자 렌탈서비스,무형,-,
