In [1]:
import pandas as pd
import numpy as np
import lightgbm
from tqdm import tqdm
import warnings

In [2]:
# 경고 끄기
pd.set_option('mode.chained_assignment', None)
warnings.filterwarnings(action='ignore')

In [3]:
def preprocessing(temp_df, pum, len_lag) :
    # p_lag, q_lag 추가
    for lag in range(1,len_lag+1) :
      temp_df[f'p_lag_{lag}'] = -1
      temp_df[f'q_lag_{lag}'] = -1
      for index in range(lag, len(temp_df)) :
        temp_df.loc[index, f'p_lag_{lag}'] = temp_df[f'{pum}_가격(원/kg)'][index-lag] #1일전, 2일전, ... 가격을 feature로 추가
        temp_df.loc[index, f'q_lag_{lag}'] = temp_df[f'{pum}_거래량(kg)'][index-lag] #1일전, 2일전, ... 거래량을 feature로 추가

    # month 추가
    temp_df['date'] = pd.to_datetime(temp_df['date'])
    temp_df['month'] = temp_df['date'].dt.month

    # 예측 대상(1w,2w,4w) 추가
    for week in ['1_week','2_week','4_week'] :
      temp_df[week] = 0
      n_week = int(week[0])
      for index in range(len(temp_df)) :
        try : temp_df[week][index] = temp_df[f'{pum}_가격(원/kg)'][index+7*n_week]
        except : continue

    # 불필요한 column 제거        
    temp_df = temp_df.drop(['date',f'{pum}_거래량(kg)',f'{pum}_가격(원/kg)'], axis=1)
    
    return temp_df

In [4]:
train = pd.read_csv('data/public_data/train.csv')
train.head(2)

Unnamed: 0,date,요일,배추_거래량(kg),배추_가격(원/kg),무_거래량(kg),무_가격(원/kg),양파_거래량(kg),양파_가격(원/kg),건고추_거래량(kg),건고추_가격(원/kg),...,청상추_거래량(kg),청상추_가격(원/kg),백다다기_거래량(kg),백다다기_가격(원/kg),애호박_거래량(kg),애호박_가격(원/kg),캠벨얼리_거래량(kg),캠벨얼리_가격(원/kg),샤인마스캇_거래량(kg),샤인마스캇_가격(원/kg)
0,2016-01-01,금요일,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2016-01-02,토요일,80860.0,329.0,80272.0,360.0,122787.5,1281.0,3.0,11000.0,...,5125.0,9235.0,434.0,2109.0,19159.0,2414.0,880.0,2014.0,0.0,0.0


In [5]:
# preprocessing 함수 예시
pum = '배추'
temp_df = train[['date',f'{pum}_거래량(kg)', f'{pum}_가격(원/kg)']]
preprocessing(temp_df, pum, len_lag=28)

Unnamed: 0,p_lag_1,q_lag_1,p_lag_2,q_lag_2,p_lag_3,q_lag_3,p_lag_4,q_lag_4,p_lag_5,q_lag_5,...,p_lag_26,q_lag_26,p_lag_27,q_lag_27,p_lag_28,q_lag_28,month,1_week,2_week,4_week
0,-1,-1.0,-1,-1.0,-1,-1.0,-1,-1.0,-1,-1.0,...,-1,-1.0,-1,-1.0,-1,-1.0,1,420,449,625
1,0,0.0,-1,-1.0,-1,-1.0,-1,-1.0,-1,-1.0,...,-1,-1.0,-1,-1.0,-1,-1.0,1,389,454,733
2,329,80860.0,0,0.0,-1,-1.0,-1,-1.0,-1,-1.0,...,-1,-1.0,-1,-1.0,-1,-1.0,1,0,0,1048
3,0,0.0,329,80860.0,0,0.0,-1,-1.0,-1,-1.0,...,-1,-1.0,-1,-1.0,-1,-1.0,1,398,475,638
4,478,1422742.5,0,0.0,329,80860.0,0,0.0,-1,-1.0,...,-1,-1.0,-1,-1.0,-1,-1.0,1,431,511,597
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1728,1807,2007471.3,1838,1757465.6,1813,2046286.3,2925,1959.0,1812,1346091.4,...,1476,760499.0,1564,763266.0,1561,1020033.2,9,0,0,0
1729,1839,1856965.0,1807,2007471.3,1838,1757465.6,1813,2046286.3,2925,1959.0,...,0,0.0,1476,760499.0,1564,763266.0,9,0,0,0
1730,1789,1880095.5,1839,1856965.0,1807,2007471.3,1838,1757465.6,1813,2046286.3,...,1133,1441152.8,0,0.0,1476,760499.0,9,0,0,0
1731,1760,1661090.9,1789,1880095.5,1839,1856965.0,1807,2007471.3,1838,1757465.6,...,1093,1279591.6,1133,1441152.8,0,0.0,9,0,0,0


In [6]:
def nmae(week_answer, week_submission):
    answer = week_answer
    target_idx = np.where(answer!=0)
    true = answer[target_idx]
    pred = week_submission[target_idx]
    score = np.mean(np.abs(true-pred)/true)
    
    return score


def at_nmae(pred, dataset):
    y_true = dataset.get_label()
    week_1_answer = y_true[0::3]
    week_2_answer = y_true[1::3]
    week_4_answer = y_true[2::3]
    
    week_1_submission = pred[0::3]
    week_2_submission = pred[1::3]
    week_4_submission = pred[2::3]
    
    score1 = nmae(week_1_answer, week_1_submission)
    score2 = nmae(week_2_answer, week_2_submission)
    score4 = nmae(week_4_answer, week_4_submission)
    
    score = (score1+score2+score4)/3
    
    return 'score', score, False

In [7]:
def model_train(x_train, y_train, x_valid, y_valid) :
    params = {'learning_rate': 0.01, 
              'max_depth': 6, 
              'boosting': 'gbdt', 
              'objective': 'regression',  
              'is_training_metric': True, 
              'num_leaves': 100, 
              'feature_fraction': 0.8, 
              'bagging_fraction': 0.8, 
              'bagging_freq': 5, 
              'seed':42,
              'num_threads':8
             }

    model = lightgbm.train(params, 
                   train_set = lightgbm.Dataset(data = x_train, label = y_train),
                   num_boost_round = 10000, 
                   valid_sets = lightgbm.Dataset(data = x_valid, label = y_valid), 
                   init_model = None, 
                   early_stopping_rounds = 100,
                   feval = at_nmae,
                   verbose_eval = False
                    )
    
    return model

In [8]:
unique_pum = [
    '배추', '무', '양파', '건고추','마늘',
    '대파', '얼갈이배추', '양배추', '깻잎',
    '시금치', '미나리', '당근',
    '파프리카', '새송이', '팽이버섯', '토마토',
]

unique_kind = [
    '청상추', '백다다기', '애호박', '캠벨얼리', '샤인마스캇'
]

In [9]:
model_dict = {}
split = 28 #validation

for pum in tqdm(unique_pum + unique_kind):
    # 품목 품종별 전처리
    temp_df = train[['date',f'{pum}_거래량(kg)', f'{pum}_가격(원/kg)']]
    temp_df = preprocessing(temp_df, pum, len_lag=28)
    
    # 주차별(1,2,4w) 학습
    for week_num in [1,2,4] :
        x = temp_df[temp_df[f'{week_num}_week']>0].iloc[:,:-3]
        y = temp_df[temp_df[f'{week_num}_week']>0][f'{week_num}_week']
        
        #train, test split
        x_train = x[:-split]
        y_train = y[:-split]
        x_valid = x[-split:]
        y_valid = y[-split:]
        
        model_dict[f'{pum}_model_{week_num}'] = model_train(x_train, y_train, x_valid, y_valid)

  0%|          | 0/21 [00:00<?, ?it/s]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1447, number of used features: 57
[LightGBM] [Info] Start training from score 679.538355
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1441, number of used features: 57
[LightGBM] [Info] Start training from score 680.627342
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1429, number of used features: 57
[LightGBM] [Info] Start training from score 681.605318


  5%|▍         | 1/21 [00:27<09:11, 27.60s/it]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1446, number of used features: 57
[LightGBM] [Info] Start training from score 568.656985
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1440, number of used features: 57
[LightGBM] [Info] Start training from score 569.642361
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1428, number of used features: 57
[LightGBM] [Info] Start training from score 570.455182


 10%|▉         | 2/21 [01:04<10:24, 32.87s/it]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1445, number of used features: 57
[LightGBM] [Info] Start training from score 759.676125
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1439, number of used features: 57
[LightGBM] [Info] Start training from score 757.897151
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1427, number of used features: 57
[LightGBM] [Info] Start training from score 754.278907


 14%|█▍        | 3/21 [01:31<09:05, 30.29s/it]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1426, number of used features: 57
[LightGBM] [Info] Start training from score 11129.511921
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1420, number of used features: 57
[LightGBM] [Info] Start training from score 11153.730282
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1408, number of used features: 57
[LightGBM] [Info] Start training from score 11197.019176


 19%|█▉        | 4/21 [01:56<08:02, 28.38s/it]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1435, number of used features: 57
[LightGBM] [Info] Start training from score 4030.516376
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1429, number of used features: 57
[LightGBM] [Info] Start training from score 4027.606718
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1417, number of used features: 57
[LightGBM] [Info] Start training from score 4019.030346


 24%|██▍       | 5/21 [02:23<07:25, 27.87s/it]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1446, number of used features: 57
[LightGBM] [Info] Start training from score 1343.253112
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1440, number of used features: 57
[LightGBM] [Info] Start training from score 1343.163194
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1428, number of used features: 57
[LightGBM] [Info] Start training from score 1338.202381


 29%|██▊       | 6/21 [02:54<07:13, 28.93s/it]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1439, number of used features: 57
[LightGBM] [Info] Start training from score 1044.302988
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1433, number of used features: 57
[LightGBM] [Info] Start training from score 1044.647592
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1421, number of used features: 57
[LightGBM] [Info] Start training from score 1042.720619


 33%|███▎      | 7/21 [03:22<06:41, 28.66s/it]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1442, number of used features: 57
[LightGBM] [Info] Start training from score 608.380721
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1436, number of used features: 57
[LightGBM] [Info] Start training from score 609.649025
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1424, number of used features: 57
[LightGBM] [Info] Start training from score 610.894663

 38%|███▊      | 8/21 [03:53<06:19, 29.18s/it]


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1446, number of used features: 57
[LightGBM] [Info] Start training from score 5319.617566
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1440, number of used features: 57
[LightGBM] [Info] Start training from score 5319.672917
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1428, number of used features: 57
[LightGBM] [Info] Start training from score 5305.740896


 43%|████▎     | 9/21 [04:21<05:47, 28.97s/it]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1443, number of used features: 57
[LightGBM] [Info] Start training from score 2686.424116
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1437, number of used features: 57
[LightGBM] [Info] Start training from score 2688.929019
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1425, number of used features: 57
[LightGBM] [Info] Start training from score 2689.562807


 48%|████▊     | 10/21 [04:49<05:13, 28.53s/it]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1440, number of used features: 57
[LightGBM] [Info] Start training from score 2476.580556
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1434, number of used features: 57
[LightGBM] [Info] Start training from score 2475.780335
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1422, number of used features: 57
[LightGBM] [Info] Start training from score 2467.916315


 52%|█████▏    | 11/21 [05:13<04:32, 27.22s/it]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1438, number of used features: 57
[LightGBM] [Info] Start training from score 1066.341446
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1432, number of used features: 57
[LightGBM] [Info] Start training from score 1067.795391
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1420, number of used features: 57
[LightGBM] [Info] Start training from score 1069.976761


 57%|█████▋    | 12/21 [05:44<04:16, 28.52s/it]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1435, number of used features: 57
[LightGBM] [Info] Start training from score 3424.883624
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1429, number of used features: 57
[LightGBM] [Info] Start training from score 3410.724983
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1417, number of used features: 57
[LightGBM] [Info] Start training from score 3363.064926


 62%|██████▏   | 13/21 [06:11<03:42, 27.83s/it]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1442, number of used features: 57
[LightGBM] [Info] Start training from score 2194.133842
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1436, number of used features: 57
[LightGBM] [Info] Start training from score 2193.572423
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1424, number of used features: 57
[LightGBM] [Info] Start training from score 2189.389747


 67%|██████▋   | 14/21 [06:36<03:10, 27.19s/it]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1435, number of used features: 57
[LightGBM] [Info] Start training from score 1677.395122
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1429, number of used features: 57
[LightGBM] [Info] Start training from score 1678.906228
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1417, number of used features: 57
[LightGBM] [Info] Start training from score 1680.748765


 71%|███████▏  | 15/21 [07:02<02:40, 26.75s/it]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1442, number of used features: 57
[LightGBM] [Info] Start training from score 2141.410541
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1436, number of used features: 57
[LightGBM] [Info] Start training from score 2140.956128
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1424, number of used features: 57
[LightGBM] [Info] Start training from score 2136.935393

 76%|███████▌  | 16/21 [07:33<02:19, 28.00s/it]


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1441, number of used features: 57
[LightGBM] [Info] Start training from score 3179.123525
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1435, number of used features: 57
[LightGBM] [Info] Start training from score 3179.760976
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1423, number of used features: 57
[LightGBM] [Info] Start training from score 3175.827126


 81%|████████  | 17/21 [07:58<01:48, 27.23s/it]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1437, number of used features: 57
[LightGBM] [Info] Start training from score 1836.699374
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1431, number of used features: 57
[LightGBM] [Info] Start training from score 1835.300489
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1419, number of used features: 57
[LightGBM] [Info] Start training from score 1830.317829


 86%|████████▌ | 18/21 [08:24<01:20, 26.82s/it]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1442, number of used features: 57
[LightGBM] [Info] Start training from score 1847.013870
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1436, number of used features: 57
[LightGBM] [Info] Start training from score 1844.135794
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1424, number of used features: 57
[LightGBM] [Info] Start training from score 1831.014747


 90%|█████████ | 19/21 [08:51<00:53, 26.71s/it]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1395, number of used features: 57
[LightGBM] [Info] Start training from score 3908.934767
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1389, number of used features: 57
[LightGBM] [Info] Start training from score 3913.192225
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 14293
[LightGBM] [Info] Number of data points in the train set: 1377, number of used features: 57
[LightGBM] [Info] Start training from score 3922.188816


 95%|█████████▌| 20/21 [09:16<00:26, 26.23s/it]

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12391
[LightGBM] [Info] Number of data points in the train set: 894, number of used features: 57
[LightGBM] [Info] Start training from score 13189.961969
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12018
[LightGBM] [Info] Number of data points in the train set: 894, number of used features: 57
[LightGBM] [Info] Start training from score 13189.961969
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11186
[LightGBM] [Info] Number of data points in the train set: 894, number of used features: 57
[LightGBM] [Info] Start training from score 13189.961969


100%|██████████| 21/21 [09:43<00:00, 27.79s/it]






In [10]:
submission = pd.read_csv('data/sample_submission.csv')
public_date_list = submission[submission['예측대상일자'].str.contains('2020')]['예측대상일자'].str.split('+').str[0].unique()
# ['2020-09-29', ...]

for date in tqdm(public_date_list) :
    test = pd.read_csv(f'./data/public_data/test_files/test_{date}.csv')
    for pum in unique_pum + unique_kind:
        # 예측기준일에 대해 전처리
        temp_test = pd.DataFrame([{'date' : date}]) #예측기준일
        alldata = pd.concat([train, test, temp_test], sort=False).reset_index(drop=True)
        alldata = alldata[['date', f'{pum}_거래량(kg)', f'{pum}_가격(원/kg)']].fillna(0)
        alldata = alldata.iloc[-28:].reset_index(drop=True)
        alldata = preprocessing(alldata, pum, len_lag=28)
        temp_test = alldata.iloc[-1].astype(float)
        
        # 개별 모델을 활용하여 1,2,4주 후 가격 예측
        for week_num in [1,2,4] :
            temp_model = model_dict[f'{pum}_model_{week_num}']
            result = temp_model.predict(temp_test[:-3])
            condition = (submission['예측대상일자']==f'{date}+{week_num}week')
            idx = submission[condition].index
            submission.loc[idx, f'{pum}_가격(원/kg)'] = result[0]

100%|██████████| 38/38 [03:45<00:00,  5.93s/it]


In [11]:
submission.to_csv('submission/baseline2_0920.csv',index=False)

In [12]:
submission

Unnamed: 0,예측대상일자,배추_가격(원/kg),무_가격(원/kg),양파_가격(원/kg),건고추_가격(원/kg),마늘_가격(원/kg),대파_가격(원/kg),얼갈이배추_가격(원/kg),양배추_가격(원/kg),깻잎_가격(원/kg),...,당근_가격(원/kg),파프리카_가격(원/kg),새송이_가격(원/kg),팽이버섯_가격(원/kg),토마토_가격(원/kg),청상추_가격(원/kg),백다다기_가격(원/kg),애호박_가격(원/kg),캠벨얼리_가격(원/kg),샤인마스캇_가격(원/kg)
0,2020-09-29+1week,1495.347263,859.134501,943.519578,23011.950429,5330.406613,2364.730015,1408.124364,1049.844008,7799.251742,...,1550.712980,4846.593905,2345.548550,2527.599370,3019.524969,3526.229770,2036.085197,2081.012106,3934.668262,10637.058606
1,2020-09-29+2week,1179.515990,801.570871,934.182302,17469.242448,5376.074435,1987.904739,1250.337131,880.891154,5493.315073,...,1495.789446,3802.466212,2197.041058,1688.550802,3038.634942,3741.783830,1864.313217,1779.668406,3741.398386,11053.072273
2,2020-09-29+4week,1074.262083,840.074329,965.698100,18722.509410,5412.172634,1492.950925,908.156647,753.059543,6121.340614,...,1316.978376,3470.972039,2189.470433,1692.501998,2475.424185,4605.212299,1832.703378,1745.393034,3846.055928,10049.587692
3,2020-09-30+1week,1495.187305,837.236204,963.475492,21174.897706,5369.335018,2326.080722,1327.094574,979.743983,7791.642147,...,1546.185196,4478.268861,2369.250182,2500.193060,3198.358124,3487.908049,1868.606694,1859.934004,3948.192848,10572.274800
4,2020-09-30+2week,1154.818249,807.683772,931.829122,20539.162389,5104.978318,2004.566508,1121.677236,872.775488,5457.043787,...,1436.138621,3752.195567,2197.041058,1686.519924,3221.874587,3784.283191,1795.710313,1782.467953,3721.828580,11095.428757
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
223,2021-11-03+2week,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
224,2021-11-03+4week,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
225,2021-11-04+1week,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
226,2021-11-04+2week,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
