# test train encoded csv 생성 코드 (11.10)_이혜승

## Import

In [1]:
import random
import pandas as pd
import numpy as np
import os

from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import LabelEncoder

import warnings
warnings.filterwarnings(action='ignore') 

## Fixed Random-Seed

In [70]:
def seed_everything(seed):
    """Seed Python's ``random`` module, NumPy's global RNG and PYTHONHASHSEED.

    Args:
        seed: Value handed to ``random.seed`` and ``np.random.seed``; its
            string form is stored in the ``PYTHONHASHSEED`` environment
            variable.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    for seeder in (random.seed, np.random.seed):
        seeder(seed)


seed_everything(42)  # fix the seed for the whole notebook

## Load Data

In [71]:
# Read the four input tables from the parent directory in one pass.
train_df, test_df, international_trade_df, submission = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../train.csv',
        '../test.csv',
        '../international_trade.csv',
        '../submission.csv',
    )
)

In [72]:
import pandas as pd
from pytimekr import pytimekr 

def process_dataset(dataset):
    """Add calendar and holiday feature columns to ``dataset`` in place.

    The ``timestamp`` column is converted to datetime and the following
    columns are added: ``year``, ``month``, ``day``, ``week`` (ISO week
    number), ``isWeekday``, ``isSaturday``, ``isSunday`` and ``holiday``
    (1 when the date is one of the holidays reported by
    ``pytimekr.holidays`` for the years present in the data, else 0).

    Args:
        dataset: DataFrame with a ``timestamp`` column that
            ``pd.to_datetime`` can parse.

    Returns:
        The same DataFrame object that was passed in, with the new columns.
    """
    # Convert the timestamp column to datetime objects.
    dataset['timestamp'] = pd.to_datetime(dataset['timestamp'])
    timestamps = dataset['timestamp']
    iso = timestamps.dt.isocalendar()

    # Use the calendar year, not the ISO week-numbering year: around New Year
    # the two differ (e.g. 2019-12-30 belongs to ISO year 2020), which would
    # pair a December/January month with the wrong year when (year, month) is
    # later used as a key against monthly data.
    dataset['year'] = timestamps.dt.year
    dataset['month'] = timestamps.dt.month
    dataset['day'] = timestamps.dt.day
    dataset['week'] = iso.week

    # ISO weekday: 1 = Monday ... 7 = Sunday. Kept local instead of being
    # written to the frame and dropped again.
    iso_weekday = iso.day
    dataset['isWeekday'] = ((iso_weekday >= 1) & (iso_weekday <= 5)).astype(int)
    dataset['isSaturday'] = (iso_weekday == 6).astype(int)
    dataset['isSunday'] = (iso_weekday == 7).astype(int)

    # Holiday flag: collect the holidays of every year present in the data.
    # pytimekr.holidays presumably returns date-like objects; both sides are
    # normalised to midnight datetimes so the membership test does not depend
    # on how pandas compares datetime64 values with other date types.
    all_holidays = [
        holiday
        for year in dataset['year'].unique()
        for holiday in pytimekr.holidays(year=int(year))
    ]
    holiday_dates = pd.to_datetime(all_holidays)
    dataset['holiday'] = timestamps.dt.normalize().isin(holiday_dates).astype(int)

    return dataset

In [73]:
# process_dataset adds its columns to the frame it receives and returns that
# same frame, so `train`/`test` refer to the same objects as `train_df`/`test_df`.
train = process_dataset(train_df)
test = process_dataset(test_df)
display(train)
display(test)

Unnamed: 0,ID,timestamp,item,corporation,location,supply(kg),price(원/kg),year,month,day,week,isWeekday,isSaturday,isSunday,holiday
0,TG_A_J_20190101,2019-01-01,TG,A,J,0.0,0.0,2019,1,1,1,1,0,0,1
1,TG_A_J_20190102,2019-01-02,TG,A,J,0.0,0.0,2019,1,2,1,1,0,0,0
2,TG_A_J_20190103,2019-01-03,TG,A,J,60601.0,1728.0,2019,1,3,1,1,0,0,0
3,TG_A_J_20190104,2019-01-04,TG,A,J,25000.0,1408.0,2019,1,4,1,1,0,0,0
4,TG_A_J_20190105,2019-01-05,TG,A,J,32352.0,1250.0,2019,1,5,1,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59392,RD_F_J_20230227,2023-02-27,RD,F,J,452440.0,468.0,2023,2,27,9,1,0,0,0
59393,RD_F_J_20230228,2023-02-28,RD,F,J,421980.0,531.0,2023,2,28,9,1,0,0,0
59394,RD_F_J_20230301,2023-03-01,RD,F,J,382980.0,574.0,2023,3,1,9,1,0,0,1
59395,RD_F_J_20230302,2023-03-02,RD,F,J,477220.0,523.0,2023,3,2,9,1,0,0,0


Unnamed: 0,ID,timestamp,item,corporation,location,year,month,day,week,isWeekday,isSaturday,isSunday,holiday
0,TG_A_J_20230304,2023-03-04,TG,A,J,2023,3,4,9,0,1,0,0
1,TG_A_J_20230305,2023-03-05,TG,A,J,2023,3,5,9,0,0,1,0
2,TG_A_J_20230306,2023-03-06,TG,A,J,2023,3,6,10,1,0,0,0
3,TG_A_J_20230307,2023-03-07,TG,A,J,2023,3,7,10,1,0,0,0
4,TG_A_J_20230308,2023-03-08,TG,A,J,2023,3,8,10,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1087,RD_F_J_20230327,2023-03-27,RD,F,J,2023,3,27,13,1,0,0,0
1088,RD_F_J_20230328,2023-03-28,RD,F,J,2023,3,28,13,1,0,0,0
1089,RD_F_J_20230329,2023-03-29,RD,F,J,2023,3,29,13,1,0,0,0
1090,RD_F_J_20230330,2023-03-30,RD,F,J,2023,3,30,13,1,0,0,0


In [74]:
# Preview the first rows of the raw international trade table.
international_trade_df.head()

Unnamed: 0,기간,품목명,수출 중량,수출 금액,수입 중량,수입 금액,무역수지
0,2019-01,토마토(신선한 것이나 냉장한 것으로 한정한다),356571,990,0,0,990
1,2019-01,양파,821330,222,4003206,1118,-896
2,2019-01,쪽파,60,1,93405,128,-127
3,2019-01,꽃양배추와 브로콜리(broccoli),160,1,638913,563,-562
4,2019-01,방울다다기 양배추,0,0,7580,38,-38


In [75]:
# Split the 'YYYY-MM' period string into integer year and month columns.
period_text = international_trade_df['기간']
international_trade_df['year'] = period_text.str.slice(0, 4).astype('int64')
international_trade_df['month'] = period_text.str.slice(5, 7).astype('int64')

In [76]:
# Trade item names (Korean) mapped to the item codes used in train/test.
# NOTE(review): there is no entry that maps to 'RD', so RD rows receive no
# trade data in the later merges (visible as NaN in the cell outputs) - confirm
# this is intended.
replacements = {
    '양배추': 'CB',
    '감귤': 'TG',
    '당근': 'CR',
    '꽃양배추와 브로콜리(broccoli)': 'BC'
}

# Whole-value replacement: only cells exactly equal to a key are rewritten.
international_trade_df['품목명'] = international_trade_df['품목명'].replace(replacements)

# Keep only the rows whose item name is now one of the codes above.
item_codes = list(replacements.values())
filtered_tradedata = international_trade_df[international_trade_df['품목명'].isin(item_codes)]


In [77]:
# Preview the last rows of the filtered trade table (latest periods).
filtered_tradedata.tail()

Unnamed: 0,기간,품목명,수출 중량,수출 금액,수입 중량,수입 금액,무역수지,year,month
1236,2023-01,TG,81509,269,0,0,269,2023,1
1248,2023-02,BC,24,0,332640,352,-352,2023,2
1250,2023-02,CB,13188,13,377456,104,-91,2023,2
1253,2023-02,CR,22510,20,9260020,3758,-3737,2023,2
1264,2023-02,TG,6895,34,27765,98,-64,2023,2


In [78]:
# Left-join the monthly trade figures onto the training rows by (year, month, item code).
train_merged = train.merge(filtered_tradedata, how='left', left_on=['year', 'month', 'item'], right_on=['year', 'month', '품목명'])

# Drop the '품목명' column: after the merge it duplicates 'item'.
train_merged = train_merged.drop(columns=['품목명'])

# The '기간' column is kept and missing values are NOT filled in this cell;
# the two statements below are disabled on purpose or left over - TODO confirm.
# train_merged = train_merged.drop(columns=['기간'])
# train_merged = train_merged.fillna(0)
display(train_merged)
display(test)

Unnamed: 0,ID,timestamp,item,corporation,location,supply(kg),price(원/kg),year,month,day,...,isWeekday,isSaturday,isSunday,holiday,기간,수출 중량,수출 금액,수입 중량,수입 금액,무역수지
0,TG_A_J_20190101,2019-01-01,TG,A,J,0.0,0.0,2019,1,1,...,1,0,0,1,2019-01,58368.0,172.0,0.0,0.0,172.0
1,TG_A_J_20190102,2019-01-02,TG,A,J,0.0,0.0,2019,1,2,...,1,0,0,0,2019-01,58368.0,172.0,0.0,0.0,172.0
2,TG_A_J_20190103,2019-01-03,TG,A,J,60601.0,1728.0,2019,1,3,...,1,0,0,0,2019-01,58368.0,172.0,0.0,0.0,172.0
3,TG_A_J_20190104,2019-01-04,TG,A,J,25000.0,1408.0,2019,1,4,...,1,0,0,0,2019-01,58368.0,172.0,0.0,0.0,172.0
4,TG_A_J_20190105,2019-01-05,TG,A,J,32352.0,1250.0,2019,1,5,...,0,1,0,0,2019-01,58368.0,172.0,0.0,0.0,172.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59392,RD_F_J_20230227,2023-02-27,RD,F,J,452440.0,468.0,2023,2,27,...,1,0,0,0,,,,,,
59393,RD_F_J_20230228,2023-02-28,RD,F,J,421980.0,531.0,2023,2,28,...,1,0,0,0,,,,,,
59394,RD_F_J_20230301,2023-03-01,RD,F,J,382980.0,574.0,2023,3,1,...,1,0,0,1,,,,,,
59395,RD_F_J_20230302,2023-03-02,RD,F,J,477220.0,523.0,2023,3,2,...,1,0,0,0,,,,,,


Unnamed: 0,ID,timestamp,item,corporation,location,year,month,day,week,isWeekday,isSaturday,isSunday,holiday
0,TG_A_J_20230304,2023-03-04,TG,A,J,2023,3,4,9,0,1,0,0
1,TG_A_J_20230305,2023-03-05,TG,A,J,2023,3,5,9,0,0,1,0
2,TG_A_J_20230306,2023-03-06,TG,A,J,2023,3,6,10,1,0,0,0
3,TG_A_J_20230307,2023-03-07,TG,A,J,2023,3,7,10,1,0,0,0
4,TG_A_J_20230308,2023-03-08,TG,A,J,2023,3,8,10,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1087,RD_F_J_20230327,2023-03-27,RD,F,J,2023,3,27,13,1,0,0,0
1088,RD_F_J_20230328,2023-03-28,RD,F,J,2023,3,28,13,1,0,0,0
1089,RD_F_J_20230329,2023-03-29,RD,F,J,2023,3,29,13,1,0,0,0
1090,RD_F_J_20230330,2023-03-30,RD,F,J,2023,3,30,13,1,0,0,0


In [79]:
# Show the processed test frame.
test

Unnamed: 0,ID,timestamp,item,corporation,location,year,month,day,week,isWeekday,isSaturday,isSunday,holiday
0,TG_A_J_20230304,2023-03-04,TG,A,J,2023,3,4,9,0,1,0,0
1,TG_A_J_20230305,2023-03-05,TG,A,J,2023,3,5,9,0,0,1,0
2,TG_A_J_20230306,2023-03-06,TG,A,J,2023,3,6,10,1,0,0,0
3,TG_A_J_20230307,2023-03-07,TG,A,J,2023,3,7,10,1,0,0,0
4,TG_A_J_20230308,2023-03-08,TG,A,J,2023,3,8,10,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1087,RD_F_J_20230327,2023-03-27,RD,F,J,2023,3,27,13,1,0,0,0
1088,RD_F_J_20230328,2023-03-28,RD,F,J,2023,3,28,13,1,0,0,0
1089,RD_F_J_20230329,2023-03-29,RD,F,J,2023,3,29,13,1,0,0,0
1090,RD_F_J_20230330,2023-03-30,RD,F,J,2023,3,30,13,1,0,0,0


In [80]:
# Average the March trade figures per item over all available years and attach
# them to the test rows by item code.
trade_value_cols = ['수출 중량', '수출 금액', '수입 중량', '수입 금액', '무역수지']

march_trade = filtered_tradedata[filtered_tradedata['month'] == 3]
# Select the value columns explicitly. GroupBy.mean() takes `numeric_only` as
# its first parameter, so passing a column list positionally does not select
# columns - it only works because a non-empty list is truthy.
march_trade = (
    march_trade.groupby('품목명')[trade_value_cols]
    .mean()
    .reset_index()
    .rename(columns={'품목명': 'item'})
)
test_merge = test.merge(march_trade, on='item', how='left')

test_merge


Unnamed: 0,ID,timestamp,item,corporation,location,year,month,day,week,isWeekday,isSaturday,isSunday,holiday,수출 중량,수출 금액,수입 중량,수입 금액,무역수지
0,TG_A_J_20230304,2023-03-04,TG,A,J,2023,3,4,9,0,1,0,0,7207.5,33.75,4665.5,14.5,19.25
1,TG_A_J_20230305,2023-03-05,TG,A,J,2023,3,5,9,0,0,1,0,7207.5,33.75,4665.5,14.5,19.25
2,TG_A_J_20230306,2023-03-06,TG,A,J,2023,3,6,10,1,0,0,0,7207.5,33.75,4665.5,14.5,19.25
3,TG_A_J_20230307,2023-03-07,TG,A,J,2023,3,7,10,1,0,0,0,7207.5,33.75,4665.5,14.5,19.25
4,TG_A_J_20230308,2023-03-08,TG,A,J,2023,3,8,10,1,0,0,0,7207.5,33.75,4665.5,14.5,19.25
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1087,RD_F_J_20230327,2023-03-27,RD,F,J,2023,3,27,13,1,0,0,0,,,,,
1088,RD_F_J_20230328,2023-03-28,RD,F,J,2023,3,28,13,1,0,0,0,,,,,
1089,RD_F_J_20230329,2023-03-29,RD,F,J,2023,3,29,13,1,0,0,0,,,,,
1090,RD_F_J_20230330,2023-03-30,RD,F,J,2023,3,30,13,1,0,0,0,,,,,


In [81]:
# One-hot encode the categorical columns, using the same column list for the
# train and test frames.
categorical_cols = ['corporation', 'location']
train_final = pd.get_dummies(train_merged, columns=categorical_cols)
test_final = pd.get_dummies(test_merge, columns=categorical_cols)

# Inspect both encoded frames.
display(train_final)
display(test_final)

Unnamed: 0,ID,timestamp,item,supply(kg),price(원/kg),year,month,day,week,isWeekday,...,수입 금액,무역수지,corporation_A,corporation_B,corporation_C,corporation_D,corporation_E,corporation_F,location_J,location_S
0,TG_A_J_20190101,2019-01-01,TG,0.0,0.0,2019,1,1,1,1,...,0.0,172.0,True,False,False,False,False,False,True,False
1,TG_A_J_20190102,2019-01-02,TG,0.0,0.0,2019,1,2,1,1,...,0.0,172.0,True,False,False,False,False,False,True,False
2,TG_A_J_20190103,2019-01-03,TG,60601.0,1728.0,2019,1,3,1,1,...,0.0,172.0,True,False,False,False,False,False,True,False
3,TG_A_J_20190104,2019-01-04,TG,25000.0,1408.0,2019,1,4,1,1,...,0.0,172.0,True,False,False,False,False,False,True,False
4,TG_A_J_20190105,2019-01-05,TG,32352.0,1250.0,2019,1,5,1,0,...,0.0,172.0,True,False,False,False,False,False,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59392,RD_F_J_20230227,2023-02-27,RD,452440.0,468.0,2023,2,27,9,1,...,,,False,False,False,False,False,True,True,False
59393,RD_F_J_20230228,2023-02-28,RD,421980.0,531.0,2023,2,28,9,1,...,,,False,False,False,False,False,True,True,False
59394,RD_F_J_20230301,2023-03-01,RD,382980.0,574.0,2023,3,1,9,1,...,,,False,False,False,False,False,True,True,False
59395,RD_F_J_20230302,2023-03-02,RD,477220.0,523.0,2023,3,2,9,1,...,,,False,False,False,False,False,True,True,False


Unnamed: 0,ID,timestamp,item,year,month,day,week,isWeekday,isSaturday,isSunday,...,수입 금액,무역수지,corporation_A,corporation_B,corporation_C,corporation_D,corporation_E,corporation_F,location_J,location_S
0,TG_A_J_20230304,2023-03-04,TG,2023,3,4,9,0,1,0,...,14.5,19.25,True,False,False,False,False,False,True,False
1,TG_A_J_20230305,2023-03-05,TG,2023,3,5,9,0,0,1,...,14.5,19.25,True,False,False,False,False,False,True,False
2,TG_A_J_20230306,2023-03-06,TG,2023,3,6,10,1,0,0,...,14.5,19.25,True,False,False,False,False,False,True,False
3,TG_A_J_20230307,2023-03-07,TG,2023,3,7,10,1,0,0,...,14.5,19.25,True,False,False,False,False,False,True,False
4,TG_A_J_20230308,2023-03-08,TG,2023,3,8,10,1,0,0,...,14.5,19.25,True,False,False,False,False,False,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1087,RD_F_J_20230327,2023-03-27,RD,2023,3,27,13,1,0,0,...,,,False,False,False,False,False,True,True,False
1088,RD_F_J_20230328,2023-03-28,RD,2023,3,28,13,1,0,0,...,,,False,False,False,False,False,True,True,False
1089,RD_F_J_20230329,2023-03-29,RD,2023,3,29,13,1,0,0,...,,,False,False,False,False,False,True,True,False
1090,RD_F_J_20230330,2023-03-30,RD,2023,3,30,13,1,0,0,...,,,False,False,False,False,False,True,True,False


In [82]:
# Mean supply over March training rows, per item, split by the isSunday flag.
train_final[train_final['month']==3].groupby(['item','isSunday'])['supply(kg)'].mean()

item  isSunday
BC    0            2316.326707
      1               0.000000
CB    0           48795.203670
      1               0.000000
CR    0           13303.179554
      1               0.000000
RD    0           66363.303899
      1               0.000000
TG    0            8630.705440
      1               0.000000
Name: supply(kg), dtype: float64

In [83]:
# Mean supply per (item, week), computed from the March training rows only.
march_train_rows = train_final[train_final['month'] == 3]
supply_by_item_week = march_train_rows.groupby(['item', 'week'])['supply(kg)']
supply_mean = supply_by_item_week.mean().reset_index()

In [84]:
# List the columns of the encoded training frame.
train_final.columns

Index(['ID', 'timestamp', 'item', 'supply(kg)', 'price(원/kg)', 'year', 'month',
       'day', 'week', 'isWeekday', 'isSaturday', 'isSunday', 'holiday', '기간',
       '수출 중량', '수출 금액', '수입 중량', '수입 금액', '무역수지', 'corporation_A',
       'corporation_B', 'corporation_C', 'corporation_D', 'corporation_E',
       'corporation_F', 'location_J', 'location_S'],
      dtype='object')

In [85]:
# List the columns of the encoded test frame.
test_final.columns

Index(['ID', 'timestamp', 'item', 'year', 'month', 'day', 'week', 'isWeekday',
       'isSaturday', 'isSunday', 'holiday', '수출 중량', '수출 금액', '수입 중량', '수입 금액',
       '무역수지', 'corporation_A', 'corporation_B', 'corporation_C',
       'corporation_D', 'corporation_E', 'corporation_F', 'location_J',
       'location_S'],
      dtype='object')

In [86]:
# Attach the March mean supply to every test row, matched on item and week.
merged_df = pd.merge(test_final, supply_mean, how='left', on=['item', 'week'])

# Reuse the training column order, leaving out the two columns that are
# dropped for the test frame: the price column and the '기간' period string.
new_column_order = train_final.columns.drop(['price(원/kg)', '기간'])

# Rearrange the merged frame into that order.
test_final = merged_df[new_column_order]
test_final


Unnamed: 0,ID,timestamp,item,supply(kg),year,month,day,week,isWeekday,isSaturday,...,수입 금액,무역수지,corporation_A,corporation_B,corporation_C,corporation_D,corporation_E,corporation_F,location_J,location_S
0,TG_A_J_20230304,2023-03-04,TG,10638.883250,2023,3,4,9,0,1,...,14.5,19.25,True,False,False,False,False,False,True,False
1,TG_A_J_20230305,2023-03-05,TG,10638.883250,2023,3,5,9,0,0,...,14.5,19.25,True,False,False,False,False,False,True,False
2,TG_A_J_20230306,2023-03-06,TG,9940.216071,2023,3,6,10,1,0,...,14.5,19.25,True,False,False,False,False,False,True,False
3,TG_A_J_20230307,2023-03-07,TG,9940.216071,2023,3,7,10,1,0,...,14.5,19.25,True,False,False,False,False,False,True,False
4,TG_A_J_20230308,2023-03-08,TG,9940.216071,2023,3,8,10,1,0,...,14.5,19.25,True,False,False,False,False,False,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1087,RD_F_J_20230327,2023-03-27,RD,57526.166667,2023,3,27,13,1,0,...,,,False,False,False,False,False,True,True,False
1088,RD_F_J_20230328,2023-03-28,RD,57526.166667,2023,3,28,13,1,0,...,,,False,False,False,False,False,True,True,False
1089,RD_F_J_20230329,2023-03-29,RD,57526.166667,2023,3,29,13,1,0,...,,,False,False,False,False,False,True,True,False
1090,RD_F_J_20230330,2023-03-30,RD,57526.166667,2023,3,30,13,1,0,...,,,False,False,False,False,False,True,True,False


In [87]:
# Replace every missing value with 0, in place, in both frames.
for _frame in (train_final, test_final):
    _frame.fillna(0, inplace=True)

In [88]:
def trade_feature(df):
    """Add derived trade features to ``df`` in place and return it.

    New columns:
        * ``무역 규모``: export weight + import weight.
        * ``평균 수출 가격``: export amount / export weight.
        * ``평균 수입 가격``: import amount / import weight.
        * ``무역수지 비율``: (export amount - import amount) / trade balance.

    A zero denominator yields NaN (never +/-inf), so a later ``fillna`` can
    handle every undefined ratio uniformly.

    NOTE(review): in the sample rows shown in this notebook the trade balance
    equals export amount minus import amount, which would make
    ``무역수지 비율`` 1 wherever it is defined - confirm the feature is intended.

    Args:
        df: DataFrame containing the columns ``수출 중량``, ``수출 금액``,
            ``수입 중량``, ``수입 금액`` and ``무역수지``.

    Returns:
        The same DataFrame object, with the four columns added.
    """
    def _safe_denominator(column):
        # Zero -> NaN so that x / 0 becomes NaN instead of +/-inf.
        return df[column].replace(0, np.nan)

    # Trade volume.
    df['무역 규모'] = df['수출 중량'] + df['수입 중량']
    # Average unit prices.
    df['평균 수출 가격'] = df['수출 금액'] / _safe_denominator('수출 중량')
    df['평균 수입 가격'] = df['수입 금액'] / _safe_denominator('수입 중량')
    # Net amount relative to the reported trade balance.
    df['무역수지 비율'] = (df['수출 금액'] - df['수입 금액']) / _safe_denominator('무역수지')
    return df

In [89]:
# Add the derived trade columns to both frames, then inspect the training
# frame and its column list.
train_final = trade_feature(train_final)
test_final = trade_feature(test_final)
display(train_final)
display(train_final.columns)

Unnamed: 0,ID,timestamp,item,supply(kg),price(원/kg),year,month,day,week,isWeekday,...,corporation_C,corporation_D,corporation_E,corporation_F,location_J,location_S,무역 규모,평균 수출 가격,평균 수입 가격,무역수지 비율
0,TG_A_J_20190101,2019-01-01,TG,0.0,0.0,2019,1,1,1,1,...,False,False,False,False,True,False,58368.0,0.002947,,1.0
1,TG_A_J_20190102,2019-01-02,TG,0.0,0.0,2019,1,2,1,1,...,False,False,False,False,True,False,58368.0,0.002947,,1.0
2,TG_A_J_20190103,2019-01-03,TG,60601.0,1728.0,2019,1,3,1,1,...,False,False,False,False,True,False,58368.0,0.002947,,1.0
3,TG_A_J_20190104,2019-01-04,TG,25000.0,1408.0,2019,1,4,1,1,...,False,False,False,False,True,False,58368.0,0.002947,,1.0
4,TG_A_J_20190105,2019-01-05,TG,32352.0,1250.0,2019,1,5,1,0,...,False,False,False,False,True,False,58368.0,0.002947,,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59392,RD_F_J_20230227,2023-02-27,RD,452440.0,468.0,2023,2,27,9,1,...,False,False,False,True,True,False,0.0,,,
59393,RD_F_J_20230228,2023-02-28,RD,421980.0,531.0,2023,2,28,9,1,...,False,False,False,True,True,False,0.0,,,
59394,RD_F_J_20230301,2023-03-01,RD,382980.0,574.0,2023,3,1,9,1,...,False,False,False,True,True,False,0.0,,,
59395,RD_F_J_20230302,2023-03-02,RD,477220.0,523.0,2023,3,2,9,1,...,False,False,False,True,True,False,0.0,,,


Index(['ID', 'timestamp', 'item', 'supply(kg)', 'price(원/kg)', 'year', 'month',
       'day', 'week', 'isWeekday', 'isSaturday', 'isSunday', 'holiday', '기간',
       '수출 중량', '수출 금액', '수입 중량', '수입 금액', '무역수지', 'corporation_A',
       'corporation_B', 'corporation_C', 'corporation_D', 'corporation_E',
       'corporation_F', 'location_J', 'location_S', '무역 규모', '평균 수출 가격',
       '평균 수입 가격', '무역수지 비율'],
      dtype='object')

In [90]:
# train_final.drop(['기간'],axis=1,inplace=True)
# test_final.drop(['기간'],axis=1,inplace=True)

In [91]:
# Show the training frame and its columns again.
display(train_final)
display(train_final.columns)

Unnamed: 0,ID,timestamp,item,supply(kg),price(원/kg),year,month,day,week,isWeekday,...,corporation_C,corporation_D,corporation_E,corporation_F,location_J,location_S,무역 규모,평균 수출 가격,평균 수입 가격,무역수지 비율
0,TG_A_J_20190101,2019-01-01,TG,0.0,0.0,2019,1,1,1,1,...,False,False,False,False,True,False,58368.0,0.002947,,1.0
1,TG_A_J_20190102,2019-01-02,TG,0.0,0.0,2019,1,2,1,1,...,False,False,False,False,True,False,58368.0,0.002947,,1.0
2,TG_A_J_20190103,2019-01-03,TG,60601.0,1728.0,2019,1,3,1,1,...,False,False,False,False,True,False,58368.0,0.002947,,1.0
3,TG_A_J_20190104,2019-01-04,TG,25000.0,1408.0,2019,1,4,1,1,...,False,False,False,False,True,False,58368.0,0.002947,,1.0
4,TG_A_J_20190105,2019-01-05,TG,32352.0,1250.0,2019,1,5,1,0,...,False,False,False,False,True,False,58368.0,0.002947,,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59392,RD_F_J_20230227,2023-02-27,RD,452440.0,468.0,2023,2,27,9,1,...,False,False,False,True,True,False,0.0,,,
59393,RD_F_J_20230228,2023-02-28,RD,421980.0,531.0,2023,2,28,9,1,...,False,False,False,True,True,False,0.0,,,
59394,RD_F_J_20230301,2023-03-01,RD,382980.0,574.0,2023,3,1,9,1,...,False,False,False,True,True,False,0.0,,,
59395,RD_F_J_20230302,2023-03-02,RD,477220.0,523.0,2023,3,2,9,1,...,False,False,False,True,True,False,0.0,,,


Index(['ID', 'timestamp', 'item', 'supply(kg)', 'price(원/kg)', 'year', 'month',
       'day', 'week', 'isWeekday', 'isSaturday', 'isSunday', 'holiday', '기간',
       '수출 중량', '수출 금액', '수입 중량', '수입 금액', '무역수지', 'corporation_A',
       'corporation_B', 'corporation_C', 'corporation_D', 'corporation_E',
       'corporation_F', 'location_J', 'location_S', '무역 규모', '평균 수출 가격',
       '평균 수입 가격', '무역수지 비율'],
      dtype='object')

In [92]:
# Replace the missing values left by the derived trade columns with 0, in place.
for _frame in (train_final, test_final):
    _frame.fillna(0, inplace=True)

In [93]:
# Compare the final column sets of the training and test frames.
display(train_final.columns)
display(test_final.columns)

Index(['ID', 'timestamp', 'item', 'supply(kg)', 'price(원/kg)', 'year', 'month',
       'day', 'week', 'isWeekday', 'isSaturday', 'isSunday', 'holiday', '기간',
       '수출 중량', '수출 금액', '수입 중량', '수입 금액', '무역수지', 'corporation_A',
       'corporation_B', 'corporation_C', 'corporation_D', 'corporation_E',
       'corporation_F', 'location_J', 'location_S', '무역 규모', '평균 수출 가격',
       '평균 수입 가격', '무역수지 비율'],
      dtype='object')

Index(['ID', 'timestamp', 'item', 'supply(kg)', 'year', 'month', 'day', 'week',
       'isWeekday', 'isSaturday', 'isSunday', 'holiday', '수출 중량', '수출 금액',
       '수입 중량', '수입 금액', '무역수지', 'corporation_A', 'corporation_B',
       'corporation_C', 'corporation_D', 'corporation_E', 'corporation_F',
       'location_J', 'location_S', '무역 규모', '평균 수출 가격', '평균 수입 가격', '무역수지 비율'],
      dtype='object')

In [94]:
# Write the final frames to CSV files in the current directory.
# NOTE(review): no `index=False` is passed, so the row index is presumably
# written as an extra unnamed first column - confirm downstream readers expect it.
train_final.to_csv("./train_final3.csv")
test_final.to_csv("./test_final3.csv")