In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from autogluon.timeseries import TimeSeriesDataFrame, TimeSeriesPredictor

In [2]:
train = pd.read_csv('../DAT/train.csv')

In [3]:
train['item_id'] = train.ID.str[0:6]

In [4]:
train.rename(columns={'supply(kg)' : 'supply',
                      'price(원/kg)' : 'price'}, inplace=True)

In [5]:
train['timestamp']=pd.to_datetime(train['timestamp'])

for train in [train]:
    train['year']=train['timestamp'].dt.year
    train['month']=train['timestamp'].dt.month
    train['day']=train['timestamp'].dt.day
    train['weekdays']=train['timestamp'].dt.weekday

In [6]:
import holidays
kr_holidays = holidays.KR()
# generate holiday table
train['holiday'] = train['timestamp'].apply(lambda x: 1 if x in kr_holidays else 0)

In [7]:
train_TG_index=train[train['item']=='TG'].index
train_BC_index=train[train['item']=='BC'].index
train_RD_index=train[train['item']=='RD'].index
train_CR_index=train[train['item']=='CR'].index
train_CB_index=train[train['item']=='CB'].index
train_index=[train_TG_index,train_BC_index,
             train_RD_index,train_CR_index,
             train_CB_index]

In [8]:
train_TG=train[train['item']=='TG'].reset_index(drop=True)
train_BC=train[train['item']=='BC'].reset_index(drop=True)
train_RD=train[train['item']=='RD'].reset_index(drop=True)
train_CR=train[train['item']=='CR'].reset_index(drop=True)
train_CB=train[train['item']=='CB'].reset_index(drop=True)

In [9]:
def make_train_list(dataset):
    corp_list=dataset['corporation'].unique().tolist()
    loc_list=dataset['location'].unique().tolist()
    
    dataset_len=[]
    for corp in corp_list:
        for loc in loc_list:
            dataset_len.append(len(dataset[(dataset['corporation']==corp)&
                                              (dataset['location']==loc)]))
    train_list=[]
    k=0
    for j in range(0, len(dataset_len)):
        if j == 0:
            train_list.append(dataset.loc[0:dataset_len[0]-1,:].reset_index(drop=True))
            k=dataset_len[0]
        else:
            train_list.append(dataset.loc[k:k+dataset_len[j]-1,:].reset_index(drop=True))            
            k=k+dataset_len[j]
            
    len_zero=[]            
    for i in range(0,len(train_list)):
        if len(train_list[i]) == 0:
            len_zero.append(i)
    if len(len_zero)!=0: 
        train_list = np.delete(train_list, len_zero)     
    
    for k in range(0, len(train_list)):
        print(train_list[k]['item'].unique() , train_list[k]['corporation'].unique() , train_list[k]['location'].unique() , len(train_list[k]))
    return train_list  

In [10]:
def pre_interpolate(train_list):
    train_list1=train_list[(train_list['weekdays'] != 6) & ~((train_list['month']==1)&(train_list['day']==1))]
    train_list1['price'].replace(0, np.nan, inplace=True)
    train_list[(train_list['weekdays'] != 6) & ~((train_list['month']==1)&(train_list['day']==1))]=train_list1
    print(train_list)
    return train_list

In [11]:
def interpolate(train_list):
    for i in range(0, len(train_list)):
        missing_ranges = []
        start_date = None
        for j, row in train_list[i].iterrows():
            if pd.isna(row['price']):
                if start_date is None:
                    start_date = row['timestamp']
            elif start_date is not None:
                if j - 1 >= 0:
                    missing_ranges.append({'start': start_date, 'end': train_list[i].loc[j-1, 'timestamp']})
                    start_date = None

        # 찾은 구간을 NaN으로 처리
        for missing_range in missing_ranges:
            train_list[i].loc[(train_list[i]['timestamp'] >= missing_range['start']) &
                                 (train_list[i]['timestamp'] <= missing_range['end']), 'price'] = np.nan

        # 나머지 결측값 보간
        train_list[i]['price'] = train_list[i]['price'].interpolate(method='linear')
    return train_list

In [12]:
def weekmean(train_list):
    for i in range(0, len(train_list)):
        week_mean = train_list[i].groupby([pd.Grouper(key='timestamp', freq='W-SUN')]).mean()['price'].reset_index()
        week_mean['price'] = (week_mean['price']*7)/6
        merged_df = pd.merge(train_list[i], week_mean,
                             left_on=pd.to_datetime(train_list[i]['timestamp']).dt.to_period("W-SUN"),
                             right_on=pd.to_datetime(week_mean['timestamp']).dt.to_period("W-SUN"),
                             suffixes=('', '_주간평균'))
        merged_df=merged_df.drop(columns=['key_0', 'timestamp_주간평균'])
        train_list[i]=merged_df
        train_list[i].loc[train_list[i]['weekdays']==6, 'price_주간평균']=0
    return train_list

In [13]:
def diff(train_list):
    for i in range(0, len(train_list)):
        train_list[i]['diff']=train_list[i]['price'].diff()
        train_list[i].loc[0, 'diff']=0 
    return train_list

In [14]:
def roc(train_list):
    for i in range(0, len(train_list)):
        train_list[i]['roc']= (train_list[i]['price'].diff() / train_list[i]['price'])
        train_list[i].loc[0, 'roc']=0
        train_list[i]['roc'] = train_list[i]['roc'].replace(-np.inf, np.nan)
        train_list[i]['roc'].fillna(0, inplace=True)
    return train_list

In [15]:
def make_derived_variable(train_list):
    train_list=weekmean(train_list)
    train_list=diff(train_list)
    train_list=roc(train_list)
    return train_list

### 감귤(TG) 결측치 보간

In [16]:
train_TG1=train_TG[train_TG['weekdays'] != 6]
train_TG1['price'].replace(0, np.nan, inplace=True)
train_TG1.loc[(train_TG1['month']==1)&(train_TG1['day']==1), 'price']=0
train_TG1['price']=train_TG1['price'].interpolate(method='linear')
train_TG1

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_TG1['price'].replace(0, np.nan, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_TG1['price']=train_TG1['price'].interpolate(method='linear')


Unnamed: 0,ID,timestamp,item,corporation,location,supply,price,item_id,year,month,day,weekdays,holiday
0,TG_A_J_20190101,2019-01-01,TG,A,J,0.0,0.0,TG_A_J,2019,1,1,1,1
1,TG_A_J_20190102,2019-01-02,TG,A,J,0.0,864.0,TG_A_J,2019,1,2,2,0
2,TG_A_J_20190103,2019-01-03,TG,A,J,60601.0,1728.0,TG_A_J,2019,1,3,3,0
3,TG_A_J_20190104,2019-01-04,TG,A,J,25000.0,1408.0,TG_A_J,2019,1,4,4,0
4,TG_A_J_20190105,2019-01-05,TG,A,J,32352.0,1250.0,TG_A_J,2019,1,5,5,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
15225,TG_E_S_20230227,2023-02-27,TG,E,S,24204.0,3418.0,TG_E_S,2023,2,27,0,0
15226,TG_E_S_20230228,2023-02-28,TG,E,S,13587.0,3141.0,TG_E_S,2023,2,28,1,0
15227,TG_E_S_20230301,2023-03-01,TG,E,S,16187.0,4235.0,TG_E_S,2023,3,1,2,1
15228,TG_E_S_20230302,2023-03-02,TG,E,S,17830.0,3960.0,TG_E_S,2023,3,2,3,0


In [17]:
train_TG[train_TG['weekdays'] != 6]=train_TG1

In [18]:
train_TG_list=make_train_list(train_TG)

['TG'] ['A'] ['J'] 1523
['TG'] ['A'] ['S'] 1523
['TG'] ['B'] ['J'] 1523
['TG'] ['B'] ['S'] 1523
['TG'] ['C'] ['J'] 1523
['TG'] ['C'] ['S'] 1523
['TG'] ['D'] ['J'] 1523
['TG'] ['D'] ['S'] 1523
['TG'] ['E'] ['J'] 1523
['TG'] ['E'] ['S'] 1523


In [19]:
train_TG_list=weekmean(train_TG_list)
train_TG_list=diff(train_TG_list)
train_TG_list=roc(train_TG_list)

### 브로콜리(BC) 결측치 보간

In [20]:
train_BC=pre_interpolate(train_BC)
train_BC_list=make_train_list(train_BC)
train_BC_list=interpolate(train_BC_list)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_list1['price'].replace(0, np.nan, inplace=True)
  arr = asarray(arr)


                    ID  timestamp item corporation location   supply   price  \
0      BC_A_J_20190101 2019-01-01   BC           A        J      0.0     0.0   
1      BC_A_J_20190102 2019-01-02   BC           A        J      0.0     NaN   
2      BC_A_J_20190103 2019-01-03   BC           A        J   7616.0  2559.0   
3      BC_A_J_20190104 2019-01-04   BC           A        J   7488.0  2425.0   
4      BC_A_J_20190105 2019-01-05   BC           A        J  10408.0  2097.0   
...                ...        ...  ...         ...      ...      ...     ...   
13702  BC_E_S_20230227 2023-02-27   BC           E        S   2200.0  2488.0   
13703  BC_E_S_20230228 2023-02-28   BC           E        S   1024.0  3232.0   
13704  BC_E_S_20230301 2023-03-01   BC           E        S   2160.0  3816.0   
13705  BC_E_S_20230302 2023-03-02   BC           E        S   1152.0  3321.0   
13706  BC_E_S_20230303 2023-03-03   BC           E        S   1336.0  2939.0   

      item_id  year  month  day  weekda

In [21]:
train_BC_list=make_derived_variable(train_BC_list)

### 무(RD) 결측치 보간

In [22]:
train_RD=pre_interpolate(train_RD)
train_RD_list=make_train_list(train_RD)
train_RD_list=interpolate(train_RD_list)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_list1['price'].replace(0, np.nan, inplace=True)
  arr = asarray(arr)


                    ID  timestamp item corporation location    supply  price  \
0      RD_A_J_20190101 2019-01-01   RD           A        J       0.0    0.0   
1      RD_A_J_20190102 2019-01-02   RD           A        J       0.0    NaN   
2      RD_A_J_20190103 2019-01-03   RD           A        J   37060.0  367.0   
3      RD_A_J_20190104 2019-01-04   RD           A        J   19260.0  460.0   
4      RD_A_J_20190105 2019-01-05   RD           A        J   32140.0  402.0   
...                ...        ...  ...         ...      ...       ...    ...   
12179  RD_F_J_20230227 2023-02-27   RD           F        J  452440.0  468.0   
12180  RD_F_J_20230228 2023-02-28   RD           F        J  421980.0  531.0   
12181  RD_F_J_20230301 2023-03-01   RD           F        J  382980.0  574.0   
12182  RD_F_J_20230302 2023-03-02   RD           F        J  477220.0  523.0   
12183  RD_F_J_20230303 2023-03-03   RD           F        J  427520.0  529.0   

      item_id  year  month  day  weekda

In [23]:
train_RD_list=make_derived_variable(train_RD_list)

### 당근(CR) 결측치 보간

In [24]:
train_CR=pre_interpolate(train_CR)
train_CR_list=make_train_list(train_CR)
train_CR_list=interpolate(train_CR_list)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_list1['price'].replace(0, np.nan, inplace=True)
  arr = asarray(arr)


                    ID  timestamp item corporation location   supply   price  \
0      CR_A_J_20190101 2019-01-01   CR           A        J      0.0     0.0   
1      CR_A_J_20190102 2019-01-02   CR           A        J      0.0     NaN   
2      CR_A_J_20190103 2019-01-03   CR           A        J      0.0     NaN   
3      CR_A_J_20190104 2019-01-04   CR           A        J  10240.0  1141.0   
4      CR_A_J_20190105 2019-01-05   CR           A        J   8680.0  1133.0   
...                ...        ...  ...         ...      ...      ...     ...   
10656  CR_E_S_20230227 2023-02-27   CR           E        S      0.0     NaN   
10657  CR_E_S_20230228 2023-02-28   CR           E        S      0.0     NaN   
10658  CR_E_S_20230301 2023-03-01   CR           E        S      0.0     NaN   
10659  CR_E_S_20230302 2023-03-02   CR           E        S      0.0     NaN   
10660  CR_E_S_20230303 2023-03-03   CR           E        S      0.0     NaN   

      item_id  year  month  day  weekda

In [25]:
train_CR_list=make_derived_variable(train_CR_list)

### 양배추(CB) 결측치 보간

In [26]:
train_CB=pre_interpolate(train_CB)
train_CB_list=make_train_list(train_CB)
train_CB_list=interpolate(train_CB_list)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_list1['price'].replace(0, np.nan, inplace=True)
  arr = asarray(arr)


                   ID  timestamp item corporation location    supply  price  \
0     CB_A_J_20190101 2019-01-01   CB           A        J       0.0    0.0   
1     CB_A_J_20190102 2019-01-02   CB           A        J       0.0    NaN   
2     CB_A_J_20190103 2019-01-03   CB           A        J       0.0    NaN   
3     CB_A_J_20190104 2019-01-04   CB           A        J       0.0    NaN   
4     CB_A_J_20190105 2019-01-05   CB           A        J    4112.0  374.0   
...               ...        ...  ...         ...      ...       ...    ...   
7610  CB_F_J_20230227 2023-02-27   CB           F        J  232312.0  652.0   
7611  CB_F_J_20230228 2023-02-28   CB           F        J  224072.0  672.0   
7612  CB_F_J_20230301 2023-03-01   CB           F        J  273800.0  621.0   
7613  CB_F_J_20230302 2023-03-02   CB           F        J  238992.0  653.0   
7614  CB_F_J_20230303 2023-03-03   CB           F        J  206360.0  643.0   

     item_id  year  month  day  weekdays  holiday  

In [27]:
train_CB_list=make_derived_variable(train_CB_list)

### 모든 item에 대한 데이터 프레임 합치기

In [28]:
train_item_list=[train_TG_list,train_BC_list,
                 train_RD_list,train_CR_list,
                 train_CB_list]
dfs=[]
for item_list in train_item_list:
    result=pd.concat(item_list, ignore_index=True)
    dfs.append(result)

In [29]:
for i in range(0, 5):
    dfs[i].index=train_index[i]
train_df=pd.concat(dfs, ignore_index=False)

In [30]:
train_df=train_df.sort_index()
train_df.loc[train_df['weekdays']==6, 'Sunday']=1
train_df.loc[train_df['weekdays']!=6, 'Sunday']=0

In [31]:
train_df.tail(10)

Unnamed: 0,ID,timestamp,item,corporation,location,supply,price,item_id,year,month,day,weekdays,holiday,price_주간평균,diff,roc,Sunday
59387,RD_F_J_20230222,2023-02-22,RD,F,J,512400.0,440.0,RD_F_J,2023,2,22,2,0,449.0,-35.0,-0.079545,0.0
59388,RD_F_J_20230223,2023-02-23,RD,F,J,478360.0,396.0,RD_F_J,2023,2,23,3,0,449.0,-44.0,-0.111111,0.0
59389,RD_F_J_20230224,2023-02-24,RD,F,J,466440.0,388.0,RD_F_J,2023,2,24,4,0,449.0,-8.0,-0.020619,0.0
59390,RD_F_J_20230225,2023-02-25,RD,F,J,250580.0,429.0,RD_F_J,2023,2,25,5,0,449.0,41.0,0.095571,0.0
59391,RD_F_J_20230226,2023-02-26,RD,F,J,0.0,0.0,RD_F_J,2023,2,26,6,0,0.0,-429.0,0.0,1.0
59392,RD_F_J_20230227,2023-02-27,RD,F,J,452440.0,468.0,RD_F_J,2023,2,27,0,0,612.5,468.0,1.0,0.0
59393,RD_F_J_20230228,2023-02-28,RD,F,J,421980.0,531.0,RD_F_J,2023,2,28,1,0,612.5,63.0,0.118644,0.0
59394,RD_F_J_20230301,2023-03-01,RD,F,J,382980.0,574.0,RD_F_J,2023,3,1,2,1,612.5,43.0,0.074913,0.0
59395,RD_F_J_20230302,2023-03-02,RD,F,J,477220.0,523.0,RD_F_J,2023,3,2,3,0,612.5,-51.0,-0.097514,0.0
59396,RD_F_J_20230303,2023-03-03,RD,F,J,427520.0,529.0,RD_F_J,2023,3,3,4,0,612.5,6.0,0.011342,0.0


In [32]:
train_df.drop(columns=['ID', 'item', 'corporation', 'location',
                       'supply', 'weekdays', 'holiday', 'diff', 'roc', 'Sunday', 'year', 'month', 'day'], inplace=True)

In [33]:
train_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 59397 entries, 0 to 59396
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype         
---  ------      --------------  -----         
 0   timestamp   59397 non-null  datetime64[ns]
 1   price       59397 non-null  float64       
 2   item_id     59397 non-null  object        
 3   price_주간평균  59397 non-null  float64       
dtypes: datetime64[ns](1), float64(2), object(1)
memory usage: 2.3+ MB


## Run

In [34]:
data = TimeSeriesDataFrame(train_df)
predictor = TimeSeriesPredictor( 
    prediction_length=28,
    target="price",
    eval_metric="RMSE",
)
# seed 고정
predictor.fit( data, random_seed=42, )

TimeSeriesPredictor.fit() called
Fitting with arguments:
{'enable_ensemble': True,
 'evaluation_metric': 'RMSE',
 'excluded_model_types': None,
 'hyperparameter_tune_kwargs': None,
 'hyperparameters': 'default',
 'num_val_windows': 1,
 'prediction_length': 28,
 'random_seed': 42,
 'target': 'price',
 'time_limit': None,
 'verbosity': 2}
Provided training data set with 59397 rows, 39 items (item = single time series). Average time series length is 1523.0. Data frequency is 'D'.
Global seed set to 42
AutoGluon will save models to AutogluonModels\ag-20231113_161333\
AutoGluon will gauge predictive performance using evaluation metric: 'RMSE'
	This metric's sign has been flipped to adhere to being 'higher is better'. The reported score can be multiplied by -1 to get the metric value.

Provided dataset contains following columns:
	target:           'price'
	past covariates:  ['price_주간평균']

Starting training. Start time is 2023-11-14 01:13:33
Models that will be trained: ['Naive', 'SeasonalN

<autogluon.timeseries.predictor.TimeSeriesPredictor at 0x2324f7fde80>

In [35]:
predictor.refit_full()

Refitting models via `refit_full` using all of the data (combined train and validation)...
	Models trained in this way will have the suffix '_FULL' and have NaN validation score.
	This process is not bound by time_limit, but should take less time than the original `fit` call.
Fitting model: Naive_FULL | Skipping fit via cloning parent ...
Fitting model: SeasonalNaive_FULL | Skipping fit via cloning parent ...
Fitting model: Theta_FULL | Skipping fit via cloning parent ...
Fitting model: AutoETS_FULL | Skipping fit via cloning parent ...
Fitting model: RecursiveTabular_FULL
	3.89    s     = Training runtime
Fitting model: DeepAR_FULL | Skipping fit via cloning parent ...
Fitting model: WeightedEnsemble_FULL | Skipping fit via cloning parent ...
Refit complete. Models trained: ['Naive_FULL', 'SeasonalNaive_FULL', 'Theta_FULL', 'AutoETS_FULL', 'RecursiveTabular_FULL', 'DeepAR_FULL', 'WeightedEnsemble_FULL']
Total runtime: 4.14 s
Updated best model to 'WeightedEnsemble_FULL' (Previously 'W

{'Naive': 'Naive_FULL',
 'SeasonalNaive': 'SeasonalNaive_FULL',
 'Theta': 'Theta_FULL',
 'AutoETS': 'AutoETS_FULL',
 'RecursiveTabular': 'RecursiveTabular_FULL',
 'DeepAR': 'DeepAR_FULL',
 'WeightedEnsemble': 'WeightedEnsemble_FULL'}

In [36]:
# seed 고정
pred = predictor.predict(data, random_seed=42, )

Global seed set to 42
Model not specified in predict, will default to the model with the best validation score: WeightedEnsemble_FULL


In [37]:
pred=pred.round(0)
pred

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9
item_id,timestamp,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
TG_A_J,2023-03-04,3566.0,2325.0,2740.0,3039.0,3323.0,3578.0,3838.0,4103.0,4410.0,4864.0
TG_A_J,2023-03-05,415.0,-1248.0,-660.0,-270.0,68.0,381.0,714.0,1045.0,1442.0,2014.0
TG_A_J,2023-03-06,2973.0,930.0,1648.0,2146.0,2578.0,2947.0,3341.0,3748.0,4240.0,4924.0
TG_A_J,2023-03-07,3528.0,1284.0,2039.0,2578.0,3052.0,3485.0,3929.0,4394.0,4944.0,5712.0
TG_A_J,2023-03-08,3128.0,693.0,1535.0,2132.0,2645.0,3116.0,3597.0,4112.0,4729.0,5593.0
...,...,...,...,...,...,...,...,...,...,...,...
RD_F_J,2023-03-27,550.0,-45.0,159.0,304.0,432.0,549.0,662.0,787.0,930.0,1141.0
RD_F_J,2023-03-28,552.0,-55.0,154.0,304.0,431.0,553.0,676.0,800.0,945.0,1151.0
RD_F_J,2023-03-29,556.0,-70.0,142.0,301.0,431.0,554.0,672.0,805.0,959.0,1168.0
RD_F_J,2023-03-30,509.0,-118.0,96.0,252.0,385.0,506.0,627.0,759.0,915.0,1126.0


In [38]:
submission = pd.read_csv('../DAT/sample_submission.csv')
submission['answer'] = pred.reset_index()['mean']

In [39]:
submission[ submission['answer'] < 0.0]

Unnamed: 0,ID,answer
169,TG_D_J_20230305,-348.0
176,TG_D_J_20230312,-334.0
183,TG_D_J_20230319,-265.0
190,TG_D_J_20230326,-273.0
225,TG_E_J_20230305,-41.0
449,CR_E_S_20230305,-3.0
560,CB_E_J_20230304,-21.0
561,CB_E_J_20230305,-40.0
568,CB_E_J_20230312,-34.0
575,CB_E_J_20230319,-39.0


In [38]:
submission.loc[submission['ID'].str.contains("_20230305"), 'answer']=0
submission.loc[submission['ID'].str.contains("_20230312"), 'answer']=0
submission.loc[submission['ID'].str.contains("_20230319"), 'answer']=0
submission.loc[submission['ID'].str.contains("_20230326"), 'answer']=0

In [39]:
submission.to_csv('../DAT/submission1.csv', index=False)
submission

Unnamed: 0,ID,answer
0,TG_A_J_20230304,3566.0
1,TG_A_J_20230305,0.0
2,TG_A_J_20230306,2973.0
3,TG_A_J_20230307,3528.0
4,TG_A_J_20230308,3128.0
...,...,...
1087,RD_F_J_20230327,550.0
1088,RD_F_J_20230328,552.0
1089,RD_F_J_20230329,556.0
1090,RD_F_J_20230330,509.0


In [40]:
submission.head(30)

Unnamed: 0,ID,answer
0,TG_A_J_20230304,3566.0
1,TG_A_J_20230305,0.0
2,TG_A_J_20230306,2973.0
3,TG_A_J_20230307,3528.0
4,TG_A_J_20230308,3128.0
5,TG_A_J_20230309,3110.0
6,TG_A_J_20230310,3290.0
7,TG_A_J_20230311,3400.0
8,TG_A_J_20230312,0.0
9,TG_A_J_20230313,2945.0
