In [10]:
import pandas as pd
from feature.base_dataset import limit_range

In [11]:
def moving_average(input_list):
    """Build a per-day table of 3-row moving averages from the input CSV files.

    Each file is read, passed through ``limit_range``, forward-filled, and
    stripped of ``DAT``, ``obs_time`` and the daily cumulative columns. A
    trailing 3-row rolling mean (``min_periods=1``) is taken over the remaining
    columns, rows are labelled by position (``time = pos % 24`` and
    ``DAT = pos // 24``), and the result is pivoted so that every ``DAT``
    becomes one row with one column per (feature, time) pair.

    Args:
        input_list (list): Paths of the input CSV files to process.

    Returns:
        DataFrame: The pivoted frames of all files stacked in input order.
            Column labels are the string form of the ``(feature, time)``
            tuples, and each file's rows keep their own 0-based index. An
            empty DataFrame is returned when ``input_list`` is empty.
    """
    rows_per_day = 24  # presumably one row per hour -- TODO confirm against the data
    dropped_columns = ['DAT', 'obs_time', '일간누적분무량', '일간누적백색광량',
                       '일간누적적색광량', '일간누적청색광량', '일간누적총광량']

    # Collect the per-file results and concatenate once at the end; calling
    # pd.concat inside the loop re-copies everything accumulated so far.
    pivoted_frames = []
    for path in input_list:
        df = pd.read_csv(path)
        df = limit_range(df)
        # DataFrame.ffill() replaces the deprecated fillna(method='ffill').
        df = df.ffill()
        df = df.drop(dropped_columns, axis=1)

        ma = df.rolling(3, min_periods=1).mean()
        # DAT was dropped above and is rebuilt here purely from row position.
        positions = range(len(ma))
        ma['time'] = [pos % rows_per_day for pos in positions]
        ma['DAT'] = [pos // rows_per_day for pos in positions]

        pivoted = pd.pivot_table(ma, index=['DAT'], columns=['time'], aggfunc='mean')
        # Flatten the (feature, time) MultiIndex into plain string labels.
        pivoted.columns = [str(col) for col in pivoted.columns]
        pivoted_frames.append(pivoted.reset_index())

    if not pivoted_frames:
        return pd.DataFrame()
    return pd.concat(pivoted_frames)
    

In [12]:
from glob import glob
# Gather the CSV paths for each split. Sorting puts inputs and targets in the
# same filename order -- presumably so they pair up by position; TODO confirm.
train_input_list, train_target_list = (
    sorted(glob(pattern))
    for pattern in ('./data/train_input/*.csv', './data/train_target/*.csv')
)

test_input_list, test_target_list = (
    sorted(glob(pattern))
    for pattern in ('./data/test_input/*.csv', './data/test_target/*.csv')
)

In [13]:
# Build the moving-average feature tables, train first and then test.
train, test = (
    moving_average(paths) for paths in (train_input_list, test_input_list)
)

In [15]:
# Write both feature tables to CSV without the index column. The 'roling'
# spelling in the file names is kept as-is since other code may read these paths.
train.to_csv(path_or_buf='train_roling.csv', index=False)
test.to_csv(path_or_buf='test_roling.csv', index=False)

In [None]:
def make_dataset(all_input_list, all_target_list):
    """Merge every input/target CSV pair into combined DataFrames.

    For each index ``idx`` the input file ``all_input_list[idx]`` and the
    target file ``all_target_list[idx]`` are read. The target's ``DAT`` is
    decremented by one and the target is left-joined onto the input on
    ``DAT``. Two variants are produced per pair: one where the
    '시간당분무량' column is clipped to its own 5th-95th percentile range, and
    one with the input left untouched.

    Args:
        all_input_list (list): Paths of the input CSV files.
        all_target_list (list): Paths of the target CSV files, matched to
            ``all_input_list`` by position.

    Returns:
        tuple: ``(df_all, df_all2)`` where ``df_all`` stacks the clipped
            variants and carries a 1-based ``Case`` column identifying the
            source pair, and ``df_all2`` stacks the unclipped variants
            (without a ``Case`` column). Both are empty DataFrames when
            ``all_input_list`` is empty.

    Raises:
        IndexError: If ``all_target_list`` is shorter than ``all_input_list``.
    """
    spray_col = '시간당분무량'

    # Collect per-pair frames and concatenate once at the end; calling
    # pd.concat inside the loop re-copies everything accumulated so far.
    clipped_frames = []
    raw_frames = []
    for idx, input_path in enumerate(all_input_list):
        X = pd.read_csv(input_path)
        y = pd.read_csv(all_target_list[idx])

        q5 = X[spray_col].quantile(0.05)
        q95 = X[spray_col].quantile(0.95)
        # Clip only the column the bounds were computed from. Clipping the
        # whole frame would also squeeze unrelated columns -- and the DAT merge
        # key -- into the spray-amount range.
        X1 = X.copy()
        X1[spray_col] = X1[spray_col].clip(q5, q95)

        # Presumably targets number DAT from 1 while inputs start at 0 -- TODO confirm.
        y['DAT'] = y['DAT'] - 1

        df_concat = pd.merge(X1, y, on='DAT', how='left')
        df_concat2 = pd.merge(X, y, on='DAT', how='left')
        # NOTE(review): only the clipped variant is tagged with Case; confirm
        # whether the unclipped variant should carry it as well.
        df_concat['Case'] = idx + 1

        clipped_frames.append(df_concat)
        raw_frames.append(df_concat2)

    if not clipped_frames:
        return pd.DataFrame(), pd.DataFrame()
    return pd.concat(clipped_frames), pd.concat(raw_frames)