# Import

In [1]:
import sys
import os
import datetime
import pandas as pd
sys.path.append("../")
sys.path.append("../..")

# data load
from Clust.setting import influx_setting_KETI as ins
from Clust.clust.ingestion.influx import influx_Client_v2 as influx_Client, bucket_data

# split by timestep
from Clust.clust.transformation.splitDataByCondition import timeStep

# Shared InfluxDB client, reused by every data-loading cell in this notebook.
# It is built from ins.CLUSTDataServer2 (presumably the server connection
# settings defined in Clust.setting -- confirm).
db_client = influx_Client.influxClient(ins.CLUSTDataServer2)

- 입력한 Timestep 기준에 따라 시계열(TimeSeries) 데이터를 나누는 방법을 보여준다.
- 테스트는 아래 순서에 따라 진행한다.
    1. Input : 단일 Dataframe
    2. Input : 특정 Bucket의 Dataset
- Timestep을 기준으로 데이터를 나누는 모든 챕터들은 아래와 같은 진행 순서를 갖는다.
    1. Data Preparation
        - duration : 2022.02.01~2022.02.28
        - kWeather Indoor 초등학교 
    2. splitDataByCondition

# 1. Split data by timestep with inputting Dataframe

## 1-1. Data Preparation

In [3]:
# --- Data source: the InfluxDB bucket and the single measurement to load ---
measurement_name = "ICW0W2000025"
bucket_name = "air_indoor_초등학교"

# --- Query window: 2022-02-01 through 2022-02-28 ---
start_time = pd.Timestamp("2022-02-01 00:00:00")
end_time = pd.Timestamp("2022-02-28 23:59:59")

# --- Time-step criteria ---
# "step" lists six boundaries and "label" gives five names, one per interval.
# NOTE(review): the output further down shows 'dawn' covering 00:00-05:59, so
# label i appears to cover hours [step[i], step[i+1]) -- confirm against timeStep.
timestep_criteria = dict(
    step=[0, 6, 12, 17, 20, 24],
    label=["dawn", "morning", "afternoon", "evening", "night"],
)

In [4]:
# Query a single measurement (`measurement_name`) from `bucket_name` between
# start_time and end_time. The result is previewed in the next cell.
data = db_client.get_data_by_time(start_time, end_time, bucket_name, measurement_name)

In [5]:
# Preview the loaded DataFrame: a time index with sensor columns (in_co2, in_humi, ...).
data

Unnamed: 0_level_0,in_co2,in_humi,in_noise,in_pm01,in_pm10,in_pm25,in_temp,in_voc
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2022-02-01 00:00:00+00:00,397.0,20.0,47.0,43.0,53.0,46.0,15.5,7.0
2022-02-01 00:01:00+00:00,399.0,20.0,48.0,43.0,53.0,46.0,15.5,6.0
2022-02-01 00:02:00+00:00,407.0,20.0,48.0,43.0,53.0,46.0,15.5,6.0
2022-02-01 00:03:00+00:00,398.0,20.0,48.0,43.0,56.0,47.0,15.5,6.0
2022-02-01 00:04:00+00:00,401.0,20.0,48.0,43.0,55.0,46.0,15.5,7.0
...,...,...,...,...,...,...,...,...
2022-02-28 23:55:00+00:00,404.0,24.0,46.0,21.0,26.0,22.0,18.5,6.0
2022-02-28 23:56:00+00:00,403.0,24.0,46.0,21.0,26.0,22.0,18.5,5.0
2022-02-28 23:57:00+00:00,400.0,24.0,46.0,21.0,25.0,22.0,18.5,7.0
2022-02-28 23:58:00+00:00,403.0,24.0,46.0,21.0,26.0,22.0,18.5,6.0


## 1-2. Split Data by timestep

In [6]:
# Split the single DataFrame according to timestep_criteria. The result is a
# dict keyed by the criteria labels ('dawn', 'morning', ...).
# NOTE(review): the index above is displayed with a +00:00 offset -- confirm
# whether the hour boundaries in timestep_criteria are meant as UTC or local time.
split_data_result = timeStep.get_split_data_by_timestep_from_dataframe(data, timestep_criteria)

In [7]:
# Show the whole result: each label mapped to its slice of the DataFrame.
split_data_result

{'dawn':                            in_co2  in_humi  in_noise  in_pm01  in_pm10  \
 time                                                                     
 2022-02-01 00:00:00+00:00   397.0     20.0      47.0     43.0     53.0   
 2022-02-01 00:01:00+00:00   399.0     20.0      48.0     43.0     53.0   
 2022-02-01 00:02:00+00:00   407.0     20.0      48.0     43.0     53.0   
 2022-02-01 00:03:00+00:00   398.0     20.0      48.0     43.0     56.0   
 2022-02-01 00:04:00+00:00   401.0     20.0      48.0     43.0     55.0   
 ...                           ...      ...       ...      ...      ...   
 2022-02-28 05:55:00+00:00   385.0     21.0      47.0     10.0     14.0   
 2022-02-28 05:56:00+00:00   394.0     21.0      47.0     10.0     11.0   
 2022-02-28 05:57:00+00:00   390.0     21.0      47.0     10.0     11.0   
 2022-02-28 05:58:00+00:00   387.0     21.0      47.0     11.0     12.0   
 2022-02-28 05:59:00+00:00   387.0     21.0      47.0     11.0     12.0   
 
               

In [8]:
# List the labels produced by the split; they should match timestep_criteria["label"].
split_data_result.keys()

dict_keys(['dawn', 'morning', 'afternoon', 'evening', 'night'])

# 2. Split data by timestep with Dataset

In [9]:
# --- Data source: every measurement in this InfluxDB bucket will be loaded ---
bucket_name = "air_indoor_초등학교"

# --- Query window: 2022-02-01 through 2022-02-28 ---
start_time = pd.Timestamp("2022-02-01 00:00:00")
end_time = pd.Timestamp("2022-02-28 23:59:59")

# --- Time-step criteria: six "step" boundaries, five interval "label" names ---
timestep_criteria = dict(
    step=[0, 6, 12, 17, 20, 24],
    label=["dawn", "morning", "afternoon", "evening", "night"],
)

In [10]:
# Load the data of every measurement in `bucket_name` for the same time window.
# NOTE(review): presumably a dict keyed by measurement name (the split result
# further down is keyed that way) -- confirm.
dataset = bucket_data.get_all_msData_in_oneBucket(start_time, end_time, db_client, bucket_name)

## 2-2. Split Data by timestep

In [12]:
# Apply the time-step split to the whole dataset. The displayed result is a
# nested dict: measurement name -> label -> DataFrame.
split_dataset_result = timeStep.get_split_data_by_timestep_from_dataset(dataset, timestep_criteria)

In [13]:
# Show the full nested result (plain-text repr, so the output is long).
split_dataset_result

{'ICW0W2000022': {'dawn':                            in_co2  in_humi  in_noise  in_pm01  in_pm10  \
  time                                                                     
  2022-02-01 00:00:00+00:00   471.0     26.0      48.0     29.0     33.0   
  2022-02-01 00:01:00+00:00   470.0     26.0      47.0     28.0     32.0   
  2022-02-01 00:02:00+00:00   471.0     26.0      47.0     28.0     31.0   
  2022-02-01 00:03:00+00:00   463.0     26.0      47.0     28.0     32.0   
  2022-02-01 00:04:00+00:00   468.0     26.0      48.0     28.0     33.0   
  ...                           ...      ...       ...      ...      ...   
  2022-02-28 05:55:00+00:00   452.0     25.0      47.0      7.0      8.0   
  2022-02-28 05:56:00+00:00   452.0     25.0      47.0      7.0      8.0   
  2022-02-28 05:57:00+00:00   456.0     25.0      47.0      7.0      8.0   
  2022-02-28 05:58:00+00:00   453.0     25.0      47.0      8.0      9.0   
  2022-02-28 05:59:00+00:00   453.0     25.0      47.0      7.0 

In [14]:
# Top-level keys of the result: the measurement names that were split.
split_dataset_result.keys()

dict_keys(['ICW0W2000022', 'ICW0W2000023', 'ICW0W2000024', 'ICW0W2000025', 'ICW0W2000030', 'ICW0W2000031', 'ICW0W2000032', 'ICW0W2000033', 'ICW0W2000034'])

In [15]:
# Inspect the per-label split for a single measurement.
split_dataset_result["ICW0W2000022"]

{'dawn':                            in_co2  in_humi  in_noise  in_pm01  in_pm10  \
 time                                                                     
 2022-02-01 00:00:00+00:00   471.0     26.0      48.0     29.0     33.0   
 2022-02-01 00:01:00+00:00   470.0     26.0      47.0     28.0     32.0   
 2022-02-01 00:02:00+00:00   471.0     26.0      47.0     28.0     31.0   
 2022-02-01 00:03:00+00:00   463.0     26.0      47.0     28.0     32.0   
 2022-02-01 00:04:00+00:00   468.0     26.0      48.0     28.0     33.0   
 ...                           ...      ...       ...      ...      ...   
 2022-02-28 05:55:00+00:00   452.0     25.0      47.0      7.0      8.0   
 2022-02-28 05:56:00+00:00   452.0     25.0      47.0      7.0      8.0   
 2022-02-28 05:57:00+00:00   456.0     25.0      47.0      7.0      8.0   
 2022-02-28 05:58:00+00:00   453.0     25.0      47.0      8.0      9.0   
 2022-02-28 05:59:00+00:00   453.0     25.0      47.0      7.0      7.0   
 
               