In [1]:
import os
import numpy as np
import pandas as pd
import time
import functools

from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import mean_absolute_error as mae
from sklearn.metrics import mean_squared_log_error as msle
from sklearn.model_selection import StratifiedKFold, KFold
import matplotlib.pyplot as plt
%matplotlib inline


In [32]:
df_hour = pd.read_csv(r'data/data202605/hourly_dataset.csv')
df_test = pd.read_csv(r'data/data202605/test_public.csv')
df_sub = pd.read_csv(r'data/data202605/sample_submission.csv')

In [5]:
df_hour.head()

Unnamed: 0,time,flow_1,flow_2,flow_3,flow_4,flow_5,flow_6,flow_7,flow_8,flow_9,...,flow_12,flow_13,flow_14,flow_15,flow_16,flow_17,flow_18,flow_19,flow_20,train or test
0,2022-01-01 01:00:00,29.7,14.6,54.7,40.1,3.0,49.7,10.9,1.1,5.0,...,2.914,1.7,3.2,1.3,3.5,6.8,,1.806,1.4,train
1,2022-01-01 02:00:00,21.9,9.0,38.0,27.7,2.4,30.2,6.4,0.4,2.6,...,1.108,1.3,2.2,0.8,2.3,4.5,,3.847,0.8,train
2,2022-01-01 03:00:00,16.9,4.5,28.9,22.9,1.3,19.7,3.8,0.5,1.4,...,0.772,0.6,1.5,0.6,1.1,2.4,,,0.5,train
3,2022-01-01 04:00:00,14.3,3.2,25.5,20.0,1.5,15.4,2.7,0.4,1.2,...,0.414,0.2,1.2,0.7,0.8,1.8,,,0.2,train
4,2022-01-01 05:00:00,14.9,3.5,26.4,20.6,1.2,17.5,2.2,0.5,1.2,...,0.279,0.8,1.1,0.4,0.9,1.9,,,0.3,train


In [6]:
df_test.groupby('train or test')['time'].count()


train or test
test1    168
test2    168
test3    168
test4    168
Name: time, dtype: int64

In [7]:
SEQ_LEN = 168


In [8]:
# 参考开源项目https://github.com/lhrgo/Competition-code/blob/main/baseline.ipynb
test_list1 = df_test.groupby('train or test')['time'].first().reset_index()
test_list1 = test_list1['time'].values.tolist()
test_list2 = df_test.groupby('train or test')['time'].last().reset_index()
test_list2 = test_list2['time'].values.tolist()
test_list1.extend(test_list2)
test_list1.sort()
test_list1


['2022-05-01 01:00:00',
 '2022-05-08 00:00:00',
 '2022-06-01 01:00:00',
 '2022-06-08 00:00:00',
 '2022-07-21 01:00:00',
 '2022-07-28 00:00:00',
 '2022-08-21 01:00:00',
 '2022-08-28 00:00:00']

建模分析

In [9]:
COLUMNS_Y = ['flow_{}'.format(i) for i in range(1, 21)]
COLUMNS_X = COLUMNS_Y + ['day', 'hour', 'dayofweek']
COLUMNS_X, COLUMNS_Y


(['flow_1',
  'flow_2',
  'flow_3',
  'flow_4',
  'flow_5',
  'flow_6',
  'flow_7',
  'flow_8',
  'flow_9',
  'flow_10',
  'flow_11',
  'flow_12',
  'flow_13',
  'flow_14',
  'flow_15',
  'flow_16',
  'flow_17',
  'flow_18',
  'flow_19',
  'flow_20',
  'day',
  'hour',
  'dayofweek'],
 ['flow_1',
  'flow_2',
  'flow_3',
  'flow_4',
  'flow_5',
  'flow_6',
  'flow_7',
  'flow_8',
  'flow_9',
  'flow_10',
  'flow_11',
  'flow_12',
  'flow_13',
  'flow_14',
  'flow_15',
  'flow_16',
  'flow_17',
  'flow_18',
  'flow_19',
  'flow_20'])

In [10]:
def add_time_feat(data):
    data['time'] = pd.to_datetime(data['time'])
    data['day'] = data['time'].dt.day
    data['hour'] = data['time'].dt.hour
    data['minute'] = data['time'].dt.minute
    data['dayofweek'] = data['time'].dt.dayofweek
    return data.sort_values('time').reset_index(drop=True)

def add_other_feat(data, columns):
    data['flow_sum'] = data[columns].sum()
    data['flow_median'] = data[columns].median()
    data['flow_mean'] = data[columns].mean()
    return data



In [11]:
df_hour = add_time_feat(df_hour)
df_hour.head()


Unnamed: 0,time,flow_1,flow_2,flow_3,flow_4,flow_5,flow_6,flow_7,flow_8,flow_9,...,flow_16,flow_17,flow_18,flow_19,flow_20,train or test,day,hour,minute,dayofweek
0,2022-01-01 01:00:00,29.7,14.6,54.7,40.1,3.0,49.7,10.9,1.1,5.0,...,3.5,6.8,,1.806,1.4,train,1,1,0,5
1,2022-01-01 02:00:00,21.9,9.0,38.0,27.7,2.4,30.2,6.4,0.4,2.6,...,2.3,4.5,,3.847,0.8,train,1,2,0,5
2,2022-01-01 03:00:00,16.9,4.5,28.9,22.9,1.3,19.7,3.8,0.5,1.4,...,1.1,2.4,,,0.5,train,1,3,0,5
3,2022-01-01 04:00:00,14.3,3.2,25.5,20.0,1.5,15.4,2.7,0.4,1.2,...,0.8,1.8,,,0.2,train,1,4,0,5
4,2022-01-01 05:00:00,14.9,3.5,26.4,20.6,1.2,17.5,2.2,0.5,1.2,...,0.9,1.9,,,0.3,train,1,5,0,5


In [12]:
class Trans:
    def __init__(self, data, name):
        self.min = max(0, np.percentile(data, 1))
        self.max = np.percentile(data, 99)
        self.base = self.max-self.min

    def transform(self, data, scale=True):
        _data = np.clip(data, self.min, self.max)
        if not scale:
            return _data
        return (_data-self.min)/self.base

class TransUtil:
    def __init__(self, data, exclude_cols=None):
        self.columns = data.columns
        self.exclude_cols = exclude_cols
        self.trans = {}
        for c in self.columns:
            if data[c].dtype not in [int, float]:
                print('column "{}" not init trans...'.format(c))
                continue

            if exclude_cols is None or (exclude_cols is not None and c not in exclude_cols):
                print('init trans column...', c)
                self.trans[c] = Trans(data[c].fillna(method='backfill').fillna(method='ffill'), c)

    def transform(self, data, col_name, scale=True):
        if self.exclude_cols is not None and col_name in self.exclude_cols:
            return data

        for t in self.trans:
            if t.startswith(col_name):
                return self.trans[t].transform(data, scale=scale)
        
        return data


In [13]:
trans_util = TransUtil(df_hour, exclude_cols=None) # 数据标准化


column "time" not init trans...
init trans column... flow_1
init trans column... flow_2
init trans column... flow_3
init trans column... flow_4
init trans column... flow_5
init trans column... flow_6
init trans column... flow_7
init trans column... flow_8
init trans column... flow_9
init trans column... flow_10
init trans column... flow_11
init trans column... flow_12
init trans column... flow_13
init trans column... flow_14
init trans column... flow_15
init trans column... flow_16
init trans column... flow_17
init trans column... flow_18
init trans column... flow_19
init trans column... flow_20
column "train or test" not init trans...
init trans column... day
init trans column... hour
init trans column... minute
init trans column... dayofweek


In [14]:
def generate_xy_pair(data, seq_len, trans_util, columns_x, columns_y):
    data_x = pd.DataFrame()
    for c in columns_x:
        data_x[c] = trans_util.transform(data[c].fillna(data[c].median()), c)

    data_y = pd.DataFrame()
    for c in columns_y:
        data_y[c] = trans_util.transform(data[c].fillna(data[c].median()), c, scale=False)

    data_x = data_x.values
    data_y = data_y.values
    
    print(data_x.shape, data_y.shape)

    d_x = []
    d_y = []
    for i in range(len(data_x)-seq_len*2+1):
        _x = data_x[i:i+seq_len]
        _y = data_y[i+seq_len:i+seq_len+seq_len]

        assert len(_x) == len(_y) == seq_len, (_x, _y, _x.shape, _y.shape, i, len(data_x))

        d_x.append(_x.T)
        d_y.append(_y.T)

    return np.asarray(d_x).transpose((0, 2, 1)), np.asarray(d_y).transpose((0, 2, 1))


In [15]:
data_x, data_y = generate_xy_pair(df_hour, seq_len=SEQ_LEN, trans_util=trans_util, columns_x=COLUMNS_X, columns_y=COLUMNS_Y)
data_x.shape, data_y.shape
data_x[0], data_y[0]


(5736, 23) (5736, 20)


(array([[0.19510716, 0.2526096 , 0.26320132, ..., 0.        , 0.04347826,
         0.83333333],
        [0.11625556, 0.13569937, 0.12541254, ..., 0.        , 0.08695652,
         0.83333333],
        [0.06570966, 0.04175365, 0.05033003, ..., 0.        , 0.13043478,
         0.83333333],
        ...,
        [0.63687829, 0.98538622, 0.92739274, ..., 0.2       , 0.95652174,
         0.66666667],
        [0.92094622, 0.6993737 , 0.67986799, ..., 0.2       , 1.        ,
         0.66666667],
        [0.26991508, 0.44050104, 0.38118812, ..., 0.23333333, 0.        ,
         0.83333333]]),
 array([[ 23.6  ,  12.2  ,  40.6  , ...,   3.932,   1.15 ,   1.4  ],
        [ 15.6  ,   5.   ,  32.6  , ...,   1.575,   0.509,   0.3  ],
        [ 12.4  ,   3.9  ,  25.1  , ...,   1.042,   0.394,   0.3  ],
        ...,
        [ 71.3  ,  46.3  , 133.3  , ...,  14.968,   6.192,   4.8  ],
        [ 60.7  ,  37.   , 105.5  , ...,  12.944,   5.072,   4.   ],
        [ 35.   ,  19.8  ,  67.5  , ...,   8.908,  

In [16]:
# 根据每段测试集将对应的训练数据/测试数据的idx提取出来
_train_idx_1 = df_hour[df_hour['time']<test_list1[0]].index.values.tolist()
_train_idx_2 = df_hour[(df_hour['time']>test_list1[1])&(df_hour['time']<test_list1[2])].index.values.tolist()
_train_idx_3 = df_hour[(df_hour['time']>test_list1[3])&(df_hour['time']<test_list1[4])].index.values.tolist()
_train_idx_4 = df_hour[(df_hour['time']>test_list1[5])&(df_hour['time']<test_list1[6])].index.values.tolist()

# 每一段数据包括上一段时间
train_idx_1 = _train_idx_1[:-SEQ_LEN*2]
train_idx_2 = train_idx_1 + _train_idx_2[:-SEQ_LEN*2]
train_idx_3 = train_idx_2 + _train_idx_3[:-SEQ_LEN*2]
train_idx_4 = train_idx_3 + _train_idx_4[:-SEQ_LEN*2]

test_idx_1 = _train_idx_1[-SEQ_LEN]
test_idx_2 = _train_idx_2[-SEQ_LEN]
test_idx_3 = _train_idx_3[-SEQ_LEN]
test_idx_4 = _train_idx_4[-SEQ_LEN]


In [17]:
len(_train_idx_1), len(_train_idx_2), len(_train_idx_3), len(_train_idx_4)


(2880, 576, 1032, 576)

In [18]:
len(train_idx_1), len(train_idx_2), len(train_idx_3), len(train_idx_4)


(2544, 2784, 3480, 3720)

In [19]:
test_idx_1, test_idx_2, test_idx_3, test_idx_4


(2712, 3456, 4656, 5400)

In [20]:
train_x_1 = data_x[train_idx_1]
train_y_1 = data_y[train_idx_1]
train_x_2 = data_x[train_idx_2]
train_y_2 = data_y[train_idx_2]
train_x_3 = data_x[train_idx_3]
train_y_3 = data_y[train_idx_3]
train_x_4 = data_x[train_idx_4]
train_y_4 = data_y[train_idx_4]

test_x_1 = data_x[test_idx_1]
test_x_2 = data_x[test_idx_2]
test_x_3 = data_x[test_idx_3]
test_x_4 = data_x[test_idx_4]

FEATURE_SIZE = train_x_1.shape[-1]
OUTPUT_SIZE = train_y_1.shape[-1]


In [21]:
train_x_1.shape, train_y_1.shape, test_x_1.shape


((2544, 168, 23), (2544, 168, 20), (168, 23))

In [22]:
import paddle
import paddle.nn as nn
import paddle.nn.functional as F

class Tt(nn.Layer):
    def __init__(self,
                 seq_len,
                 feature_size,
                 output_size,
                 use_model='lstm',
                 hidden_size=576,
                 num_hidden_layers=6,
                 num_attention_heads=6,
                 intermediate_size=3072,
                 hidden_act="gelu",
                 hidden_dropout_prob=0.1,
                 attention_probs_dropout_prob=0.1,
                 max_position_embeddings=512,
                 max_hour=25,
                 max_min=61,
                 max_dow=8,
                 max_ts=1441):
        super(Tt, self).__init__()

        self.use_model = use_model
        self.feature_size = feature_size

        # 如果有相应的时间embedding则可以使用
        self.th_embeddings = nn.Embedding(max_hour, hidden_size)
        self.tm_embeddings = nn.Embedding(max_min, hidden_size)
        self.td_embeddings = nn.Embedding(max_dow, hidden_size)
        self.tt_embeddings = nn.Embedding(max_ts, hidden_size)

        # 位置编码
        self.position_embeddings = nn.Embedding(max_position_embeddings, hidden_size)
        self.layer_norm = nn.LayerNorm(hidden_size)
        self.fc_inputs = nn.Linear(feature_size, hidden_size)

        encoder_layer = nn.TransformerEncoderLayer(
            hidden_size,
            num_attention_heads,
            intermediate_size,
            dropout=hidden_dropout_prob,
            activation=hidden_act,
            attn_dropout=attention_probs_dropout_prob,
            act_dropout=0)
        self.encoder = nn.TransformerEncoder(encoder_layer, num_hidden_layers)

        self.lstm = paddle.nn.LSTM(input_size=hidden_size, hidden_size=hidden_size, num_layers=2)

        self.fc_output_1 = nn.Linear(hidden_size, hidden_size)
        self.fc_output_2 = nn.Linear(hidden_size, hidden_size)
        self.fc_output_3 = nn.Linear(hidden_size, output_size)

    def forward(self,
                inputs,
                inputs_th=None,
                inputs_tm=None,
                inputs_td=None,
                inputs_tt=None,
                position_ids=None,
                attention_mask=None):

        if position_ids is None:
            ones = paddle.ones(inputs.shape[:2], dtype="int64")
            seq_length = paddle.cumsum(ones, axis=1)
            position_ids = seq_length - ones
            position_ids.stop_gradient = True

        position_embeddings = self.position_embeddings(position_ids)

        inputs = self.fc_inputs(inputs)
        inputs = nn.Tanh()(inputs)

        inputs = inputs + position_embeddings

        # 如果有相应的时间embedding则可以使用
        if inputs_th is not None:
            inputs += self.th_embeddings(inputs_th)
        
        if inputs_tm is not None:
            inputs += self.tm_embeddings(inputs_tm)

        if inputs_td is not None:
            inputs += self.td_embeddings(inputs_td)

        if inputs_tt is not None:
            inputs += self.tt_embeddings(inputs_tt)

        inputs = self.layer_norm(inputs)

        # 选择使用LSTM或者Transformer
        if self.use_model == 'lstm':
            encoder_outputs, (h, c) = self.lstm(inputs)
        elif self.use_model == 'transformer':
            if attention_mask is None:
                attention_mask = paddle.unsqueeze(
                    (paddle.zeros(inputs.shape[:2])).astype(
                        self.fc_inputs.weight.dtype) * -1e4,
                    axis=[1, 2])

            encoder_outputs = self.encoder(
                inputs,
                src_mask=attention_mask)

        output = self.fc_output_1(encoder_outputs)
        output = nn.ReLU()(output)
        output = self.fc_output_2(output)
        output = self.fc_output_3(output)

        return output



In [23]:
import paddle
import paddle.nn.functional as F
from paddle.metric import Accuracy
from paddle.io import DataLoader, BatchSampler
from paddlenlp.datasets import MapDataset
from paddlenlp.data import DataCollatorWithPadding
from paddlenlp.data import Dict, Stack, Pad


In [25]:
def calc_score(y_true, y_pred):
    return 1/(1+msle(np.clip(np.reshape(y_true, -1), 0, None), np.clip(np.reshape(y_pred, -1), 0, None)))

def eval_model(model, data_loader):
    model.eval()

    y_pred = []
    y_true = []
    for step, batch in enumerate(data_loader, start=1):
        data = batch['data'].astype('float32')
        label = batch['label'].astype('float32')

        # 计算模型输出
        output = model(inputs=data)
        y_pred.extend(output.numpy())
        y_true.extend(label.numpy())
    
    score = calc_score(y_true, y_pred)
    model.train()
    return score

def make_data_loader(data_x, idx, batch_size, data_y=None, shuffle=False):

    data = [{
        'data': data_x[i], 
        'label': 0 if data_y is None else data_y[i]} 
        for i in idx]
    ds = MapDataset(data)
    batch_sampler = BatchSampler(ds, batch_size=batch_size, shuffle=shuffle)
    return DataLoader(dataset=ds, batch_sampler=batch_sampler)



In [26]:
EPOCHS = 30
BATCH_SIZE = 256
CKPT_DIR = 'work/output'
K_FOLD = 5
epoch_base = 0
step_eval = 5
step_log = 100

def do_train(train_x, train_y, prefix):
    print('-'*20)
    print('training ...', prefix)
    print('train x:', np.shape(train_x), 'train y:', np.shape(train_y))

    paddle.seed(2022)

    for kfold, tv_idx in enumerate(KFold(n_splits=K_FOLD, shuffle=True, random_state=2022).split(train_x)):
        print('training fold...', kfold)

        train_idx, valid_idx = tv_idx

        model = Tt(seq_len=SEQ_LEN, feature_size=FEATURE_SIZE, output_size=OUTPUT_SIZE)

        train_data_loader = make_data_loader(
            train_x, train_idx, BATCH_SIZE, data_y=train_y, shuffle=True)
        valid_data_loader = make_data_loader(
            train_x, valid_idx, BATCH_SIZE, data_y=train_y, shuffle=False)

        optimizer = paddle.optimizer.AdamW(learning_rate=1e-4, parameters=model.parameters())
        criterion = paddle.nn.MSELoss()

        epochs = EPOCHS # 训练轮次
        save_dir = CKPT_DIR #训练过程中保存模型参数的文件夹
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)

        global_step = 0 #迭代次数
        tic_train = time.time()

        model.train()

        best_score = 0
        for epoch in range(1+epoch_base, epochs+epoch_base+1):
            for step, batch in enumerate(train_data_loader, start=1):
                data = batch['data'].astype('float32')
                label = batch['label'].astype('float32')

                # 计算模型输出
                output = model(inputs=data)
                loss = criterion(output, label)
                # print(loss)

                # 打印损失函数值、准确率、计算速度
                global_step += 1
                if global_step % step_eval == 0:
                    score = eval_model(model, valid_data_loader)            
                    if score > best_score:
                        # print('saving best model...', score)
                        _save_dir = os.path.join(save_dir, '{}_kfold_{}_best_model.pdparams'.format(prefix, kfold))
                        paddle.save(
                            model.state_dict(),
                            _save_dir)
                        best_score = score
                    if global_step % step_log == 0:
                        print(
                            'global step %d, epoch: %d, batch: %d, loss: %.5f, valid score: %.5f, speed: %.2f step/s'
                            % (global_step, epoch, step, loss, score,
                                10 / (time.time() - tic_train)))
                        tic_train = time.time()

                # 反向梯度回传，更新参数
                loss.backward()
                optimizer.step()
                optimizer.clear_grad()



In [27]:
def do_pred(test_x, prefix):
    print('-'*20)
    print('predict ...', prefix)
    print('predict x:', np.shape(test_x))

    # 预测
    test_data_loader = make_data_loader(
            [test_x], [0], BATCH_SIZE, data_y=None, shuffle=False)

    sub_df = []
    save_dir = CKPT_DIR

    for kfold in range(K_FOLD):
        print('predict kfold...', kfold)
        model = Tt(seq_len=SEQ_LEN, feature_size=FEATURE_SIZE, output_size=OUTPUT_SIZE)
        model.set_dict(paddle.load(os.path.join(save_dir, '{}_kfold_{}_best_model.pdparams'.format(prefix, kfold))))
        model.eval()

        y_pred = []
        for step, batch in enumerate(test_data_loader, start=1):
            data = batch['data'].astype('float32')
            label = batch['label'].astype('float32')

            # 计算模型输出
            output = model(inputs=data)
            y_pred.extend(output.numpy())

        sub_df.append(np.clip(y_pred, 0, None))
    
    return sub_df


In [28]:
# 依次训练每个测试集对应的模型
do_train(train_x_1, train_y_1, 'm1')
do_train(train_x_2, train_y_2, 'm2')
do_train(train_x_3, train_y_3, 'm3')
do_train(train_x_4, train_y_4, 'm4')


--------------------
training ... m1
train x: (2544, 168, 23) train y: (2544, 168, 20)
training fold... 0


W0406 10:10:46.552830   286 gpu_resources.cc:61] Please NOTE: device: 0, GPU Compute Capability: 7.0, Driver API Version: 11.2, Runtime API Version: 11.2
W0406 10:10:46.561936   286 gpu_resources.cc:91] device: 0, cuDNN Version: 8.2.


global step 100, epoch: 13, batch: 4, loss: 175.56583, valid score: 0.75590, speed: 0.25 step/s
global step 200, epoch: 25, batch: 8, loss: 24.61544, valid score: 0.94394, speed: 0.22 step/s
training fold... 1
global step 100, epoch: 13, batch: 4, loss: 179.69794, valid score: 0.75386, speed: 0.28 step/s
global step 200, epoch: 25, batch: 8, loss: 27.43861, valid score: 0.94326, speed: 0.22 step/s
training fold... 2
global step 100, epoch: 13, batch: 4, loss: 185.64319, valid score: 0.74746, speed: 0.28 step/s
global step 200, epoch: 25, batch: 8, loss: 25.38878, valid score: 0.94316, speed: 0.22 step/s
training fold... 3
global step 100, epoch: 13, batch: 4, loss: 183.27307, valid score: 0.74655, speed: 0.26 step/s
global step 200, epoch: 25, batch: 8, loss: 26.38558, valid score: 0.94322, speed: 0.20 step/s
training fold... 4
global step 100, epoch: 13, batch: 4, loss: 190.35733, valid score: 0.74302, speed: 0.26 step/s
global step 200, epoch: 25, batch: 8, loss: 27.4549

In [29]:
# 以此预测数据
pred_1 = do_pred(test_x_1, 'm1')
pred_2 = do_pred(test_x_2, 'm2')
pred_3 = do_pred(test_x_3, 'm3')
pred_4 = do_pred(test_x_4, 'm4')


--------------------
predict ... m1
predict x: (168, 23)
predict kfold... 0
predict kfold... 1
predict kfold... 2
predict kfold... 3
predict kfold... 4
--------------------
predict ... m2
predict x: (168, 23)
predict kfold... 0
predict kfold... 1
predict kfold... 2
predict kfold... 3
predict kfold... 4
--------------------
predict ... m3
predict x: (168, 23)
predict kfold... 0
predict kfold... 1
predict kfold... 2
predict kfold... 3
predict kfold... 4
--------------------
predict ... m4
predict x: (168, 23)
predict kfold... 0
predict kfold... 1
predict kfold... 2
predict kfold... 3
predict kfold... 4


In [30]:
np.shape(pred_1), np.shape(pred_2), np.shape(pred_3), np.shape(pred_4)


((5, 1, 168, 20), (5, 1, 168, 20), (5, 1, 168, 20), (5, 1, 168, 20))

In [34]:
result = np.vstack((
    np.mean(pred_1, axis=0).squeeze(),
    np.mean(pred_2, axis=0).squeeze(),
    np.mean(pred_3, axis=0).squeeze(),
    np.mean(pred_4, axis=0).squeeze()))

result[result<0] = 0
result = pd.concat([df_sub['time'], pd.DataFrame(result)], axis=1)
result.columns = df_sub.columns
result.to_csv('data/result_0929_1.csv', index=False, encoding='utf-8')
result


Unnamed: 0,time,flow_1,flow_2,flow_3,flow_4,flow_5,flow_6,flow_7,flow_8,flow_9,...,flow_11,flow_12,flow_13,flow_14,flow_15,flow_16,flow_17,flow_18,flow_19,flow_20
0,2022-05-01 01:00:00,16.028769,8.938498,25.824286,18.263462,2.319455,34.091320,7.098834,1.042198,3.206811,...,2.627367,1.591180,1.133499,1.451102,0.945343,2.269308,4.031459,2.973471,1.383120,0.913169
1,2022-05-01 02:00:00,13.725288,5.646388,22.071003,14.292566,1.671022,25.623486,4.295956,0.697841,2.057410,...,1.590369,1.177243,0.769452,0.792114,0.600167,1.386405,2.588089,2.170932,1.028667,0.689110
2,2022-05-01 03:00:00,13.469528,4.543365,21.442959,13.181372,1.487662,23.248003,3.364924,0.605411,1.712633,...,1.230370,1.091679,0.663926,0.576019,0.508951,1.095200,2.116816,1.994373,0.977895,0.606753
3,2022-05-01 04:00:00,14.657736,4.821479,23.173054,14.208540,1.608277,25.092999,3.574058,0.663720,1.855825,...,1.291748,1.189711,0.714366,0.605407,0.548337,1.167339,2.260079,2.167644,1.083925,0.651702
4,2022-05-01 05:00:00,17.631962,6.325089,27.860052,17.469187,2.022393,31.272532,4.743249,0.853600,2.416523,...,1.722761,1.475549,0.903938,0.833288,0.698273,1.524537,2.941926,2.667717,1.325687,0.833104
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
667,2022-08-27 20:00:00,64.775070,33.973736,105.530563,70.476746,8.962332,133.558868,26.417416,3.818425,12.041477,...,9.812472,5.861829,4.763965,5.611264,3.603678,8.674849,15.281856,11.547084,5.263415,3.861406
668,2022-08-27 21:00:00,75.138062,41.462505,121.219101,80.331474,10.705835,157.681290,32.732964,4.685743,14.867587,...,12.036128,7.044459,5.620816,7.148047,4.415468,10.754012,18.737316,13.825241,6.411537,4.410372
669,2022-08-27 22:00:00,76.245522,42.621239,124.174049,84.492027,10.864219,158.392609,33.683430,4.788584,15.091476,...,12.583740,7.364081,5.833311,7.438890,4.540409,11.327089,19.371675,14.325354,6.528169,4.468001
670,2022-08-27 23:00:00,60.700447,32.236477,103.181885,76.711304,8.193936,117.980736,24.911335,3.513865,10.703789,...,9.918112,6.050180,4.803396,5.445399,3.438726,9.059504,14.778265,11.496176,4.812160,3.615540
