In [1]:
import pandas as pd
import numpy as np

## Rolling List

使用神經網絡時，我們需要將歷史數據作為 input 餵給網絡進行訓練。Trading 數據通常會用 moving window 的方式取樣：x 軸為各項特徵，y 軸為 time slot，因此每個樣本是一個二維數組。初步想法是把它拉平成一維數組再作為 input。

不過單獨寫成 function 後，突然想到可能不需要，也許 pytorch 的 input 可以使用 2 維數組？


二維數組拉平有很多種方式，例如 pandas 裡的 rolling 取出一個歷史片段，將片段裡的 values reshape 成一維。

reshape(-1) 的作用類似 flatten，將二維數組串接成一維，剛好是我們想要的特徵分組

不過 rolling 的前 N-1 個視窗裡的數據不足 N 筆，可以在 reshape 後用 pad 在前面補 0，把長度補齊。

In [2]:
def flat(arr, length):
    """Flatten ``arr`` to one dimension and left-pad it with zeros.

    Args:
        arr: Object exposing ``reshape`` (e.g. a NumPy array) to flatten.
        length: Number of elements the returned array should have.

    Returns:
        A 1-D array of ``length`` elements: the zero padding comes first,
        followed by the flattened values of ``arr``.

    Note:
        If ``arr`` holds more than ``length`` elements the pad width is
        negative, which ``np.pad`` rejects with an error.
    """
    flattened = arr.reshape(-1)
    missing = length - len(flattened)
    # Pad only on the left so the actual values stay at the end of the row.
    return np.pad(flattened, (missing, 0), mode="constant", constant_values=0)

def rolling_to_list(df, rolling_len):
    """Convert each rolling window of ``df`` into one flat, fixed-length row.

    Args:
        df: DataFrame to roll over.
        rolling_len: Window length passed to ``df.rolling``.

    Returns:
        A list with one 1-D array per window yielded by ``df.rolling``. Each
        array has ``df.shape[1] * rolling_len`` elements; windows that hold
        fewer values are left-padded with zeros by ``flat``.
    """
    row_width = df.shape[1] * rolling_len
    rows = []
    for window in df.rolling(rolling_len):
        # Every window is flattened and padded to the same width.
        rows.append(flat(window.values, row_width))
    return rows

In [3]:
# Demo with a single feature column: 12 random values as a (12, 1) frame,
# shown next to its flattened rolling windows of length 2 and 3.
# NOTE(review): the saved output shows only rows 0-9 although this cell
# generates 12 rows -- it may be stale or truncated; re-run to confirm.
d = np.random.randn(12, 1)
da = pd.DataFrame(d)
display(
    da,
    *[pd.DataFrame(rolling_to_list(da, window)) for window in (2, 3)],
)

Unnamed: 0,0
0,0.529701
1,0.030477
2,0.748231
3,-0.195844
4,-0.434935
5,-0.423516
6,-0.656387
7,-1.991834
8,0.701219
9,-1.155216


Unnamed: 0,0,1
0,0.0,0.529701
1,0.529701,0.030477
2,0.030477,0.748231
3,0.748231,-0.195844
4,-0.195844,-0.434935
5,-0.434935,-0.423516
6,-0.423516,-0.656387
7,-0.656387,-1.991834
8,-1.991834,0.701219
9,0.701219,-1.155216


Unnamed: 0,0,1,2
0,0.0,0.0,0.529701
1,0.0,0.529701,0.030477
2,0.529701,0.030477,0.748231
3,0.030477,0.748231,-0.195844
4,0.748231,-0.195844,-0.434935
5,-0.195844,-0.434935,-0.423516
6,-0.434935,-0.423516,-0.656387
7,-0.423516,-0.656387,-1.991834
8,-0.656387,-1.991834,0.701219
9,-1.991834,0.701219,-1.155216


In [4]:
# Demo with two feature columns: 24 random values as a (12, 2) frame,
# shown next to its flattened rolling windows of length 2 and 3.
# NOTE(review): the saved output shows only rows 0-9 although this cell
# generates 12 rows -- it may be stale or truncated; re-run to confirm.
d = np.random.randn(12, 2)
da = pd.DataFrame(d)
display(
    da,
    *[pd.DataFrame(rolling_to_list(da, window)) for window in (2, 3)],
)

Unnamed: 0,0,1
0,0.58036,0.228347
1,-1.514504,1.451366
2,0.950389,0.250313
3,0.004316,-0.955401
4,-0.924937,0.300233
5,0.771205,-1.087573
6,-0.62943,0.682536
7,-0.179127,-0.032202
8,0.135608,1.183277
9,-0.960155,-0.045412


Unnamed: 0,0,1,2,3
0,0.0,0.0,0.58036,0.228347
1,0.58036,0.228347,-1.514504,1.451366
2,-1.514504,1.451366,0.950389,0.250313
3,0.950389,0.250313,0.004316,-0.955401
4,0.004316,-0.955401,-0.924937,0.300233
5,-0.924937,0.300233,0.771205,-1.087573
6,0.771205,-1.087573,-0.62943,0.682536
7,-0.62943,0.682536,-0.179127,-0.032202
8,-0.179127,-0.032202,0.135608,1.183277
9,0.135608,1.183277,-0.960155,-0.045412


Unnamed: 0,0,1,2,3,4,5
0,0.0,0.0,0.0,0.0,0.58036,0.228347
1,0.0,0.0,0.58036,0.228347,-1.514504,1.451366
2,0.58036,0.228347,-1.514504,1.451366,0.950389,0.250313
3,-1.514504,1.451366,0.950389,0.250313,0.004316,-0.955401
4,0.950389,0.250313,0.004316,-0.955401,-0.924937,0.300233
5,0.004316,-0.955401,-0.924937,0.300233,0.771205,-1.087573
6,-0.924937,0.300233,0.771205,-1.087573,-0.62943,0.682536
7,0.771205,-1.087573,-0.62943,0.682536,-0.179127,-0.032202
8,-0.62943,0.682536,-0.179127,-0.032202,0.135608,1.183277
9,-0.179127,-0.032202,0.135608,1.183277,-0.960155,-0.045412
