In [107]:
import datetime

import jax
import numpy as np
import pandas as pd
import datetime
import os
import torch
from torch.utils.data import Dataset, random_split, DataLoader
from importlib import reload

import sys

import data_provider.data_prep, data_provider.data_loader, utils.timefeatures

# Re-execute the project modules so that edits made on disk are picked up
# by the running kernel (importlib.reload).
reload(data_provider.data_prep)
reload(data_provider.data_loader)
reload(utils.timefeatures)

# Do not truncate columns when a DataFrame is displayed.
pd.set_option("display.max_columns", None)

# CSV file function

In [217]:
%%time
# sample from the trading data
def downsampling(df, freq="10S", use_weighted_mean=True):
    """
    Resample a 10-level order-book frame to a fixed frequency.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a datetime column ``time`` and, for every level ``k`` in
        1..10, the columns ``L{k}-AskPrice``, ``L{k}-AskSize``,
        ``L{k}-BidPrice`` and ``L{k}-BidSize``.
    freq : str
        Resampling rule handed to ``DataFrame.resample``.
    use_weighted_mean : bool
        If True, the period price is the size-weighted mean price of the rows
        in the period (equal to the mean of each price repeated ``size`` times
        for integer sizes). If False, the period price is the price of the row
        with the largest size in the period, for the requested level and side.

    Returns
    -------
    dict
        Keys ``'_p_Ask'``, ``'_q_Ask'``, ``'_p_Bid'``, ``'_q_Bid'``; each value
        is a list of 10 Series (level 1 first) indexed by period. Prices of
        empty periods are forward-filled from the previous period; sizes are
        period totals, with empty periods set to 0.
    """
    grp_df = df.set_index('time').resample(f'{freq}')

    def _cols(level, side):
        # Column names of the price/size pair for one level and side.
        return f'L{level}-{side}Price', f'L{level}-{side}Size'

    if use_weighted_mean:
        def _agg_price(level, side="Ask"):  # period price = size-weighted mean
            price_col, size_col = _cols(level, side)

            def _weighted_mean(x):
                # Column access comes first: when pandas probes the function
                # with a single column it must fail with KeyError so that
                # resample falls back to calling it per group.
                prices, sizes = x[price_col], x[size_col]
                total = sizes.sum()
                if not total > 0:
                    # Empty period (or zero total size): no defined price.
                    return np.nan
                return (prices * sizes).sum() / total
            return _weighted_mean
    else:
        def _agg_price(level, side="Ask"):  # period price = price at the largest single size
            price_col, size_col = _cols(level, side)

            def _price_at_largest_size(x):
                prices, sizes = x[price_col], x[size_col]
                if len(sizes) == 0:
                    return np.nan
                # argmax is positional, so pair it with positional .iloc.
                return prices.iloc[np.argmax(sizes.to_numpy())]
            return _price_at_largest_size

    def _agg_size(level, side="Ask"):
        _, size_col = _cols(level, side)
        return lambda x: np.sum(x[size_col])

    my_dict = {'_p_Ask': [], '_q_Ask': [], '_p_Bid': [], '_q_Bid': []}

    for i in range(1, 11):
        for side in ['Ask', 'Bid']:
            _p = grp_df.apply(_agg_price(i, side=side)).rename(f'L{i}_{side}Price').ffill()
            _q = grp_df.apply(_agg_size(i, side=side)).rename(f'L{i}_{side}Size').fillna(0)
            my_dict[f'_p_{side}'].append(_p)
            my_dict[f'_q_{side}'].append(_q)

    return my_dict

my_dict = downsampling(df, freq="10S", use_weighted_mean=True)

CPU times: user 21.1 s, sys: 28.2 ms, total: 21.2 s
Wall time: 21.2 s


In [219]:
def create_pv_array(my_dict):
    """
    Turn the dictionary produced by ``downsampling`` into a 3-D numpy array.

    Parameters
    ----------
    my_dict : dict
        Must hold the keys ``'_p_Ask'``, ``'_q_Ask'``, ``'_p_Bid'`` and
        ``'_q_Bid'``, each a list of equally indexed Series ordered from
        level 1 upwards.

    Returns
    -------
    numpy.ndarray
        Shape ``(n_periods, n_levels, 4)``. The last axis holds, in order:
        ask price, log cumulative ask size, bid price, log cumulative bid
        size. Ask columns keep the level order; bid columns are in reversed
        level order, so both sides run from low to high price given the
        ordering noted below.

    Notes
    -----
    A cumulative size of 0 yields ``-inf`` (``np.log(0)``).
    """
    # Ask side. Price: A1 < A2 < A3 ..., so level order is already low -> high.
    _p_A_lh = pd.concat(my_dict['_p_Ask'], axis=1)   # ask prices, low -> high
    _q_A_lh = pd.concat(my_dict['_q_Ask'], axis=1)   # ask sizes, low -> high
    _logcum_q_A_lh = np.log(_q_A_lh.cumsum(axis=1))  # log of running total across levels

    # Bid side. Price: B1 > B2 > B3 ... (B1 is the highest bidder), so the
    # level lists are reversed to obtain low -> high price order.
    _p_B_lh = pd.concat(my_dict['_p_Bid'][::-1], axis=1)  # bid prices, low -> high
    _q_B_lh = pd.concat(my_dict['_q_Bid'][::-1], axis=1)  # bid sizes, low -> high
    _logcum_q_B_lh = np.log(_q_B_lh.cumsum(axis=1))       # log of running total across levels

    return np.dstack([_p_A_lh, _logcum_q_A_lh, _p_B_lh, _logcum_q_B_lh])

np_arr = create_pv_array(my_dict)
np_arr[:,:,1][0:3]

array([[ 6.96224346,  7.98582467,  8.59951023,  9.44192817, 10.26346703,
        10.46216004, 10.76947397, 11.07294655, 11.17237693, 11.30404225],
       [ 6.73933663,  7.40913644,  7.7406644 ,  8.0426995 ,  8.72826416,
         8.87388814,  9.69541733,  9.7464243 ,  9.80879218, 10.23433687],
       [ 5.80513497,  6.80682936,  7.14834574,  7.40306109,  7.84227878,
         8.26487826,  8.71358201,  9.18122047,  9.23561818,  9.4417695 ]])

# CSV file process walkthrough

In [109]:
# Load one day of raw order-book data and normalise the timestamp column.
df = pd.read_csv(f"../data/SB/raw/SB_2022-01-11.csv")
df = df.rename(columns={'Date-Time': 'time'})  # see the pandas DataFrame.rename documentation
df['time'] = pd.to_datetime(df['time'])
print(f'total length {len(df)}')

# datetime64 ns utc to datetime
# https://stackoverflow.com/questions/62917882/convert-datetime64ns-utc-pandas-column-to-datetime

# Column-name lists for the 10 book levels: prices only, sizes only, and
# everything that is kept (time plus price/size for both sides).
select_cols, price_cols, size_cols = ["time"], [], []
for k in range(1, 11):
    price_cols.extend([f"L{k}-AskPrice", f"L{k}-BidPrice"])
    size_cols.extend([f"L{k}-AskSize", f"L{k}-BidSize"])
    select_cols.extend([f'L{k}-AskPrice', f'L{k}-BidPrice', f'L{k}-AskSize', f'L{k}-BidSize'])

# Keep only the selected columns and discard rows with any missing value.
# 'time' deliberately stays a regular column (it is not set as the index).
df = df[select_cols].dropna()

#--- demo of several functions
def price_mode(level, side="Ask"):
    """
    Build an aggregator returning the price quoted with the largest size.

    Parameters
    ----------
    level : int
        Book level; selects the ``L{level}-{side}Price`` / ``L{level}-{side}Size`` columns.
    side : str
        ``"Ask"`` or ``"Bid"``.

    Returns
    -------
    callable
        ``f(x)`` taking one group (a DataFrame) and returning the price of the
        row whose size is largest within that group, or NaN for an empty group.
    """
    price_col = f'L{level}-{side}Price'
    size_col = f'L{level}-{side}Size'

    def _price_at_largest_size(x):
        prices, sizes = x[price_col], x[size_col]
        if len(sizes) == 0:
            return np.nan
        # argmax is a position, so use positional .iloc rather than a label lookup.
        return prices.iloc[np.argmax(sizes.to_numpy())]

    return _price_at_largest_size

def size_total(level, side="Ask"):
    """Build an aggregator that sums the ``L{level}-{side}Size`` column of a group."""
    size_col = f'L{level}-{side}Size'

    def _total(x):
        return np.sum(x[size_col])

    return _total

# Size-weighted mean price, computed as the plain mean of every price repeated
# `size` times; an empty group gives NaN rather than a zero-division error.
def wt_mean(level, side="Ask"):
    """Build an aggregator returning the size-weighted mean of ``L{level}-{side}Price``."""
    price_col = f'L{level}-{side}Price'
    size_col = f'L{level}-{side}Size'

    def _mean_of_repeated(x):
        repeated = np.repeat(x[price_col], x[size_col])
        return np.mean(repeated)

    return _mean_of_repeated

# Bucket the rows into 5-second windows keyed on the 'time' column.
grp_df = df.groupby(pd.Grouper(key='time',freq="5S"))

# Size-weighted mean L1 ask price per window (NaN for windows without rows).
res = grp_df.apply(wt_mean(1, side='Ask'))
# Prints, in order: the first window's price_mode value, the number of windows
# whose weighted mean is NaN, the first window's total L1 ask size, and the
# full weighted-mean series. There is no separator between the last two
# fields, so they appear run together in the output.
print(f'{grp_df.apply(price_mode(1))[0]}, {sum(res.isnull())}, '
      f'{grp_df.apply(size_total(1))[0]}'
      f'{res}')

total length 41869
18.0, 1299, 553.0time
2022-01-11 08:30:00+00:00    17.964051
2022-01-11 08:30:05+00:00    17.946382
2022-01-11 08:30:10+00:00    17.940000
2022-01-11 08:30:15+00:00    17.940000
2022-01-11 08:30:20+00:00    17.940000
                               ...    
2022-01-11 17:59:40+00:00    18.117795
2022-01-11 17:59:45+00:00    18.117619
2022-01-11 17:59:50+00:00    18.118746
2022-01-11 17:59:55+00:00    18.128299
2022-01-11 18:00:00+00:00    18.140207
Freq: 5S, Length: 6841, dtype: float64


In [135]:
# With the size-weighted mean as the period price and the summed size as the
# period volume, price * volume reproduces the total traded amount.
A = np.arange(6).reshape(3, 2)   # column 0 = price, column 1 = size
_px, _qty = A[:, 0], A[:, 1]
_wt = np.repeat(_px, _qty).mean()

print(np.sum(_wt * _qty, axis=None))
print(np.sum(_px * _qty))
print(np.sum(_wt * np.sum(_qty), axis=0))

26.0
26
26.0


In [117]:
# Some resampled windows contain no rows, so their per-column means are NaN.
tmp = grp_df.mean()
# Boolean mask over windows: True where at least one column is NaN.
null_idx = tmp.isnull().any(axis=1)
tmp[null_idx].head(2)

Unnamed: 0_level_0,L1-AskPrice,L1-BidPrice,L1-AskSize,L1-BidSize,L2-AskPrice,L2-BidPrice,L2-AskSize,L2-BidSize,L3-AskPrice,L3-BidPrice,...,L8-AskSize,L8-BidSize,L9-AskPrice,L9-BidPrice,L9-AskSize,L9-BidSize,L10-AskPrice,L10-BidPrice,L10-AskSize,L10-BidSize
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022-01-11 08:31:50+00:00,,,,,,,,,,,...,,,,,,,,,,
2022-01-11 08:31:55+00:00,,,,,,,,,,,...,,,,,,,,,,


In [136]:
# Prices of empty windows carry the last observed value forward; sizes of
# empty windows become 0. Series/DataFrame.ffill() replaces the deprecated
# fillna(method='ffill').
tmp[price_cols] = tmp[price_cols].ffill()
tmp[size_cols] = tmp[size_cols].fillna(0)
tmp[null_idx].head(2)

Unnamed: 0_level_0,L1-AskPrice,L1-BidPrice,L1-AskSize,L1-BidSize,L2-AskPrice,L2-BidPrice,L2-AskSize,L2-BidSize,L3-AskPrice,L3-BidPrice,...,L8-AskSize,L8-BidSize,L9-AskPrice,L9-BidPrice,L9-AskSize,L9-BidSize,L10-AskPrice,L10-BidPrice,L10-AskSize,L10-BidSize
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022-01-11 08:31:50+00:00,17.94,17.93,0.0,0.0,17.95,17.92,0.0,0.0,17.96,17.91,...,0.0,0.0,18.02,17.85,0.0,0.0,18.03,17.84,0.0,0.0
2022-01-11 08:31:55+00:00,17.94,17.93,0.0,0.0,17.95,17.92,0.0,0.0,17.96,17.91,...,0.0,0.0,18.02,17.85,0.0,0.0,18.03,17.84,0.0,0.0


In [113]:
df.set_index('time').resample('10S').sum()

Unnamed: 0_level_0,L1-AskPrice,L1-BidPrice,L1-AskSize,L1-BidSize,L2-AskPrice,L2-BidPrice,L2-AskSize,L2-BidSize,L3-AskPrice,L3-BidPrice,L3-AskSize,L3-BidSize,L4-AskPrice,L4-BidPrice,L4-AskSize,L4-BidSize,L5-AskPrice,L5-BidPrice,L5-AskSize,L5-BidSize,L6-AskPrice,L6-BidPrice,L6-AskSize,L6-BidSize,L7-AskPrice,L7-BidPrice,L7-AskSize,L7-BidSize,L8-AskPrice,L8-BidPrice,L8-AskSize,L8-BidSize,L9-AskPrice,L9-BidPrice,L9-AskSize,L9-BidSize,L10-AskPrice,L10-BidPrice,L10-AskSize,L10-BidSize
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1
2022-01-11 08:30:00+00:00,2819.29,2817.02,1056.0,2759.0,2820.86,2815.24,1883.0,1314.0,2822.43,2813.67,2490.0,1648.0,2824.00,2812.10,7177.0,3336.0,2825.57,2810.53,16060.0,4640.0,2827.14,2808.96,6301.0,4514.0,2828.71,2807.39,12580.0,1788.0,2830.28,2805.82,16858.0,1324.0,2831.85,2804.25,6733.0,3518.0,2833.42,2802.68,10011.0,5434.0
2022-01-11 08:30:10+00:00,897.00,896.50,845.0,901.0,897.50,896.00,806.0,540.0,898.00,895.50,649.0,740.0,898.50,895.00,811.0,2800.0,899.00,894.50,3064.0,750.0,899.50,894.00,968.0,568.0,900.00,893.50,9100.0,600.0,900.50,893.00,850.0,1088.0,901.00,892.50,1100.0,3450.0,901.50,892.00,9650.0,705.0
2022-01-11 08:30:20+00:00,520.07,519.78,332.0,808.0,520.36,519.49,572.0,446.0,520.65,519.20,368.0,1215.0,520.94,518.91,369.0,787.0,521.23,518.62,905.0,325.0,521.52,518.33,1339.0,423.0,521.81,518.04,2200.0,615.0,522.10,517.75,3628.0,1561.0,522.39,517.46,543.0,956.0,522.68,517.17,2348.0,1565.0
2022-01-11 08:30:30+00:00,484.11,483.84,452.0,409.0,484.38,483.57,679.0,524.0,484.65,483.30,270.0,1485.0,484.92,483.03,382.0,324.0,485.19,482.76,432.0,270.0,485.46,482.49,1620.0,459.0,485.73,482.22,540.0,675.0,486.00,481.95,4914.0,1863.0,486.27,481.68,459.0,410.0,486.54,481.41,594.0,2025.0
2022-01-11 08:30:40+00:00,233.09,232.96,390.0,111.0,233.22,232.83,222.0,240.0,233.35,232.70,131.0,716.0,233.48,232.57,208.0,156.0,233.61,232.44,209.0,131.0,233.74,232.31,780.0,221.0,233.87,232.18,260.0,325.0,234.00,232.05,2361.0,897.0,234.13,231.92,223.0,210.0,234.26,231.79,286.0,975.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-01-11 17:59:20+00:00,597.80,597.47,1008.0,678.0,598.13,597.14,907.0,1305.0,598.46,596.81,1633.0,1809.0,598.79,596.48,2298.0,1839.0,599.12,596.15,2593.0,1782.0,599.45,595.82,1740.0,2742.0,599.78,595.49,786.0,2240.0,600.11,595.16,1047.0,773.0,600.44,594.83,3086.0,1117.0,600.77,594.50,2714.0,1751.0
2022-01-11 17:59:30+00:00,380.51,380.24,682.0,346.0,380.72,380.03,638.0,802.0,380.93,379.82,1371.0,1015.0,381.14,379.61,1527.0,1163.0,381.35,379.40,1693.0,1135.0,381.56,379.19,541.0,1547.0,381.77,378.98,533.0,1701.0,381.98,378.77,844.0,448.0,382.19,378.56,2861.0,672.0,382.40,378.35,641.0,1015.0
2022-01-11 17:59:40+00:00,452.84,452.53,406.0,809.0,453.09,452.28,645.0,783.0,453.34,452.03,1060.0,1259.0,453.59,451.78,1716.0,1284.0,453.84,451.53,1897.0,1379.0,454.09,451.28,1503.0,2604.0,454.34,451.03,593.0,751.0,454.59,450.78,759.0,673.0,454.84,450.53,1925.0,920.0,455.09,450.28,2481.0,1588.0
2022-01-11 17:59:50+00:00,978.52,977.83,893.0,1023.0,979.06,977.29,1544.0,1325.0,979.60,976.75,2952.0,2155.0,980.14,976.21,3657.0,2466.0,980.68,975.67,3229.0,2796.0,981.22,975.13,1795.0,4159.0,981.76,974.59,1726.0,3011.0,982.30,974.05,3121.0,2358.0,982.84,973.51,4755.0,1596.0,983.38,972.97,2970.0,2618.0


In [None]:
# Scratch copy of the statements inside create_pv_array (cell not executed).
_p_A_lh = pd.concat( my_dict['_p_Ask'], axis=1) # ask prices, level order (low->high price)
_q_A_lh = pd.concat( my_dict['_q_Ask'], axis=1) # ask sizes, level order (low->high price)
_logcum_q_A_lh = np.log(_q_A_lh.cumsum(axis=1)) # log of cumulative size across the low->high price levels
# Price: B1 > B2 > B3 ... B1 is highest bidder
_p_B_lh = pd.concat( my_dict['_p_Bid'][::-1], axis=1) # bid prices, reversed level order (low->high price)
_q_B_lh = pd.concat( my_dict['_q_Bid'][::-1], axis=1) # bid sizes, reversed level order (low->high price); not used below
_q_B_hl = pd.concat( my_dict['_q_Bid'], axis=1) # bid sizes, level order (high->low price)
_logcum_q_B_lh = np.log(_q_B_hl[_q_B_hl.columns[::-1]].cumsum(axis=1)) # log of cumulative size across the low->high price levels

# dataset walkthrough

- Convert the date strings to timestamps
- Remove the time zone
- Create the year, month, and day columns properly

In [77]:
df_raw = pd.read_csv(os.path.join("../data/six/ETT-small",'ETTh1.csv'))
df_raw.head(2)

Unnamed: 0,date,HUFL,HULL,MUFL,MULL,LUFL,LULL,OT
0,2016-07-01 00:00:00,5.827,2.009,1.599,0.462,4.203,1.34,30.531
1,2016-07-01 01:00:00,5.693,2.076,1.492,0.426,4.142,1.371,27.787001


In [84]:
# Pick the split and derive its [border1, border2) row range.
flag = 'train'
type_map = dict(train=0, val=1, test=2)
set_type = type_map[flag]
seq_len = 24 * 4 * 4

# Split end points in rows: 12 "months" of 30 days x 24 hours for train,
# then 4 more for validation and 4 more for test.
_train_end = 12 * 30 * 24
_val_end = _train_end + 4 * 30 * 24
_test_end = _train_end + 8 * 30 * 24
border2s = [_train_end, _val_end, _test_end]
# Validation and test start seq_len rows early so their first window has history.
border1s = [0, _train_end - seq_len, _val_end - seq_len]  # for train, valid, test
border1, border2 = border1s[set_type], border2s[set_type]

print(border2s,border1s, seq_len, border1, border2)

[8640, 11520, 14400] [0, 8256, 11136] 384 0 8640


In [50]:
# Choose the value columns: every column after the first for 'M'/'MS',
# or only the target column for 'S'.
features = 'S'
target = 'OT'
if features in ('M', 'MS'):
    cols_data = df_raw.columns[1:]
    df_data = df_raw[cols_data]
elif features == 'S':
    df_data = df_raw[[target]]

In [82]:
from sklearn.preprocessing import StandardScaler

# Boolean switch. The previous value was the string 'True', which is truthy
# whatever its text, so the else-branch could never be selected by editing it.
scale = True
scaler = StandardScaler()

if scale:
    # Fit on the first (train) border range only, then transform every row.
    train_data = df_data[border1s[0]:border2s[0]] # 0:8640
    scaler.fit(train_data.values)
    data = scaler.transform(df_data.values)
else:
    data = df_data.values

data[:3]

array([[1.46055158],
       [1.16152666],
       [1.16152666]])

In [None]:
# Take an explicit copy of the date rows for the chosen split so that the
# column assignment below writes to an independent frame instead of a slice
# of df_raw (avoids pandas' SettingWithCopyWarning).
df_stamp = df_raw[['date']][border1:border2].copy()
df_stamp['date'] = pd.to_datetime(df_stamp['date'])

In [76]:
from utils.timefeatures import time_features
freq = 'h'
timeenc = 0
if timeenc == 0:
    # Calendar fields via the vectorised .dt accessor. The earlier
    # `.apply(func, 1)` passed 1 as the deprecated `convert_dtype` argument.
    _dates = df_stamp['date'].dt
    df_stamp['month'] = _dates.month
    df_stamp['day'] = _dates.day
    df_stamp['weekday'] = _dates.weekday   # Monday=0, same as Timestamp.weekday()
    df_stamp['hour'] = _dates.hour
    # Cast to int64 so the array dtype matches what the per-row apply produced.
    data_stamp = df_stamp.drop(columns=['date']).astype('int64').values
elif timeenc == 1:
    data_stamp = time_features(pd.to_datetime(df_stamp['date'].values), freq=freq)
    data_stamp = data_stamp.transpose(1, 0)
data_stamp[:2]

array([[7, 1, 4, 0],
       [7, 1, 4, 1]])

In [93]:
data_x = data[border1:border2]
data_y = data[border1:border2]

In [100]:
# Walk through how one sample's input and target windows are indexed.
seq_len, label_len, pred_len = 24 * 4 * 4, 24 * 4, 24 * 4

print(f'seq_len {seq_len}, label_len {label_len}, pred_len {pred_len}')


# Input window: seq_len rows starting at `index`.
index=100
s_begin, s_end = index, index + seq_len

print(f's_begin, {s_begin} seq_len {seq_len}, s_end {s_end}')

# Target window: the last label_len rows of the input plus pred_len new rows.
r_begin = s_end - label_len
r_end = r_begin + label_len + pred_len

print(f'r_begin {r_begin}, r_end {r_end}')

seq_x, seq_y = data_x[s_begin:s_end], data_y[r_begin:r_end]
seq_x_mark, seq_y_mark = data_stamp[s_begin:s_end], data_stamp[r_begin:r_end]

print(f'seq_x {seq_x.shape}, seq_x_mark {seq_x_mark.shape}, {seq_x_mark[0]}')

seq_len 384, label_len 96, pred_len 96
s_begin, 100 seq_len 384, s_end 484
r_begin 388, r_end 580
seq_x (384, 1), seq_x_mark (384, 4), [7 5 1 4]


In [102]:
len(data_x) - seq_len - pred_len + 1

8161

In [104]:
scaler.inverse_transform(data)[:3]

array([[30.53100014],
       [27.78700066],
       [27.78700066]])

In [None]:
class Dataset_ETT_hour(Dataset):
    """
    Sliding-window dataset over an hourly CSV with a ``date`` column.

    Each item is a tuple ``(seq_x, seq_y, seq_x_mark, seq_y_mark)``:
    ``seq_x`` is ``seq_len`` consecutive rows, ``seq_y`` covers the last
    ``label_len`` rows of ``seq_x`` followed by ``pred_len`` further rows, and
    the ``*_mark`` arrays are the matching time features.

    Parameters
    ----------
    root_path, data_path : str
        Joined with ``os.path.join`` to locate the CSV file.
    flag : {'train', 'val', 'test'}
        Which fixed row range of the file to expose.
    size : sequence of 3 ints or None
        ``[seq_len, label_len, pred_len]``; ``None`` selects ``[384, 96, 96]``.
    features : {'S', 'M', 'MS'}
        ``'S'`` keeps only ``target``; ``'M'``/``'MS'`` keep every column after the first.
    target : str
        Column used when ``features == 'S'``.
    scale : bool
        If true, standardise with statistics fitted on the train range.
    timeenc : {0, 1}
        0 -> integer month/day/weekday/hour columns; 1 -> ``time_features(...)``.
    freq : str
        Passed to ``time_features`` when ``timeenc == 1``.

    Notes
    -----
    ``StandardScaler`` and ``time_features`` must already be importable names
    in the enclosing namespace.
    """

    def __init__(self, root_path, flag='train', size=None,
                 features='S', data_path='./data/six/ETT-small/ETTh1.csv',
                 target='OT', scale=True, timeenc=0, freq='h'):
        # size [seq_len, label_len, pred_len]
        if size is None:
            self.seq_len = 24 * 4 * 4
            self.label_len = 24 * 4
            self.pred_len = 24 * 4
        else:
            self.seq_len = size[0]
            self.label_len = size[1]
            self.pred_len = size[2]

        assert flag in ['train', 'test', 'val']
        type_map = {'train': 0, 'val': 1, 'test': 2}
        self.set_type = type_map[flag]

        self.features = features
        self.target = target
        self.scale = scale
        self.timeenc = timeenc
        self.freq = freq

        self.root_path = root_path
        self.data_path = data_path
        self.__read_data__()

    def __read_data__(self):
        """Load the CSV and populate ``data_x``, ``data_y`` and ``data_stamp``.

        Raises
        ------
        ValueError
            If ``features`` or ``timeenc`` is not one of the supported values.
        """
        self.scaler = StandardScaler()
        df_raw = pd.read_csv(os.path.join(self.root_path,
                                          self.data_path))

        # Fixed split end points in rows (12 / +4 / +4 blocks of 30 days x 24 hours).
        train_end = 12 * 30 * 24
        val_end = train_end + 4 * 30 * 24
        test_end = train_end + 8 * 30 * 24
        # val/test start seq_len rows early so their first window has history.
        border1s = [0, train_end - self.seq_len, val_end - self.seq_len]
        border2s = [train_end, val_end, test_end]
        border1 = border1s[self.set_type]
        border2 = border2s[self.set_type]

        if self.features == 'M' or self.features == 'MS':
            cols_data = df_raw.columns[1:]
            df_data = df_raw[cols_data]
        elif self.features == 'S':
            df_data = df_raw[[self.target]]
        else:
            raise ValueError(
                f"features must be 'M', 'MS' or 'S', got {self.features!r}")

        if self.scale:
            # Fit on the train range only, then transform every row.
            train_data = df_data[border1s[0]:border2s[0]]
            self.scaler.fit(train_data.values)
            data = self.scaler.transform(df_data.values)
        else:
            data = df_data.values

        # Copy so the column assignments below do not write into a slice of df_raw.
        df_stamp = df_raw[['date']][border1:border2].copy()
        df_stamp['date'] = pd.to_datetime(df_stamp['date'])
        if self.timeenc == 0:
            dates = df_stamp['date'].dt
            df_stamp['month'] = dates.month
            df_stamp['day'] = dates.day
            df_stamp['weekday'] = dates.weekday  # Monday=0
            df_stamp['hour'] = dates.hour
            # `columns=` is required: positional `axis` was removed in pandas 2.0.
            # int64 keeps the dtype the former per-row apply produced.
            data_stamp = df_stamp.drop(columns=['date']).astype('int64').values
        elif self.timeenc == 1:
            data_stamp = time_features(pd.to_datetime(df_stamp['date'].values), freq=self.freq)
            data_stamp = data_stamp.transpose(1, 0)
        else:
            raise ValueError(f"timeenc must be 0 or 1, got {self.timeenc!r}")

        self.data_x = data[border1:border2]
        self.data_y = data[border1:border2]
        self.data_stamp = data_stamp

    def __getitem__(self, index):
        """Return ``(seq_x, seq_y, seq_x_mark, seq_y_mark)`` for the window starting at ``index``."""
        s_begin = index
        s_end = s_begin + self.seq_len
        # The target window overlaps the last label_len rows of the input.
        r_begin = s_end - self.label_len
        r_end = r_begin + self.label_len + self.pred_len

        seq_x = self.data_x[s_begin:s_end]
        seq_y = self.data_y[r_begin:r_end]
        seq_x_mark = self.data_stamp[s_begin:s_end]
        seq_y_mark = self.data_stamp[r_begin:r_end]

        return seq_x, seq_y, seq_x_mark, seq_y_mark

    def __len__(self):
        """Number of start positions with a full input window plus ``pred_len`` further rows."""
        return len(self.data_x) - self.seq_len - self.pred_len + 1

    def inverse_transform(self, data):
        """Map scaled values back through the fitted scaler's ``inverse_transform``."""
        return self.scaler.inverse_transform(data)