In [2]:
import os
import yaml
import time
import datetime
import pandas as pd
from finrl.meta.data_processor import DataProcessor
from finrl.meta.env_cryptocurrency_trading.env_btc_ccxt import BitcoinEnv
from finrl.meta.data_processors.processor_ccxt import CCXTEngineer
from alphaedge_cache.mea import view
from stockstats import StockDataFrame as Sdf


TRAIN_START_DATE, TRAIN_END_DATE = "2022-06-01 00:00:00", "2025-01-01 00:00:00"
EVAL_START_DATE, EVAL_END_DATE = "2025-01-01 00:00:00", "2025-07-01 00:00:00"

root_path = "/nvmdata/share/alphaedge-research/cache/v2/data_standalone/"
fields = [
    "perp_hf_tradesum_1h/open",
    "perp_hf_tradesum_1h/high",
    "perp_hf_tradesum_1h/low",
    "perp_hf_tradesum_1h/close",
    "perp_hf_tradesum_1h/volume",
]

def get_data(root_path, fields, start_date, end_date):
    columns = list(map(lambda x: x.split('/')[-1], fields))
    abs_field_paths = list(map(lambda x: os.path.join(root_path, f"{x}.mar"), fields))

    df = view(
        meafilepath=abs_field_paths[0],
        start_date=None,
        end_date=None,
        symbols=['btc'],
        df=True,
        flat=True,
        inclusive='both',
    )
    datetime = df.index
    if not isinstance(datetime, pd.DatetimeIndex):
        datetime = pd.to_datetime(datetime)
    datetime = datetime.sort_values()
    
    start_ts = pd.to_datetime(start_date)
    end_ts = pd.to_datetime(end_date)

    start_index = datetime.get_indexer([start_ts], method="backfill")[0]
    end_index = datetime.get_indexer([end_ts], method="pad")[0]
    if end_ts == datetime[end_index]:
        end_index -= 1
    datetime = datetime[start_index: end_index]
    
    df = pd.DataFrame(columns=columns, index=datetime)
    for column, abs_field_path in zip(columns, abs_field_paths):
        data = view(
            meafilepath=abs_field_path,
            start_date=datetime[0],
            end_date=datetime[-1],
            symbols=['btc'],
            df=False,
            flat=True,
            inclusive='both',
        )
        df[column] = data
    assert (df.isna().any().sum() == 0)
    print(df.shape, df.columns)
    return df

train_df = get_data(root_path, fields, TRAIN_START_DATE, TRAIN_END_DATE)
eval_df = get_data(root_path, fields, EVAL_START_DATE, EVAL_END_DATE)

(22679, 5) Index(['open', 'high', 'low', 'close', 'volume'], dtype='object')
(4343, 5) Index(['open', 'high', 'low', 'close', 'volume'], dtype='object')


In [3]:
if False:
    processor = CCXTEngineer()
    ccxt_df = processor.data_fetch(TRAIN_START_DATE, EVAL_END_DATE, period="1h")
    ccxt_df.columns = train_df.columns
    print(ccxt_df.columns)
    print(train_df.columns)
    train_aligned, ccxt_aligned = train_df.align(ccxt_df, join='inner')
    comparison_result = (train_aligned - ccxt_aligned)
    print(comparison_result)
    


In [4]:
INDICATORS = [
    'macd',          # MACD (默认 12, 26)
    'boll_ub',       # 布林带上轨 (默认 20)
    'boll_lb',       # 布林带下轨 (默认 20)
    'rsi_30',        # 30周期 RSI
    'dx_30',         # 30周期 DX (动向指标)
    'close_30_sma',  # 30周期收盘价简单移动平均
    'close_60_sma'   # 60周期收盘价简单移动平均
]
def add_indicators(df, indicators):
    sdf = Sdf.retype(df.copy())
    for ind in indicators:
        _ = sdf[ind]
    out = pd.DataFrame(index=df.index)
    out[df.columns] = df
    out[indicators] = sdf[indicators]
    return out

aug_train_df = add_indicators(train_df, INDICATORS)
aug_eval_df = add_indicators(eval_df, INDICATORS)
aug_train_df = aug_train_df.iloc[100:]
aug_eval_df = aug_eval_df.iloc[100:]

print(aug_train_df)

aug_train_df.to_csv("train_data.csv")
aug_eval_df.to_csv("trade_data.csv")


                             open          high           low         close  \
ri                                                                            
2022-06-05 04:00:00  29780.900391  29812.000000  29725.500000  29756.199219   
2022-06-05 05:00:00  29755.800781  29767.800781  29670.699219  29704.900391   
2022-06-05 06:00:00  29705.000000  29809.699219  29705.000000  29777.300781   
2022-06-05 07:00:00  29777.199219  29779.300781  29718.000000  29721.199219   
2022-06-05 08:00:00  29721.300781  29800.000000  29630.300781  29630.599609   
...                           ...           ...           ...           ...   
2024-12-31 18:00:00  94832.796875  95020.000000  93145.101562  94082.796875   
2024-12-31 19:00:00  94082.796875  94376.601562  93764.703125  93865.101562   
2024-12-31 20:00:00  93865.203125  94278.296875  93680.000000  94155.796875   
2024-12-31 21:00:00  94155.796875  94218.296875  93426.101562  93544.898438   
2024-12-31 22:00:00  93544.898438  93950.000000  934