In [95]:
import pandas as pd
import numpy as np
import time
from icecream import ic
import pandas_ta as ta
from tqdm.notebook import tqdm
import talib
import optuna
import lightgbm as lgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import log_loss
import vectorbt as vbt
import joblib
import re
from optuna.importance import get_param_importances
import csv
import warnings
from datetime import datetime
from collections import deque
# 忽略所有警告
warnings.filterwarnings("ignore")
warnings.filterwarnings('ignore', category=UserWarning, module='lightgbm')

In [90]:
def min_index(window):
    # 获取当前窗口内最小值的索引
    if window.isna().all():  # 如果当前窗口都是NaN，返回NaN
        return None
    return window.idxmin()
def max_index(window):
    # 获取当前窗口内最小值的索引
    if window.isna().all():  # 如果当前窗口都是NaN，返回NaN
        return None
    return window.idxmax()
# 将apply应用于滚动窗口

def find_local_extrema(df, window):
    # 用rolling window找局部极小值和极大值
    local_min = df['close'].rolling(window=window, center=True).min()
    local_max = df['close'].rolling(window=window, center=True).max()
    min_idx = df['close'].rolling(window=window, center=True).apply(min_index, raw=False)
    max_idx = df['close'].rolling(window=window, center=True).apply(max_index, raw=False)
    return local_min, local_max, min_idx, max_idx

def generate_trade_signals(df, window, r, n):
    # 事先计算局部最小值、最大值和对应的索引
    local_min, local_max, min_idx, max_idx = find_local_extrema(df, window)

    # 创建信号列
    df['signal'] = 0
    df['position'] = 0
    position = 0
    price = 0
    return_rate = 0
    return_rates = []

    # 避免在循环中频繁使用 `df.at` 进行逐行修改，提前将信号存入数组
    signals = np.zeros(len(df))
    positions = np.zeros(len(df))

    # 循环遍历数据，寻找买入和卖出信号
    for i in range(int(window / 2), len(df) - int(window / 2)):  # 跳过前后window期
        positions[i] = position
        if positions[i] == 0:
            if df['close'].at[i] == local_min.at[i]:  # 当前点是局部最低点
                # 判断是否是买入信号
                if min_idx.at[i] < max_idx.at[i]:  # 如果局部最低点更接近开盘点（即之前出现过低点）
                    signals[i] = 1  # 买入信号
                    position = 1  # 多单
                    price = df['close'].at[i]  # 记录价格
            elif df['close'].at[i] == local_max.at[i]:  # 当前点是局部最高点
                # 判断是否是卖出信号
                if max_idx.at[i] > min_idx.at[i]:  # 如果局部最高点更接近开盘点
                    signals[i] = -1  # 卖出信号
                    position = -1  # 空单
                    price = df['close'].at[i]  # 记录价格

        # 判断止损或止盈
        elif positions[i] == 1:  # 如果是多单
            if df['close'].at[i] <= price * r:
                signals[i] = -1  # 卖出信号
                return_rate = (df['close'].at[i] / price - 1) * n
                return_rates.append(return_rate)
                position = 0  # 清仓
            elif df['close'].at[i] == local_max.at[i]:
                signals[i] = -1  # 卖出信号
                return_rate = (df['close'].at[i] / price - 1) * n
                return_rates.append(return_rate)
                position = 0  # 清仓

        elif positions[i] == -1:  # 如果是空单
            if price <= df['close'].at[i] * r:
                signals[i] = 1  # 买入信号，平空仓
                return_rate = (price / df['close'].at[i] - 1) * n
                return_rates.append(return_rate)
                position = 0  # 清仓
            elif df['close'].at[i] == local_min.at[i]:
                signals[i] = 1  # 买入信号，平空仓
                return_rate = (price / df['close'].at[i] - 1) * n
                return_rates.append(return_rate)
                position = 0  # 清仓

    # 将信号和仓位添加回 DataFrame
    df['signal'] = signals
    df['position'] = positions

    return df, return_rates

In [55]:
# df = pd.read_csv('C:\pythonProject\DOGEUSDT.csv', encoding='utf-8-sig')
df = pd.read_csv('D:\DOGEUSDT.csv', encoding='utf-8-sig')

In [146]:
df2 = df
df2['timestamp'] = pd.to_datetime(df2['timestamp'])
df2

Unnamed: 0,timestamp,open,high,low,close,volume,quote_asset_volume,number_of_trades,taker_buy_base_asset_volume,taker_buy_quote_asset_volume,taker_sell_base_asset_volume,taker_sell_quote_asset_volume,taker_buy_sell_volume,taker_buy_sell_quote_asset_volume,local_min,local_max,min_idx,max_idx,signal,position
0,2024-09-07 19:01:00,0.09419,0.09424,0.09419,0.09421,793070,74718.99823,707,385310,36302.39951,407760,38416.59872,-22450,-2114.19921,,,,,0.0,0.0
1,2024-09-07 19:02:00,0.09421,0.09421,0.09410,0.09413,747305,70361.83392,406,328807,30955.86591,418498,39405.96801,-89691,-8450.10210,,,,,0.0,0.0
2,2024-09-07 19:03:00,0.09413,0.09416,0.09409,0.09415,1834230,172679.97070,437,1650595,155394.07950,183635,17285.89120,1466960,138108.18830,,,,,0.0,0.0
3,2024-09-07 19:04:00,0.09416,0.09421,0.09415,0.09419,1123645,105832.91100,467,429589,40460.87287,694056,65372.03813,-264467,-24911.16526,,,,,0.0,0.0
4,2024-09-07 19:05:00,0.09419,0.09422,0.09413,0.09422,1050421,98915.23366,702,239439,22549.16096,810982,76366.07270,-571543,-53816.91174,,,,,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
172845,2025-01-05 19:46:00,0.38346,0.38354,0.38320,0.38354,1079425,413874.25630,910,697291,267359.52940,382134,146514.72690,315157,120844.80250,,,,,0.0,0.0
172846,2025-01-05 19:47:00,0.38353,0.38359,0.38320,0.38331,423009,162188.43010,735,208956,80115.06906,214053,82073.36104,-5097,-1958.29198,,,,,0.0,0.0
172847,2025-01-05 19:48:00,0.38331,0.38344,0.38323,0.38344,497276,190621.12470,677,163543,62695.32911,333733,127925.79559,-170190,-65230.46648,,,,,0.0,0.0
172848,2025-01-05 19:49:00,0.38344,0.38350,0.38310,0.38310,629479,241312.88290,693,278198,106661.61650,351281,134651.26640,-73083,-27989.64990,,,,,0.0,0.0


In [147]:
df2['taker_sell_base_asset_volume'] = df2['volume'] - df2['taker_buy_base_asset_volume']
df2['taker_sell_quote_asset_volume'] = df2['quote_asset_volume'] - df2['taker_buy_quote_asset_volume']
df2['taker_buy_sell_volume'] = df2['taker_buy_base_asset_volume'] - df2['taker_sell_base_asset_volume']
df2['taker_buy_sell_quote_asset_volume'] = df2['taker_buy_quote_asset_volume'] - df2['taker_sell_quote_asset_volume']
df2

Unnamed: 0,timestamp,open,high,low,close,volume,quote_asset_volume,number_of_trades,taker_buy_base_asset_volume,taker_buy_quote_asset_volume,taker_sell_base_asset_volume,taker_sell_quote_asset_volume,taker_buy_sell_volume,taker_buy_sell_quote_asset_volume,local_min,local_max,min_idx,max_idx,signal,position
0,2024-09-07 19:01:00,0.09419,0.09424,0.09419,0.09421,793070,74718.99823,707,385310,36302.39951,407760,38416.59872,-22450,-2114.19921,,,,,0.0,0.0
1,2024-09-07 19:02:00,0.09421,0.09421,0.09410,0.09413,747305,70361.83392,406,328807,30955.86591,418498,39405.96801,-89691,-8450.10210,,,,,0.0,0.0
2,2024-09-07 19:03:00,0.09413,0.09416,0.09409,0.09415,1834230,172679.97070,437,1650595,155394.07950,183635,17285.89120,1466960,138108.18830,,,,,0.0,0.0
3,2024-09-07 19:04:00,0.09416,0.09421,0.09415,0.09419,1123645,105832.91100,467,429589,40460.87287,694056,65372.03813,-264467,-24911.16526,,,,,0.0,0.0
4,2024-09-07 19:05:00,0.09419,0.09422,0.09413,0.09422,1050421,98915.23366,702,239439,22549.16096,810982,76366.07270,-571543,-53816.91174,,,,,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
172845,2025-01-05 19:46:00,0.38346,0.38354,0.38320,0.38354,1079425,413874.25630,910,697291,267359.52940,382134,146514.72690,315157,120844.80250,,,,,0.0,0.0
172846,2025-01-05 19:47:00,0.38353,0.38359,0.38320,0.38331,423009,162188.43010,735,208956,80115.06906,214053,82073.36104,-5097,-1958.29198,,,,,0.0,0.0
172847,2025-01-05 19:48:00,0.38331,0.38344,0.38323,0.38344,497276,190621.12470,677,163543,62695.32911,333733,127925.79559,-170190,-65230.46648,,,,,0.0,0.0
172848,2025-01-05 19:49:00,0.38344,0.38350,0.38310,0.38310,629479,241312.88290,693,278198,106661.61650,351281,134651.26640,-73083,-27989.64990,,,,,0.0,0.0


In [151]:


# 计算MACD的函数
def calculate_macd(df_15m, fast, slow, signal):
    try:
        _,_,macd = talib.MACD(df_15m['close'], fastperiod=fast, slowperiod=slow, signalperiod=signal)
        macd_diff = macd.diff()
    except:
        macd = pd.Series([np.nan], index=[df_15m.index[-1]])  # 保持与数据结构一致
        macd_diff = pd.Series([np.nan], index=[df_15m.index[-1]])
    return macd.iat[-1], macd_diff.iat[-1]

# 计算RSI的函数
def calculate_rsi(df_15m, n_rsi):
    try:
        rsi = talib.RSI(df_15m['close'], timeperiod=n_rsi)
        rsi_diff = rsi.diff()
    except:
        rsi = pd.Series([np.nan], index=[df_15m.index[-1]])  # 保持与数据结构一致
        rsi_diff = pd.Series([np.nan], index=[df_15m.index[-1]])
    return rsi.iat[-1], rsi_diff.iat[-1]


# 计算ATR的函数
def calculate_atr(df_15m, n):
    try:
        atr = talib.ATR(df_15m['high'], df_15m['low'], df_15m['close'], timeperiod=n)
        current_2atr_stop_rate = round(atr.iat[-1] * 2 / df_15m['close'].iat[-1] * -1, 5)
        current_4atr_stop_rate = round(atr.iat[-1] * 4 / df_15m['close'].iat[-1] * -1, 5)
    except:
        atr = pd.Series([np.nan], index=[df_15m.index[-1]])
        current_2atr_stop_rate = pd.Series([np.nan], index=[df_15m.index[-1]])
        current_4atr_stop_rate = pd.Series([np.nan], index=[df_15m.index[-1]])
    return atr.iat[-1], current_2atr_stop_rate, current_4atr_stop_rate

def calculate_sar(df_15m, acceleration, maximum):
    try:
        sar = talib.SAR(df_15m['high'], df_15m['low'], acceleration, maximum)
        current_sar_diff = df_15m['close'].iat[-1] - sar.iat[-1]
        current_sar_rate = round(df_15m['close'].iat[-1] / sar.iat[-1] - 1, 5)
    except:
        sar = pd.Series([np.nan], index=[df_15m.index[-1]])
        current_sar_diff = pd.Series([np.nan], index=[df_15m.index[-1]])
        current_sar_rate = pd.Series([np.nan], index=[df_15m.index[-1]])
    return sar.iat[-1], current_sar_diff, current_sar_rate

def calculate_current_last_high_rate(df_15m, n):
    try:
        price_rate = round(df_15m['close'].iat[-1] / df_15m['close'].iat[n] - 1, 5)
    except:
        price_rate = np.nan
    return price_rate
df_15m = pd.DataFrame(columns=['timestamp', 'open', 'high', 'low', 'close', 'volume',
       'quote_asset_volume', 'number_of_trades', 'taker_buy_base_asset_volume',
       'taker_buy_quote_asset_volume','taker_sell_base_asset_volume','taker_sell_quote_asset_volume','taker_buy_sell_volume','taker_buy_sell_quote_asset_volume'])

current_data = {
    'timestamp': df2['timestamp'].iat[0],
    'open': df2['open'].iat[0],
    'high': df2['high'].iat[0],
    'low': df2['low'].iat[0],
    'close': df2['close'].iat[0],
    'volume': df2['volume'].iat[0],
    'quote_asset_volume': df2['quote_asset_volume'].iat[0],
    'number_of_trades': df2['number_of_trades'].iat[0],
    'taker_buy_base_asset_volume': df2['taker_buy_base_asset_volume'].iat[0],
    'taker_buy_quote_asset_volume': df2['taker_buy_quote_asset_volume'].iat[0],
    'taker_sell_base_asset_volume': df2['taker_sell_base_asset_volume'].iat[0],
    'taker_sell_quote_asset_volume': df2['taker_sell_quote_asset_volume'].iat[0],
    'taker_buy_sell_volume': df2['taker_buy_sell_volume'].iat[0],
    'taker_buy_sell_quote_asset_volume': df2['taker_buy_sell_quote_asset_volume'].iat[0]
}
last_volume = current_data['volume']
last_quote_asset_volume = current_data['quote_asset_volume']
last_number_of_trades = current_data['number_of_trades']
last_taker_buy_base_asset_volume = current_data['taker_buy_base_asset_volume']
last_taker_buy_quote_asset_volume = current_data['taker_buy_quote_asset_volume']
last_taker_sell_base_asset_volume = current_data['taker_sell_base_asset_volume']
last_taker_sell_quote_asset_volume = current_data['taker_sell_quote_asset_volume']
last_taker_buy_sell_volume = current_data['taker_buy_sell_volume']
last_taker_buy_sell_quote_asset_volume = current_data['taker_buy_sell_quote_asset_volume']
df_15m = df_15m._append(current_data, ignore_index=True)

#转为当日开始时分
day0 = df2['timestamp'].iat[0].normalize()
new_rows = []
# 遍历数据
for i in tqdm(range(len(df2))):
    row = df2.iloc[i]
    # 检查是否是新15分钟段的开始（0, 15, 30, 45分钟）
    if row['timestamp'].minute % 15 == 0:
        last_volume = current_data['volume']
        last_quote_asset_volume = current_data['quote_asset_volume']
        last_number_of_trades = current_data['number_of_trades']
        last_taker_buy_base_asset_volume = current_data['taker_buy_base_asset_volume']
        last_taker_buy_quote_asset_volume = current_data['taker_buy_quote_asset_volume']
        last_taker_sell_base_asset_volume = current_data['taker_sell_base_asset_volume']
        last_taker_sell_quote_asset_volume = current_data['taker_sell_quote_asset_volume']
        last_taker_buy_sell_volume = current_data['taker_buy_sell_volume']
        last_taker_buy_sell_quote_asset_volume = current_data['taker_buy_sell_quote_asset_volume']
        # 开始新的15分钟段
        current_data = {
            'timestamp': row['timestamp'],
            'open': row['open'],
            'high': row['high'],
            'low': row['low'],
            'close': row['close'],
            'volume': row['volume'],
            'quote_asset_volume': row['quote_asset_volume'],
            'number_of_trades': row['number_of_trades'],
            'taker_buy_base_asset_volume': row['taker_buy_base_asset_volume'],
            'taker_buy_quote_asset_volume': row['taker_buy_quote_asset_volume'],
            'taker_sell_base_asset_volume': row['taker_sell_base_asset_volume'],
            'taker_sell_quote_asset_volume': row['taker_sell_quote_asset_volume'],
            'taker_buy_sell_volume': row['taker_buy_sell_volume'],
            'taker_buy_sell_quote_asset_volume': row['taker_buy_sell_quote_asset_volume']
        }
        df_15m = df_15m._append(current_data, ignore_index=True)
        if len(df_15m) > 100:
            df_15m = df_15m.tail(100)
    else:
        # 更新当前15分钟段的数据
        current_data['high'] = max(current_data['high'], row['high'])
        current_data['low'] = min(current_data['low'], row['low'])
        current_data['close'] = row['close']
        current_data['volume'] += row['volume']
        current_data['quote_asset_volume'] += row['quote_asset_volume']
        current_data['number_of_trades'] += row['number_of_trades']
        current_data['taker_buy_base_asset_volume'] += row['taker_buy_base_asset_volume']
        current_data['taker_buy_quote_asset_volume'] += row['taker_buy_quote_asset_volume']
        current_data['taker_sell_base_asset_volume'] += row['taker_sell_base_asset_volume']
        current_data['taker_sell_quote_asset_volume'] += row['taker_sell_quote_asset_volume']
        current_data['taker_buy_sell_volume'] += row['taker_buy_sell_volume']
        current_data['taker_buy_sell_quote_asset_volume'] += row['taker_buy_sell_quote_asset_volume']
    df_15m.iloc[-1] = current_data
    day = (df2['timestamp'].iat[i]-day0).days
    hour = df2['timestamp'].iat[i].hour
    quarter = df2['timestamp'].iat[i].minute // 15
    past_min = df2['timestamp'].iat[i].minute % 15
    current_open_diff = df_15m['close'].iat[-1] - df_15m['open'].iat[-1]
    current_open_rate = round(df_15m['close'].iat[-1] / df_15m['open'].iat[-1] - 1, 5)
    current_low_diff = df_15m['close'].iat[-1] - df_15m['low'].iat[-1]
    current_low_rate = round(df_15m['close'].iat[-1] / df_15m['low'].iat[-1] - 1, 5)
    current_high_diff = df_15m['close'].iat[-1] - df_15m['high'].iat[-1]
    current_high_rate = round(df_15m['close'].iat[-1] / df_15m['high'].iat[-1] - 1, 5)
    try:
        current_last_low_diff = df_15m['close'].iat[-1] - df_15m['low'].iat[-2]
        current_last_low_rate = round(df_15m['close'].iat[-1] / df_15m['low'].iat[-2] - 1, 5)
        current_last_high_diff = df_15m['close'].iat[-1] - df_15m['high'].iat[-2]
        current_last_high_rate = round(df_15m['close'].iat[-1] / df_15m['high'].iat[-2] - 1, 5)
    except:
        current_last_low_diff = np.nan
        current_last_low_rate = np.nan
        current_last_high_diff = np.nan
        current_last_high_rate = np.nan

    price_rate_15min = calculate_current_last_high_rate(df_15m, -2)
    price_rate_30min = calculate_current_last_high_rate(df_15m, -3)
    price_rate_1h = calculate_current_last_high_rate(df_15m, -5)
    price_rate_2h = calculate_current_last_high_rate(df_15m, -9)
    price_rate_3h = calculate_current_last_high_rate(df_15m, -13)
    price_rate_4h = calculate_current_last_high_rate(df_15m, -17)
    price_rate_6h = calculate_current_last_high_rate(df_15m, -25)
    price_rate_8h = calculate_current_last_high_rate(df_15m, -33)
    price_rate_12h = calculate_current_last_high_rate(df_15m, -49)
    price_rate_24h = calculate_current_last_high_rate(df_15m, -97)
    macd1, macd1_diff = calculate_macd(df_15m, 12, 26, 9)
    macd2, macd2_diff = calculate_macd(df_15m, 6, 13, 5)
    macd3, macd3_diff = calculate_macd(df_15m, 10, 20, 7)
    macd4, macd4_diff = calculate_macd(df_15m, 19, 39, 9)
    macd5, macd5_diff = calculate_macd(df_15m, 5, 15, 3)
    rsi1, rsi1_diff = calculate_rsi(df_15m, 5)
    rsi2, rsi2_diff = calculate_rsi(df_15m, 6)
    rsi3, rsi3_diff = calculate_rsi(df_15m, 7)
    rsi4, rsi4_diff = calculate_rsi(df_15m, 9)
    rsi5, rsi5_diff = calculate_rsi(df_15m, 14)
    rsi6, rsi6_diff = calculate_rsi(df_15m, 21)
    rsi7, rsi7_diff = calculate_rsi(df_15m, 24)
    rsi8, rsi8_diff = calculate_rsi(df_15m, 28)
    atr1,atr1_2_stop_rate,atr1_4_stop_rate = calculate_atr(df_15m, 7)
    atr2,atr2_2_stop_rate,atr2_4_stop_rate = calculate_atr(df_15m, 14)
    atr3,atr3_2_stop_rate,atr3_4_stop_rate = calculate_atr(df_15m, 21)
    sar1, sar1_diff, sar1_rate = calculate_sar(df_15m, 0.02, 0.2)
    volume_diff = current_data['volume'] - last_volume
    volume_rate = round(current_data['volume'] / last_volume - 1, 5)
    quote_asset_volume_diff = current_data['quote_asset_volume'] - last_quote_asset_volume
    quote_asset_volume_rate = round(current_data['quote_asset_volume'] / last_quote_asset_volume - 1, 5)
    number_of_trades_diff = current_data['number_of_trades'] - last_number_of_trades
    number_of_trades_rate = round(current_data['number_of_trades'] / last_number_of_trades - 1, 5)
    taker_buy_base_asset_volume_diff = current_data['taker_buy_base_asset_volume'] - last_taker_buy_base_asset_volume
    taker_buy_base_asset_volume_rate = round(current_data['taker_buy_base_asset_volume'] / last_taker_buy_base_asset_volume - 1, 5)
    taker_buy_quote_asset_volume_diff = current_data['taker_buy_quote_asset_volume'] - last_taker_buy_quote_asset_volume
    taker_buy_quote_asset_volume_rate = round(current_data['taker_buy_quote_asset_volume'] / last_taker_buy_quote_asset_volume - 1, 5)
    taker_sell_base_asset_volume_diff = current_data['taker_sell_base_asset_volume'] - last_taker_sell_base_asset_volume
    taker_sell_base_asset_volume_rate = round(current_data['taker_sell_base_asset_volume'] / last_taker_sell_base_asset_volume - 1, 5)
    taker_sell_quote_asset_volume_diff = current_data['taker_sell_quote_asset_volume'] - last_taker_sell_quote_asset_volume
    taker_sell_quote_asset_volume_rate = round(current_data['taker_sell_quote_asset_volume'] / last_taker_sell_quote_asset_volume - 1, 5)
    #买卖数量和额度差比
    taker_buy_sell_volume_diff = current_data['taker_buy_sell_volume'] - last_taker_buy_sell_volume
    taker_buy_sell_volume_rate = round(current_data['taker_buy_sell_volume'] / last_taker_buy_sell_volume - 1, 5)
    taker_buy_sell_quote_asset_volume_diff = current_data['taker_buy_sell_quote_asset_volume'] - last_taker_buy_sell_quote_asset_volume
    taker_buy_sell_quote_asset_volume_rate = round(current_data['taker_buy_sell_quote_asset_volume'] / last_taker_buy_sell_quote_asset_volume - 1, 5)
    current_taker_buy_sell_volume_rate = round(current_data['taker_buy_base_asset_volume'] / current_data['taker_sell_base_asset_volume'] - 1, 5)
    current_buy_sell_quote_asset_volume_rate = round(current_data['taker_buy_quote_asset_volume'] / current_data['taker_sell_quote_asset_volume'] - 1, 5)
    new_row = {
        'timestamp': row['timestamp'],
        'day': day,
        'hour': hour,
        'quarter': quarter,  # 15分钟区间
        'past_min': past_min,
        'open': row['open'],
        'high': row['high'],
        'low': row['low'],
        'close': row['close'],
        'volume': row['volume'],
        'quote_asset_volume': row['quote_asset_volume'],
        'number_of_trades': row['number_of_trades'],
        'taker_buy_base_asset_volume': row['taker_buy_base_asset_volume'],
        'taker_buy_quote_asset_volume': row['taker_buy_quote_asset_volume'],
        'taker_sell_base_asset_volume': row['taker_sell_base_asset_volume'],
        'taker_sell_quote_asset_volume': row['taker_sell_quote_asset_volume'],
        'taker_buy_sell_volume': row['taker_buy_sell_volume'],
        'taker_buy_sell_quote_asset_volume': row['taker_buy_sell_quote_asset_volume'],
        'current_open_diff': current_open_diff,  
        'current_open_rate': current_open_rate,  
        'current_low_diff': current_low_diff,  
        'current_low_rate': current_low_rate,  
        'current_high_diff': current_high_diff,  
        'current_high_rate': current_high_rate,  
        'current_last_low_diff': current_last_low_diff,  
        'current_last_low_rate': current_last_low_rate,  
        'current_last_high_diff': current_last_high_diff,  
        'current_last_high_rate': current_last_high_rate,  
        'price_rate_15min': price_rate_15min,  
        'price_rate_30min': price_rate_30min,  
        'price_rate_1h': price_rate_1h,  
        'price_rate_2h': price_rate_2h,  
        'price_rate_3h': price_rate_3h,  
        'price_rate_4h': price_rate_4h,  
        'price_rate_6h': price_rate_6h,  
        'price_rate_8h': price_rate_8h,  
        'price_rate_12h': price_rate_12h,  
        'price_rate_24h': price_rate_24h,  
        'macd1': macd1,  
        'macd1_diff': macd1_diff,  
        'macd2': macd2,  
        'macd2_diff': macd2_diff,  
        'macd3': macd3,  
        'macd3_diff': macd3_diff,  
        'macd4': macd4,  
        'macd4_diff': macd4_diff,  
        'macd5': macd5,  
        'macd5_diff': macd5_diff,  
        'rsi1': rsi1,  
        'rsi1_diff': rsi1_diff,  
        'rsi2': rsi2,  
        'rsi2_diff': rsi2_diff,  
        'rsi3': rsi3,  
        'rsi3_diff': rsi3_diff,  
        'rsi4': rsi4,  
        'rsi4_diff': rsi4_diff,  
        'rsi5': rsi5,  
        'rsi5_diff': rsi5_diff,  
        'rsi6': rsi6,  
        'rsi6_diff': rsi6_diff,  
        'rsi7': rsi7,  
        'rsi7_diff': rsi7_diff,  
        'rsi8': rsi8,  
        'rsi8_diff': rsi8_diff,  
        'atr1': atr1,  
        'atr1_2_stop_rate': atr1_2_stop_rate,
        'atr1_4_stop_rate': atr1_4_stop_rate,
        'atr2': atr2,
        'atr2_2_stop_rate': atr2_2_stop_rate,
        'atr2_4_stop_rate': atr2_4_stop_rate,
        'atr3': atr3,
        'atr3_2_stop_rate': atr3_2_stop_rate,
        'atr3_4_stop_rate': atr3_4_stop_rate,
        'sar1': sar1,
        'sar1_diff': sar1_diff,
        'sar1_rate': sar1_rate,
        'volume_diff': volume_diff,  
        'volume_rate': volume_rate,  
        'quote_asset_volume_diff': quote_asset_volume_diff,  
        'quote_asset_volume_rate': quote_asset_volume_rate,  
        'number_of_trades_diff': number_of_trades_diff,  
        'number_of_trades_rate': number_of_trades_rate,  
        'taker_buy_base_asset_volume_diff': taker_buy_base_asset_volume_diff,  
        'taker_buy_base_asset_volume_rate': taker_buy_base_asset_volume_rate,  
        'taker_buy_quote_asset_volume_diff': taker_buy_quote_asset_volume_diff,  
        'taker_buy_quote_asset_volume_rate': taker_buy_quote_asset_volume_rate,  
        'taker_sell_base_asset_volume_diff': taker_sell_base_asset_volume_diff,  
        'taker_sell_base_asset_volume_rate': taker_sell_base_asset_volume_rate,  
        'taker_sell_quote_asset_volume_diff': taker_sell_quote_asset_volume_diff,  
        'taker_sell_quote_asset_volume_rate': taker_sell_quote_asset_volume_rate,  
        'taker_buy_sell_volume_diff': taker_buy_sell_volume_diff,  
        'taker_buy_sell_volume_rate': taker_buy_sell_volume_rate,  
        'taker_buy_sell_quote_asset_volume_diff': taker_buy_sell_quote_asset_volume_diff,  
        'taker_buy_sell_quote_asset_volume_rate': taker_buy_sell_quote_asset_volume_rate,  
        'current_taker_buy_sell_volume_rate': current_taker_buy_sell_volume_rate,  
        'current_buy_sell_quote_asset_volume_rate': current_buy_sell_quote_asset_volume_rate  
    }

    # 添加新行到lgb_future DataFrame中
    new_rows.append(new_row)

# 找到包含 'diff' 的列
lgb_future = pd.DataFrame(new_rows)
diff_columns = [col for col in lgb_future.columns if 'diff' in col]

lgb_future[diff_columns] = lgb_future[diff_columns].round(6)


  0%|          | 0/172850 [00:00<?, ?it/s]

In [154]:
lgb_future

Unnamed: 0,timestamp,day,hour,quarter,past_min,open,high,low,close,volume,...,taker_sell_base_asset_volume_diff,taker_sell_base_asset_volume_rate,taker_sell_quote_asset_volume_diff,taker_sell_quote_asset_volume_rate,taker_buy_sell_volume_diff,taker_buy_sell_volume_rate,taker_buy_sell_quote_asset_volume_diff,taker_buy_sell_quote_asset_volume_rate,current_taker_buy_sell_volume_rate,current_buy_sell_quote_asset_volume_rate
0,2024-09-07 19:01:00,0,19,0,1,0.09419,0.09424,0.09419,0.09421,793070,...,407760,1.00000,3.841660e+04,1.00000,-22450,1.00000,-2.114199e+03,1.00000,-0.05506,-0.05503
1,2024-09-07 19:02:00,0,19,0,2,0.09421,0.09421,0.09410,0.09413,747305,...,826258,2.02633,7.782257e+04,2.02575,-112141,4.99514,-1.056430e+04,4.99683,-0.10907,-0.10907
2,2024-09-07 19:03:00,0,19,0,3,0.09413,0.09416,0.09409,0.09415,1834230,...,1009893,2.47668,9.510846e+04,2.47571,1354819,-60.34829,1.275439e+05,-60.32728,0.93984,0.93937
3,2024-09-07 19:04:00,0,19,0,4,0.09416,0.09421,0.09415,0.09419,1123645,...,1703949,4.17880,1.604805e+05,4.17737,1090352,-48.56802,1.026327e+05,-48.54449,0.50571,0.50538
4,2024-09-07 19:05:00,0,19,0,5,0.09419,0.09422,0.09413,0.09422,1050421,...,2514931,6.16767,2.368466e+05,6.16521,518809,-23.10953,4.881581e+04,-23.08950,0.16983,0.16966
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
172845,2025-01-05 19:46:00,120,19,3,1,0.38346,0.38354,0.38320,0.38354,1079425,...,-16629788,-0.90454,-6.369729e+06,-0.90445,4815438,-0.87904,1.843329e+06,-0.87885,-0.37758,-0.37758
172846,2025-01-05 19:47:00,120,19,3,2,0.38353,0.38359,0.38320,0.38331,423009,...,-16415735,-0.89290,-6.287656e+06,-0.89279,4810341,-0.87811,1.841370e+06,-0.87792,-0.33912,-0.33913
172847,2025-01-05 19:48:00,120,19,3,3,0.38331,0.38344,0.38323,0.38344,497276,...,-16082002,-0.87475,-6.159730e+06,-0.87463,4640151,-0.84704,1.776140e+06,-0.84682,-0.36388,-0.36388
172848,2025-01-05 19:49:00,120,19,3,4,0.38344,0.38350,0.38310,0.38310,629479,...,-15730721,-0.85564,-6.025079e+06,-0.85551,4567068,-0.83370,1.748150e+06,-0.83347,-0.34325,-0.34323


In [156]:
lgb_future.to_csv(r'D:\lgb_future.csv',encoding='utf-8-sig', index=False)
# lgb_future = pd.read_csv(r'C:\pythonProject\lgb_future.csv',encoding='utf-8-sig')

In [None]:
# 定义目标函数
def best_signal(trial, df2):
    windows = trial.suggest_int('windows', 3, 1441)
    r = trial.suggest_float('r', 0.5, 1, step=0.001)
    n = trial.suggest_int('n', 5, 10)
    df2, return_rates= generate_trade_signals(df2, windows, r, n)
    return_series = pd.Series(return_rates)
    min_loss = 1
    for i in range(len(return_series)):
        if return_series.loc[i] < 0:
            loss = min((1 + return_series.loc[i:]).cumprod()-1)
            if loss < min_loss:
                min_loss = loss
    victory_rate = return_series.map(lambda x: 1 if x> 0 else 0).mean()
    return len(return_series), min_loss, victory_rate

# 进行超参数优化
study = optuna.create_study(directions=["maximize", "maximize", "maximize"])
def progress_bar_callback(study, trial):
    # 更新进度条
    progress_bar.update(1)

# 使用 tqdm 创建一个进度条
progress_bar = tqdm(total=100, desc="Optimizing", ncols=100)

study.optimize(lambda trial: best_signal(trial, df2), n_trials=100, n_jobs=8, callbacks=[progress_bar_callback])

# 获取参数组合及对应的收益
param_combinations = [(trial.params, trial.values) for trial in study.trials]

# 过滤出收益率大于1000%且胜率大于50%的组合
filtered_combinations = [
    (params, values)
    for params, values in param_combinations
    if values != None
]

# 根据胜率从高到低排序
filtered_combinations.sort(key=lambda x: x[1][1], reverse=True)

# 输出最佳参数，包括交易次数
for i in range(min(10, len(filtered_combinations))):
    params, values = filtered_combinations[i]
    print(f"排名 {i + 1} 的参数组合: {[round(value, 3) for value in params.values()]}, "
          f"交易次数: {values[0]}, 最大损失数: {values[1]}, 胜率: {values[2]}")




In [157]:
df2, return_rates= generate_trade_signals(df2, 207, 0.891, 10)
return_series = pd.Series(return_rates)
min_loss = 1
for i in range(len(return_series)):
    if return_series.loc[i] < 0:
        loss = min((1 + return_series.loc[i:]).cumprod()-1)
        if loss < min_loss:
            min_loss = loss
victory_rate = return_series.map(lambda x: 1 if x> 0 else 0).mean()
print(min_loss, len(return_series), victory_rate)


-0.25553480928247607 349 0.9856733524355301


In [180]:
lgb_future['timestamp'] = pd.to_datetime(lgb_future['timestamp'])
lgb_future2 = lgb_future.merge(df2[['timestamp','signal']], how='left', on='timestamp')
lgb_future2['signal'].value_counts()

signal
 0.0    172152
 1.0       349
-1.0       349
Name: count, dtype: int64

In [182]:
def signal_process(lgb_future2, n):
    signals = lgb_future2['signal'].to_list()
    close = lgb_future2['close'].to_list()
    positions = np.zeros(len(lgb_future2))
    return_rates = np.zeros(len(lgb_future2))
    trade_prices = 0
    position = 0
    for i in tqdm(range(len(lgb_future2))):
        if position == 0:
            if signals[i] != 0:
                position = signals[i]
                positions[i] = position
                trade_prices = close[i]
        elif position == 1:
            positions[i] = position
            return_rates[i] = (close[i] / trade_prices - 1) * n
            if signals[i] == -1:
                position = 0
        elif position == -1:
            positions[i] = position
            return_rates[i] = (trade_prices / close[i] - 1) * n
            if signals[i] == 1:
                position = 0
    lgb_future2['position'] = positions
    lgb_future2['return_rate'] = return_rates
    return lgb_future2

In [183]:
lgb_future2 = signal_process(lgb_future2, 10)

  0%|          | 0/172850 [00:00<?, ?it/s]

In [186]:
lgb_future2

Unnamed: 0,timestamp,day,hour,quarter,past_min,open,high,low,close,volume,...,taker_sell_quote_asset_volume_rate,taker_buy_sell_volume_diff,taker_buy_sell_volume_rate,taker_buy_sell_quote_asset_volume_diff,taker_buy_sell_quote_asset_volume_rate,current_taker_buy_sell_volume_rate,current_buy_sell_quote_asset_volume_rate,signal,position,return_rate
0,2024-09-07 19:01:00,0,19,0,1,0.09419,0.09424,0.09419,0.09421,793070,...,1.00000,-22450,1.00000,-2.114199e+03,1.00000,-0.05506,-0.05503,0.0,0.0,0.0
1,2024-09-07 19:02:00,0,19,0,2,0.09421,0.09421,0.09410,0.09413,747305,...,2.02575,-112141,4.99514,-1.056430e+04,4.99683,-0.10907,-0.10907,0.0,0.0,0.0
2,2024-09-07 19:03:00,0,19,0,3,0.09413,0.09416,0.09409,0.09415,1834230,...,2.47571,1354819,-60.34829,1.275439e+05,-60.32728,0.93984,0.93937,0.0,0.0,0.0
3,2024-09-07 19:04:00,0,19,0,4,0.09416,0.09421,0.09415,0.09419,1123645,...,4.17737,1090352,-48.56802,1.026327e+05,-48.54449,0.50571,0.50538,0.0,0.0,0.0
4,2024-09-07 19:05:00,0,19,0,5,0.09419,0.09422,0.09413,0.09422,1050421,...,6.16521,518809,-23.10953,4.881581e+04,-23.08950,0.16983,0.16966,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
172845,2025-01-05 19:46:00,120,19,3,1,0.38346,0.38354,0.38320,0.38354,1079425,...,-0.90445,4815438,-0.87904,1.843329e+06,-0.87885,-0.37758,-0.37758,0.0,0.0,0.0
172846,2025-01-05 19:47:00,120,19,3,2,0.38353,0.38359,0.38320,0.38331,423009,...,-0.89279,4810341,-0.87811,1.841370e+06,-0.87792,-0.33912,-0.33913,0.0,0.0,0.0
172847,2025-01-05 19:48:00,120,19,3,3,0.38331,0.38344,0.38323,0.38344,497276,...,-0.87463,4640151,-0.84704,1.776140e+06,-0.84682,-0.36388,-0.36388,0.0,0.0,0.0
172848,2025-01-05 19:49:00,120,19,3,4,0.38344,0.38350,0.38310,0.38310,629479,...,-0.85551,4567068,-0.83370,1.748150e+06,-0.83347,-0.34325,-0.34323,0.0,0.0,0.0


In [211]:
'''
定义平仓-开仓冷却，加仓间冷却，开仓-平仓冷却
平仓分两步，lgb模型发送平仓信号后，再用当前价格和滑动n个收盘价平均值对比，当前价格<滑动均价平仓
'''
def predicted_signal_process(lgb_future3, n, profit_rate, stop_rate, w1, w2, w3, w4, w5, w6, w7):
    position = 0
    trade_price = 0
    return_rate = 0
    positions = np.zeros(len(lgb_future3))
    trade_prices = np.zeros(len(lgb_future3))
    return_rates = np.zeros(len(lgb_future3))
    close = lgb_future3['close'].tolist()
    predicted_signal = lgb_future3['predicted_signal'].tolist()
    sar = lgb_future3['sar1'].tolist()
    stop_rate = [stop_rate] * len(lgb_future3)
    atr1_2 = lgb_future3['atr1_2_stop_rate'].tolist()
    atr1_4 = lgb_future3['atr1_4_stop_rate'].tolist()
    atr2_2 = lgb_future3['atr2_2_stop_rate'].tolist()
    atr2_4 = lgb_future3['atr2_4_stop_rate'].tolist()
    atr3_2 = lgb_future3['atr3_2_stop_rate'].tolist()
    atr3_4 = lgb_future3['atr3_4_stop_rate'].tolist()
    w_stop_rate = [
                (w1 * stop_rate[i] + w2 * atr1_2[i] + w3 * atr1_4[i] +
                w4 * atr2_2[i] + w5 * atr2_4[i] + w6 * atr3_2[i] +
                w7 * atr3_4[i]) * n
                for i in range(len(lgb_future3))
            ]
    for i in range(len(lgb_future3)):
        positions[i] = position
        return_rates[i] = return_rate
        trade_prices[i] = trade_price
        # 空仓时候
        if positions[i] == 0:
            if predicted_signal[i] == 1:
                trade_price = close[i]
                trade_prices[i] = trade_price
                position = 1
                positions[i] = position
            elif predicted_signal[i] == -1:
                trade_price = close[i]
                trade_prices[i] = trade_price
                position = -1
                positions[i] = position
        #多仓情况
        elif positions[i] == 1:
            try:
                return_rate = ((close[i] / trade_price) -1) * n
                return_rates[i] = return_rate
            except:
                pass
            if predicted_signal[i] != 0:
                if return_rate < w_stop_rate[i]: #止损强行平仓
                    predicted_signal[i] = -1
                    position = 0
                    trade_price = 0
                    return_rate = 0
                elif close[i] < sar[i] and return_rate > profit_rate * n: #sar反转并且大于固定止盈
                    predicted_signal[i] = -1
                    position = 0
                    trade_price = 0
                    return_rate = 0
                else:
                    predicted_signal[i] = 0
        #空仓情况
        elif positions[i] == -1:
            try:
                return_rate = ((trade_price / close[i]) -1) * n
                return_rates[i] = return_rate
            except:
                pass
            if predicted_signal[i] != 0:
                if return_rate < w_stop_rate[i]: #止损强行平仓
                    predicted_signal[i] = 1
                    position = 0
                    trade_price = 0
                    return_rate = 0
                elif close[i] > sar[i] and return_rate > profit_rate * n: #sar反转并且大于固定止盈
                    predicted_signal[i] = 1
                    position = 0
                    trade_price = 0
                    return_rate = 0
                else:
                    predicted_signal[i] = 0

    lgb_future3['position'] = positions
    lgb_future3['predicted_signal2'] = predicted_signal
    lgb_future3['trade_prices'] = trade_prices
    lgb_future3['return_rates'] = return_rates
    lgb_future3['w_stop_rate'] = w_stop_rate
    return lgb_future3
            
def vbt_process(lgb_future3):
# 初始化空的信号序列
    entries = np.zeros(len(lgb_future3), dtype=bool)
    exits = np.zeros(len(lgb_future3), dtype=bool)
    short_entries = np.zeros(len(lgb_future3), dtype=bool)
    short_exits = np.zeros(len(lgb_future3), dtype=bool)
    signals = lgb_future3['predicted_signal2'].to_list()
    # 初始化当前仓位状态
    is_long = None
    buy_index = 0
    # 遍历每个信号并生成买入/卖出信号
    for i in range(len(lgb_future3)):
        signal = signals[i]
        if is_long is None:
            if signal == 1:
                entries[i] = True
                is_long = True
                buy_index = i
            elif signal == -1:  # 如果当前持有空仓，则平空仓并开多仓
                short_entries[i] = True
                is_long = False
                buy_index = i
        elif signal == -1 and is_long == True:
            exits[i] = True
            is_long = None
        elif signal == 1 and is_long == False:
            short_exits[i] = True
            is_long = None
    if is_long != None:
        entries[buy_index] = False
        short_entries[buy_index] = False
    # 将信号集成到数据框架中
    lgb_future3['entries'] = entries
    lgb_future3['exits'] = exits
    lgb_future3['short_entries'] = short_entries
    lgb_future3['short_exits'] = short_exits
    # 计算每次交易的持仓量
    cash = 1  # 初始现金
    size = cash / lgb_future3['close']  # 每次买入的数量
    portfolio = vbt.Portfolio.from_signals(
        lgb_future3['close'],  # 使用 close 价格作为价格数据
        entries=entries,  # 买入信号
        exits=exits,  # 卖出信号
        short_entries=short_entries,  # 开空仓信号
        short_exits=short_exits,  # 平空仓信号
        init_cash=cash,  # 初始资金
        size=size,  # 每次买入的数量
        fees=0.0,  # 无佣金
        slippage=0.0,  # 不考虑滑点
        cash_sharing=True,  # 支持现金共享
        direction='both'
    )
    # 输出结果
    portfolio.total_return()
    trades = portfolio.trades.records
    return lgb_future3, trades
def simulate_trading_performance(trades, n):
    cash = 1
    min_loss = 0
    cashs = []
    for i in range(len(trades)):
        cashs.append(cash)
        cash = cash * (1 + trades.loc[i, 'return'] * n) * (1- 0.0001 * n)
    cashs_series = pd.Series(cashs)
    ratios = cashs_series.shift(-1) / cashs_series
    max_get = 0
    for i in range(len(ratios)):
        if ratios.loc[i] < 1:
            loss = min(ratios[i:].cumprod() - 1)
            if loss < min_loss:
                min_loss = loss
        if ratios.loc[i] > 1:
            get = max(ratios[i:].cumprod() - 1)
            if get > min_loss:
                max_get = get
    return cash, min_loss, max_get


In [196]:
# 假设 lgb_future2 包含你需要的特征数据
features = lgb_future2.columns.drop(['timestamp','signal']).tolist()
train_size = int(0.9 * len(lgb_future2))  # 70% 数据用于训练

train_data = lgb_future2[:train_size]  # 训练集
test_data = lgb_future2[train_size:]   # 测试集

X_train = train_data[features]
y_train = train_data['signal']
X_test = test_data[features]
y_test = test_data['signal']

# 定义目标函数
def objective(trial, lgb_future2):
    # 定义需要调优的超参数
    class_weight_1 = trial.suggest_int('class_weight_1', 100, 1000)
    lower_bound = max(100, int(class_weight_1 * 0.66))
    upper_bound = min(1000, int(class_weight_1 * 1.5))
    if lower_bound > upper_bound:
        lower_bound = upper_bound
    class_weight_0 = trial.suggest_int('class_weight_-1', lower_bound, lower_bound)
    param = {
    'objective': 'multiclass',  # 多分类
    'num_class': 3,  # 三分类
    'metric': 'multi_logloss',  # 多分类的对数损失，或使用 multi_error 或 accuracy
    'boosting_type': 'gbdt',  # 使用 GOSS 加速树构建
    'learning_rate': trial.suggest_float('learning_rate', 0.001, 0.1, log=True),  # 0.001 到 0.1 之间
    'num_leaves': trial.suggest_int('num_leaves', 20, 256),  # 20 到 256 之间
    'max_depth': trial.suggest_int('max_depth', -1, 15),  # 最大深度，-1 表示没有限制
    'min_child_samples': trial.suggest_int('min_child_samples', 10, 100),  # 每个叶子的最小样本数
    'max_bin': trial.suggest_int('max_bin', 100, 500),  # 特征分箱数
    'subsample': trial.suggest_float('subsample', 0.5, 1.0),  # 每棵树训练时使用的样本比例
    'lambda_l1': trial.suggest_float('lambda_l1', 0.0, 10.0),  # L1 正则化系数
    'lambda_l2': trial.suggest_float('lambda_l2', 0.0, 10.0),  # L2 正则化系数
    'bagging_fraction': trial.suggest_float('bagging_fraction', 0.5, 1.0),  # 每棵树的样本采样比例
    'feature_fraction': trial.suggest_float('feature_fraction', 0.5, 1.0),  # 每棵树使用的特征比例
    'boost_from_average': False,  # 对于多分类任务，通常设置为 False
    'random_state': 42,  # 随机种子

    # 类别不平衡时的权重调整
    'class_weight': {
        0: 1,
        1: class_weight_1,  # 对于类别1（如平仓）的权重
        -1: class_weight_0  # 对于类别-1（如买入）的权重
    },
    }
    weights = []
    for i in range(1, 8):
        remaining = 1 - sum(weights)
        if i < 7:  # 对于前6个权重
            max_possible = remaining - (7 - i) * 0.05  # 确保剩下的权重至少每个有0.05
            upper_bound = min(0.95, max_possible)
            lower_bound = max(0.05, remaining - (6 - i) * 0.95)

            # 如果 upper_bound 小于 lower_bound，则设置它们相等以避免无效范围
            if upper_bound < lower_bound:
                upper_bound = lower_bound

            # 提议权重时确保它在有效范围内
            w = trial.suggest_float(f'w{i}', lower_bound, upper_bound, step=0.01)
            weights.append(w)

            # 检查提议的权重是否导致剩余权重不足
            if remaining - w < (6 - i) * 0.05:
                raise ValueError(f"Proposed weight w{i} leads to insufficient remaining weight.")

        else:  # 最后一个权重
            weights.append(max(0.05, 1 - sum(weights)))  # 确保总和为1，并且不低于最小值
    # 将最终确定的权重赋值给单独的变量
    w1, w2, w3, w4, w5, w6, w7 = weights
    n = trial.suggest_int('n', 7, 10)
    #stop_rate固定止损
    min_stop_rate = max(-0.25 / n, -0.3)
    max_stop_rate = min(-0.1 / n, -0.01)
    stop_rate = trial.suggest_float('stop_rate', min_stop_rate, max_stop_rate, step=0.001)
    min_profit_rate = 0.05 / n
    max_profit_rate = 0.5 / n
    profit_rate = trial.suggest_float('profit_rate', min_profit_rate, min(max_profit_rate, 0.01), step=0.001)
    # 训练 LGBM 模型
    lgb_model = lgb.LGBMClassifier(**param)
    lgb_model.fit(X_train, y_train)
    y_pred  = lgb_model.predict(X_test)
    # 将预测信号加入回测数据
    lgb_future2.loc[X_test.index, 'predicted_signal'] = y_pred
    lgb_future3 = lgb_future2.dropna().reset_index(drop=True)

    lgb_future3 = predicted_signal_process(lgb_future3, n, profit_rate, stop_rate, w1, w2, w3, w4, w5, w6, w7)
    lgb_future3, trades = vbt_process(lgb_future3)
    cash, min_loss, max_get = simulate_trading_performance(trades, n)

    return cash, min_loss, len(trades)
# 进行超参数优化
study = optuna.create_study(directions=["maximize", "maximize","minimize"])
def progress_bar_callback(study, trial):
    # 更新进度条
    progress_bar.update(1)

# 使用 tqdm 创建一个进度条
progress_bar = tqdm(total=100, desc="Optimizing", ncols=100)

study.optimize(lambda trial: objective(trial, lgb_future2), n_trials=100, n_jobs=-1, callbacks=[progress_bar_callback])

# 获取参数组合及对应的收益
param_combinations = [(trial.params, trial.values) for trial in study.trials]

# 过滤出收益率大于1000%且胜率大于50%的组合
filtered_combinations = [
    (params, values)
    for params, values in param_combinations
    if values != None
        if values[0] > 5 and values[1] > -0.4
]

# 根据胜率从高到低排序
filtered_combinations.sort(key=lambda x: x[1][0], reverse=True)

# 输出最佳参数，包括交易次数
for i in range(min(100, len(filtered_combinations))):
    params, values = filtered_combinations[i]
    print(f"排名 {i + 1} 的参数组合: {[round(value, 3) for value in params.values()]}, "
          f"收益率: {values[0]}, 最大回测: {values[1]}，交易次数: {values[2]}")



[I 2025-01-17 17:01:13,958] A new study created in memory with name: no-name-9af71bff-51c1-420f-953e-02360748ab93


Optimizing:   0%|                                                           | 0/100 [00:00<?, ?it/s]

[I 2025-01-17 17:01:27,836] Trial 0 finished with values: [0.5646107123739063, -0.7273491140790652, 41.0] and parameters: {'class_weight_1': 698, 'class_weight_-1': 460, 'learning_rate': 0.00493850119366445, 'num_leaves': 238, 'max_depth': 1, 'min_child_samples': 44, 'max_bin': 255, 'subsample': 0.7570053532215002, 'lambda_l1': 3.4062681184185974, 'lambda_l2': 4.856424032164723, 'bagging_fraction': 0.703131562571165, 'feature_fraction': 0.7274364090221886, 'w1': 0.32, 'w2': 0.22999999999999998, 'w3': 0.16999999999999998, 'w4': 0.09, 'w5': 0.08, 'w6': 0.1100000000000001, 'n': 8, 'stop_rate': -0.02825, 'profit_rate': 0.00925}.
[I 2025-01-17 17:01:36,131] Trial 4 finished with values: [1.5876339739183267, -0.23927431752584616, 34.0] and parameters: {'class_weight_1': 989, 'class_weight_-1': 652, 'learning_rate': 0.019804998810055974, 'num_leaves': 248, 'max_depth': 2, 'min_child_samples': 14, 'max_bin': 464, 'subsample': 0.7515476786711806, 'lambda_l1': 2.3492206529284343, 'lambda_l2': 1.

排名 1 的参数组合: [905, 597, 0.001, 138, 4, 82, 475, 0.607, 6.09, 5.839, 0.642, 0.629, 0.57, 0.13, 0.07, 0.05, 0.05, 0.13, 9, -0.019, 0.007], 收益率: 15.469005776507286, 最大: 0.0，收益率: 49.0
排名 2 的参数组合: [530, 349, 0.053, 244, 15, 44, 350, 0.687, 7.068, 0.887, 0.898, 0.663, 0.35, 0.21, 0.08, 0.12, 0.07, 0.17, 10, -0.016, 0.006], 收益率: 11.139610168021074, 最大: 0.0，收益率: 9.0
排名 3 的参数组合: [971, 640, 0.041, 253, 0, 49, 239, 0.973, 9.061, 0.956, 0.866, 0.804, 0.48, 0.26, 0.05, 0.05, 0.05, 0.11, 10, -0.02, 0.007], 收益率: 10.82002690383089, 最大: 0.0，收益率: 11.0
排名 4 的参数组合: [435, 287, 0.004, 238, 1, 56, 255, 0.757, 0.372, 3.159, 0.703, 0.516, 0.32, 0.09, 0.07, 0.24, 0.07, 0.21, 8, -0.014, 0.009], 收益率: 10.397408598779387, 最大: 0.0，收益率: 49.0
排名 5 的参数组合: [530, 349, 0.053, 244, 15, 33, 350, 0.798, 4.812, 0.887, 0.898, 0.787, 0.64, 0.08, 0.07, 0.05, 0.05, 0.11, 10, -0.024, 0.007], 收益率: 10.122704401915586, 最大: 0.0，收益率: 8.0
排名 6 的参数组合: [823, 543, 0.049, 157, 15, 41, 371, 0.603, 7.729, 5.444, 0.914, 0.52, 0.56, 0.07, 0.09, 

In [203]:
# 过滤出收益率大于1000%且胜率大于50%的组合
filtered_combinations = [
    (params, values)
    for params, values in param_combinations
    if values != None
        if values[0] > 5 and values[1] > -0.4
]

with open(r'D:\\output1.csv', mode='w', newline='') as file:
    writer = csv.writer(file)
    
    # 写入表头
    header = list(filtered_combinations[0][0].keys()) + ['cash', 'stop_loss','trade_cnt']
    writer.writerow(header)
    
    # 写入每一行的数据
    for combination in filtered_combinations:
        params = list(combination[0].values())
        metrics = combination[1]
        writer.writerow(params + metrics)

In [202]:
from optuna.visualization import plot_param_importances, plot_contour

# 可视化超参数重要性
fig = plot_param_importances(study)
fig.show()

# 可视化超参数之间的相互作用（选择几个重要的超参数）
fig = plot_param_importances(study, target=lambda t: t.values[0] if t.values else None)
fig.show()

# 可视化超参数之间的相互作用（选择几个重要的超参数）
fig = plot_param_importances(study, target=lambda t: t.values[1] if t.values else None)
fig.show()

# 可视化超参数之间的相互作用（选择几个重要的超参数）
fig = plot_param_importances(study, target=lambda t: t.values[2] if t.values else None)
fig.show()

In [212]:

# 假设 lgb_future2 包含你需要的特征数据

# 定义目标函数

# 假设 lgb_future2 包含你需要的特征数据
features = lgb_future2.columns.drop(['timestamp','signal','predicted_signal','predicted_signal2'], errors='ignore').tolist()
train_size = int(0.9 * len(lgb_future2))  # 70% 数据用于训练

train_data = lgb_future2[:train_size]  # 训练集
test_data = lgb_future2[train_size:]   # 测试集

X_train = train_data[features]
y_train = train_data['signal']
X_test = test_data[features]
y_test = test_data['signal']

# 定义需要调优的超参数
param = {
'objective': 'multiclass',  # 多分类
'num_class': 3,  # 三分类
'metric': 'multi_logloss',  # 多分类的对数损失，或使用 multi_error 或 accuracy
'boosting_type': 'gbdt',  # 使用 GOSS 加速树构建
'learning_rate': 0.0529776218317605,  # 0.001 到 0.1 之间
'num_leaves': 244,  # 20 到 256 之间
'max_depth': 15,  # 最大深度，-1 表示没有限制
'min_child_samples': 33,  # 每个叶子的最小样本数
'max_bin': 350,  # 特征分箱数
'subsample': 0.798388777868723,  # 每棵树训练时使用的样本比例
'lambda_l1': 4.81242655687031,  # L1 正则化系数
'lambda_l2': 0.886708183880133,  # L2 正则化系数
'bagging_fraction': 0.897880606230822,  # 每棵树的样本采样比例
'feature_fraction': 0.787452101211785,  # 每棵树使用的特征比例
'boost_from_average': False,  # 对于多分类任务，通常设置为 False
'random_state': 42,  # 随机种子
'verbosity':-1,
# 类别不平衡时的权重调整
'class_weight': {
    0: 1,
    1: 530,  # 对于类别1（如平仓）的权重
    -1: 349  # 对于类别-1（如买入）的权重
},
}
weights = [0.64,0.08,0.07,0.05,0.05,0.11,0
]
w1, w2, w3, w4, w5, w6, w7 = weights
n = 10
#stop_rate固定止损
stop_rate = -0.024
profit_rate = 0.007
# 训练 LGBM 模型
lgb_model = lgb.LGBMClassifier(**param)
lgb_model.fit(X_train, y_train)
y_pred  = lgb_model.predict(X_test)
# 将预测信号加入回测数据
lgb_future2.loc[X_test.index, 'predicted_signal'] = y_pred
lgb_future3 = lgb_future2.dropna().reset_index(drop=True)

lgb_future3 = predicted_signal_process(lgb_future3, n, profit_rate, stop_rate, w1, w2, w3, w4, w5, w6, w7)
lgb_future3, trades = vbt_process(lgb_future3)
cash, min_loss, max_get = simulate_trading_performance(trades, n)
'''
#实战数据测试
dff2 = dff.copy()
y_pred  = lgb_model.predict(dff2.iat[:,:-1])
dff2.loc[dff2.index, 'predicted_signal'] = y_pred
dff2, trade_cnts = predicted_signal_process(dff2, divide, n, open_cd, close_cd, add_cd, ir, stop_rate, profit_rate, loss_rate)
dff2, trades = vbt_process(dff2)

cash2, min_loss2, max_get2 = simulate_trading_performance(trades, n, divide, stop_rate, trade_cnts)
ic(cash, min_loss, max_get, cash2, min_loss2, max_get2)
'''
ic(cash, min_loss,len(trades))


ic| cash: 10.122704401915586
    min_loss: -0.22316525542011567
    len(trades): 8


(10.122704401915586, -0.22316525542011567, 8)

In [26]:
lgb_future3[lgb_future3['predicted_signal'] != lgb_future3['predicted_signal2']].to_csv('C:\pythonProject\lgb_future3.csv', index=False, encoding='utf-8-sig')

In [206]:
lgb_future3['predicted_signal2'].value_counts()

predicted_signal2
 0.0    17268
-1.0        9
 1.0        8
Name: count, dtype: int64

In [213]:
trades

Unnamed: 0,id,col,size,entry_idx,entry_price,entry_fees,exit_idx,exit_price,exit_fees,pnl,return,direction,status,parent_id
0,0,0,2.943427,237,0.33974,0.0,751,0.33504,0.0,0.013834,0.013834,1,1,0
1,1,0,2.942821,779,0.33981,0.0,3754,0.31642,0.0,0.068833,0.068833,1,1,1
2,2,0,3.192033,3925,0.31328,0.0,8601,0.31823,0.0,0.015801,0.015801,0,1,2
3,3,0,3.126075,9004,0.31989,0.0,9744,0.3128,0.0,0.022164,0.022164,1,1,3
4,4,0,3.101641,10425,0.32241,0.0,10791,0.31524,0.0,-0.022239,-0.022239,0,1,4
5,5,0,3.164056,11687,0.31605,0.0,12230,0.32452,0.0,0.0268,0.0268,0,1,5
6,6,0,3.081474,12232,0.32452,0.0,13263,0.34008,0.0,0.047948,0.047948,0,1,6
7,7,0,2.98552,13597,0.33495,0.0,15902,0.38763,0.0,0.157277,0.157277,0,1,7


In [214]:
lgb_future3[lgb_future3['predicted_signal'] != lgb_future3['predicted_signal2']]

Unnamed: 0,timestamp,day,hour,quarter,past_min,open,high,low,close,volume,...,return_rate,predicted_signal,predicted_signal2,trade_prices,return_rates,w_stop_rate,entries,exits,short_entries,short_exits
502,2024-12-25 04:08:00,109,4,0,8,0.32919,0.32923,0.32851,0.3287,18005191,...,0.335869,1.0,0.0,0.33974,0.335869,-0.233014,False,False,False,False
505,2024-12-25 04:11:00,109,4,0,11,0.32877,0.32887,0.32776,0.3281,14499303,...,0.35477,1.0,0.0,0.33974,0.35477,-0.235324,False,False,False,False
507,2024-12-25 04:13:00,109,4,0,13,0.32812,0.32842,0.32782,0.32794,4475082,...,0.359822,1.0,0.0,0.33974,0.359822,-0.235359,False,False,False,False
3449,2024-12-27 05:15:00,111,5,1,0,0.30936,0.30956,0.3083,0.30835,15384471,...,0.0,1.0,0.0,0.33981,1.020269,-0.216894,False,False,False,False
3754,2024-12-27 10:20:00,111,10,1,5,0.31614,0.31649,0.31608,0.31642,3773322,...,0.0,-1.0,1.0,0.33981,0.739207,-0.217064,False,False,False,True
4076,2024-12-27 15:42:00,111,15,2,12,0.30906,0.30932,0.30855,0.30904,15141139,...,0.0,1.0,0.0,0.31328,-0.135342,-0.220443,False,False,False,False
4678,2024-12-28 01:44:00,112,1,2,14,0.31508,0.31541,0.31507,0.31536,3344780,...,0.0,-1.0,0.0,0.31328,0.066394,-0.229152,False,False,False,False
4935,2024-12-28 06:01:00,112,6,0,1,0.31073,0.311,0.31061,0.31061,2633032,...,0.152925,1.0,0.0,0.31328,-0.085227,-0.204489,False,False,False,False
5250,2024-12-28 11:16:00,112,11,1,1,0.31493,0.31537,0.31491,0.31511,5005501,...,0.0,-1.0,0.0,0.31328,0.058414,-0.201666,False,False,False,False
5388,2024-12-28 13:34:00,112,13,2,4,0.31485,0.31511,0.31485,0.31511,2663225,...,0.0,-1.0,0.0,0.31328,0.058414,-0.188949,False,False,False,False


In [31]:
trades.to_csv(r'D:\output2.csv', encoding='utf-8-sig')

In [36]:
lgb_future3.iloc[:,-4:].value_counts()
dff2.iloc[:,-4:].value_counts()

entries  exits  short_entries  short_exits
False    False  False          False          9416
                               True              2
                True           False             2
         True   False          False             1
True     False  False          False             1
Name: count, dtype: int64

In [216]:
# 获取特征重要性（基于分裂次数）
importance_split = lgb_model.booster_.feature_importance(importance_type='split')

# 获取特征重要性（基于增益）
importance_gain = lgb_model.booster_.feature_importance(importance_type='gain')

# 将结果放入 DataFrame 进行展示
importance_df = pd.DataFrame({
    'Feature': features,
    'Importance (split)': importance_split,
    'Importance (gain)': importance_gain
})

# 按照增益重要性排序
importance_df = importance_df.sort_values(by='Importance (gain)', ascending=False)

importance_df

Unnamed: 0,Feature,Importance (split),Importance (gain)
47,rsi1,780,2.675604e+06
95,position,459,1.784372e+06
24,current_last_low_rate,413,1.637479e+06
96,return_rate,803,4.310951e+05
51,rsi3,163,2.405076e+05
...,...,...,...
68,atr2_4_stop_rate,89,1.991824e+03
65,atr1_4_stop_rate,81,1.568354e+03
6,low,71,1.564467e+03
71,atr3_4_stop_rate,92,1.459344e+03


In [43]:


# 保存模型
joblib.dump(lgb_model, r'C:\pythonProject\trained_lgb_model2.txt')




['C:\\pythonProject\\trained_lgb_model2.txt']

In [9]:


# 读取txt文件内容
file_path = r'C:\pythonProject\data.csv'

dff = pd.read_csv(file_path,encoding='utf-8-sig')
dff[dff['pred']!=0]

Unnamed: 0,day,hour,quarter,past_min,open,high,low,close,volume,quote_asset_volume,...,taker_sell_quote_asset_volume_diff,taker_sell_quote_asset_volume_rate,taker_buy_sell_volume_diff,taker_buy_sell_volume_rate,taker_buy_sell_quote_asset_volume_diff,taker_buy_sell_quote_asset_volume_rate,current_taker_buy_sell_volume_rate,current_buy_sell_quote_asset_volume_rate,timestamp,pred
16,0,8,1,0,0.38664,0.38946,0.38607,0.38944,44098791.0,1.712890e+07,...,2.837072e+06,0.53698,4592897.0,-1.98656,1.784863e+06,-1.98996,0.10909,0.10934,2025-01-07 08:15:00,-1
17,0,8,1,0,0.38664,0.38982,0.38607,0.38982,45952850.0,1.785134e+07,...,2.895354e+06,0.54801,6147770.0,-2.65908,2.390740e+06,-2.66546,0.18215,0.18264,2025-01-07 08:15:00,-1
18,0,8,1,0,0.38664,0.39016,0.38607,0.39002,47931165.0,1.862286e+07,...,3.048816e+06,0.57705,7339213.0,-3.17441,2.855335e+06,-3.18344,0.23435,0.23504,2025-01-07 08:15:00,-1
19,0,8,1,0,0.38664,0.39016,0.38607,0.38980,49251023.0,1.913755e+07,...,3.341470e+06,0.63245,7158095.0,-3.09608,2.784715e+06,-3.10470,0.21827,0.21888,2025-01-07 08:15:00,-1
20,0,8,1,0,0.38664,0.39016,0.38607,0.38972,49583084.0,1.926699e+07,...,3.400659e+06,0.64365,7186476.0,-3.10835,2.795775e+06,-3.11703,0.21806,0.21866,2025-01-07 08:15:00,-1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21362,0,17,3,0,0.34973,0.35007,0.34581,0.34609,62271656.0,2.164848e+07,...,3.049483e+06,0.25630,-21910782.0,12.06650,-7.603673e+06,11.83248,-0.55179,-0.55169,2025-01-08 17:45:00,1
21363,0,17,3,0,0.34973,0.35007,0.34581,0.34609,62973432.0,2.189136e+07,...,3.225478e+06,0.27110,-22226048.0,12.24012,-7.712783e+06,12.00227,-0.55259,-0.55248,2025-01-08 17:45:00,1
21364,0,17,3,0,0.34973,0.35007,0.34581,0.34592,63264663.0,2.199213e+07,...,3.312894e+06,0.27844,-22440053.0,12.35797,-7.786842e+06,12.11752,-0.55429,-0.55418,2025-01-08 17:45:00,1
21365,0,17,3,0,0.34973,0.35007,0.34563,0.34570,64229544.0,2.232579e+07,...,3.557229e+06,0.29898,-22888392.0,12.60488,-7.941853e+06,12.35874,-0.55556,-0.55544,2025-01-08 17:45:00,1


In [14]:
# 保存 timestamp 列
timestamp_column = dff['timestamp']

# 选择从第二列开始的部分并转换为 float
dff = dff.iloc[:, :-2].astype(float)

# 恢复 timestamp 列
dff['timestamp'] = timestamp_column

# 如果需要，可以将 timestamp 列转换为日期时间格式
dff['timestamp'] = pd.to_datetime(dff['timestamp'])
dff.info()

ValueError: time data "2025-01-08" doesn't match format "%Y-%m-%d %H:%M:%S", at position 61. You might want to try:
    - passing `format` if your strings have a consistent format;
    - passing `format='ISO8601'` if your strings are all ISO8601 but not necessarily in exactly the same format;
    - passing `format='mixed'`, and the format will be inferred for each element individually. You might want to use `dayfirst` alongside this.

In [15]:

dff

Unnamed: 0,day,hour,quarter,past_min,open,high,low,close,volume,quote_asset_volume,...,taker_buy_quote_asset_volume_rate,taker_sell_base_asset_volume_diff,taker_sell_base_asset_volume_rate,taker_sell_quote_asset_volume_diff,taker_sell_quote_asset_volume_rate,taker_buy_sell_volume_diff,taker_buy_sell_volume_rate,taker_buy_sell_quote_asset_volume_diff,taker_buy_sell_quote_asset_volume_rate,timestamp
0,0.0,8.0,1.0,0.0,0.38664,0.38936,0.38607,0.38819,38050270.0,1.477748e+07,...,0.62175,6075348.0,0.44480,2.380280e+06,0.45052,894464.0,-0.38688,3.470304e+05,-0.38691,2025-01-07 08:15:00
1,0.0,8.0,1.0,0.0,0.38664,0.38936,0.38607,0.38827,38107869.0,1.479984e+07,...,0.62681,6075771.0,0.44483,2.380444e+06,0.45055,951217.0,-0.41143,3.690633e+05,-0.41147,2025-01-07 08:15:00
2,0.0,8.0,1.0,0.0,0.38664,0.38936,0.38607,0.38834,38229164.0,1.484695e+07,...,0.63209,6137483.0,0.44935,2.404409e+06,0.45509,949088.0,-0.41051,3.682344e+05,-0.41055,2025-01-07 08:15:00
3,0.0,8.0,1.0,0.0,0.38664,0.38936,0.38607,0.38829,38294415.0,1.487228e+07,...,0.63363,6185267.0,0.45285,2.422963e+06,0.45860,918771.0,-0.39739,3.564628e+05,-0.39742,2025-01-07 08:15:00
4,0.0,8.0,1.0,0.0,0.38664,0.38936,0.38607,0.38843,38533179.0,1.496501e+07,...,0.65252,6210731.0,0.45471,2.432852e+06,0.46047,1106607.0,-0.47864,4.294111e+05,-0.47875,2025-01-07 08:15:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22344,0.0,19.0,1.0,0.0,0.34778,0.34936,0.34778,0.34849,23303460.0,8.122085e+06,...,-0.57879,-8981134.0,-0.46988,-3.134126e+06,-0.47017,-9051244.0,-0.74869,-3.173244e+06,-0.74989,2025-01-08 19:15:00
22345,0.0,19.0,1.0,0.0,0.34778,0.34936,0.34778,0.34860,23649025.0,8.242525e+06,...,-0.57150,-8863349.0,-0.46372,-3.093074e+06,-0.46401,-8941249.0,-0.73959,-3.134908e+06,-0.74083,2025-01-08 19:15:00
22346,0.0,19.0,1.0,0.0,0.34778,0.34936,0.34778,0.34860,23669793.0,8.249765e+06,...,-0.57090,-8861313.0,-0.46361,-3.092364e+06,-0.46390,-8924553.0,-0.73821,-3.129087e+06,-0.73946,2025-01-08 19:15:00
22347,0.0,19.0,1.0,0.0,0.34778,0.34936,0.34778,0.34863,23861303.0,8.316521e+06,...,-0.56841,-8747675.0,-0.45766,-3.052754e+06,-0.45796,-8960319.0,-0.74117,-3.141552e+06,-0.74240,2025-01-08 19:15:00


In [162]:
y_pred  = lgb_model.predict(dff.iloc[:,1:-1])
dff['pred'] = y_pred



In [163]:
dff['pred'].info()

<class 'pandas.core.series.Series'>
RangeIndex: 7726 entries, 0 to 7725
Series name: pred
Non-Null Count  Dtype
--------------  -----
7726 non-null   int64
dtypes: int64(1)
memory usage: 60.5 KB
