 # 测试LGBM模型加载和预测
这个notebook用于测试模型加载和预测过程，确保模型能正常工作。

In [3]:
import pickle
import numpy as np
import pandas as pd
import lightgbm as lgb
import os

# 显示所有列
pd.set_option('display.max_columns', None)

In [4]:
# 1. 加载模型
print("当前工作目录:", os.getcwd())
print("\n尝试加载模型...")
try:
    with open('lgbm.pkl', 'rb') as f:
        model = pickle.load(f)
    print("模型加载成功")
    print("模型类型:", type(model))
    print("模型属性:", dir(model))
except Exception as e:
    print("模型加载失败:", str(e))

当前工作目录: e:\香港中文大学深圳CUHKSZ\MFE 5210 Algorithm Trading\Project
\n尝试加载模型...
模型加载成功
模型类型: <class 'lightgbm.sklearn.LGBMRegressor'>
模型属性: ['_Booster', '__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__sklearn_is_fitted__', '__sklearn_tags__', '__str__', '__subclasshook__', '__weakref__', '_base_doc', '_best_iteration', '_best_score', '_class_map', '_class_weight', '_classes', '_evals_result', '_more_tags', '_n_classes', '_n_features', '_n_features_in', '_objective', '_other_params', '_process_n_jobs', '_process_params', '_update_sklearn_tags_from_dict', 'best_iteration_', 'best_score_', 'booster_', 'boosting_type', 'class_weight', 'colsample_bytree', 'evals_result_', 'feature_importances_', 'feature_name_', 'feature_names_in_', 'fit', 'fitted_'

In [5]:
# 2. 加载特征数据
try:
    features_path = os.path.join('all_test_features', 'all_test_features.csv')
    features_df = pd.read_csv(features_path)
    print("特征数据加载成功")
    print("\n数据形状:", features_df.shape)
    print("\n列名:", features_df.columns.tolist())
    print("\n前5行数据:")
    display(features_df.head())
except Exception as e:
    print("特征数据加载失败:", str(e))

特征数据加载成功

数据形状: (637966, 37)

列名: ['datetime', 'symbol', 'exchange', 'feature1', 'feature2', 'feature3', 'feature4', 'feature5', 'feature6', 'feature7', 'feature8', 'feature9', 'feature10', 'feature11', 'feature12', 'feature13', 'feature14', 'feature15', 'feature16', 'feature17', 'feature18', 'feature19', 'feature20', 'feature21', 'feature22', 'feature23', 'Open', 'High', 'Low', 'Close', 'Volume', 'Close_time', 'Quote_asset_volume', 'Number_of_trades', 'Taker_buy_base_asset_volume', 'Taker_buy_quote_asset_volume', 'Ignore']

前5行数据:


Unnamed: 0,datetime,symbol,exchange,feature1,feature2,feature3,feature4,feature5,feature6,feature7,feature8,feature9,feature10,feature11,feature12,feature13,feature14,feature15,feature16,feature17,feature18,feature19,feature20,feature21,feature22,feature23,Open,High,Low,Close,Volume,Close_time,Quote_asset_volume,Number_of_trades,Taker_buy_base_asset_volume,Taker_buy_quote_asset_volume,Ignore
0,2024-01-13 23:14:00,BTCUSDT,BINANCE,-0.000178,-0.000258,0.000106,0.022433,0.152649,45.416903,0.00071,7.616324,22.156979,33.962974,18.797399,-0.001094,0.000777,0.001148,100.014166,100.117851,0.527998,0.595071,0.37858,0.281101,0.498497,0.498483,1.174911,42942.13,42946.49,42942.13,42943.63,3.59072,1705188000000.0,154201.522236,288.0,1.69421,72756.368588,0.0
1,2024-01-13 23:15:00,BTCUSDT,BINANCE,-0.000213,-0.000275,9.3e-05,0.022447,0.152552,45.443784,0.00071,7.285839,22.174242,34.222891,18.828933,-4.9e-05,-0.000415,0.000979,100.012631,100.086331,0.682366,0.704777,0.458106,0.334983,0.497234,0.497229,1.134572,42943.63,42951.1,42928.0,42930.11,19.85662,1705188000000.0,852650.039125,815.0,8.07163,346583.378214,0.0
2,2024-01-13 23:16:00,BTCUSDT,BINANCE,-0.000177,-0.000294,8.3e-05,0.022462,0.17665,45.691119,0.00071,6.691216,22.172577,33.78826,18.827519,-0.000176,-0.000624,0.001442,100.148793,100.052386,0.830136,0.81874,0.544111,0.392532,0.510795,0.510782,1.303845,42930.12,42939.98,42908.29,42915.55,11.21643,1705188000000.0,481500.681188,695.0,4.93905,212024.174964,0.0
3,2024-01-13 23:17:00,BTCUSDT,BINANCE,-0.000146,-0.000313,7.3e-05,0.022476,0.175953,47.90757,0.000709,7.874134,22.203675,34.604028,18.714764,-2.5e-05,-0.000696,0.001324,100.170025,100.060313,0.778614,0.783243,0.526356,0.379581,0.516372,0.516356,1.554809,42915.56,42921.1,42911.1,42918.95,8.03701,1705188000000.0,344908.783538,496.0,3.76086,161393.564,0.0
4,2024-01-13 23:18:00,BTCUSDT,BINANCE,-8.2e-05,-0.00033,6.3e-05,0.02249,0.174337,49.990314,0.000709,8.156227,22.072533,36.532425,18.771384,0.000336,-0.00142,0.001273,100.144337,100.027314,0.610014,0.65346,0.440473,0.320973,0.50926,0.509245,1.263835,42918.94,42939.74,42918.94,42934.11,8.38799,1705188000000.0,360152.231428,491.0,2.28773,98215.646384,0.0


In [6]:
# 3. 准备测试数据
feature_columns = [f'feature{i}' for i in range(1, 24)]

# 验证特征列是否存在
missing_features = [col for col in feature_columns if col not in features_df.columns]
if missing_features:
    print("缺少特征列:", missing_features)
else:
    print("所有特征列都存在")
    
# 获取第一行特征数据进行测试
test_features = features_df[feature_columns].iloc[0:1].values.astype(np.float32)
print("\n测试数据形状:", test_features.shape)
print("测试数据类型:", test_features.dtype)

所有特征列都存在

测试数据形状: (1, 23)
测试数据类型: float32


In [7]:
# 4. 测试预测
try:
    prediction = model.predict(test_features)
    print("预测成功!")
    print("预测结果:", prediction)
    print("预测结果形状:", prediction.shape)
    
    # 测试多行预测
    test_features_multi = features_df[feature_columns].iloc[0:5].values.astype(np.float32)
    predictions_multi = model.predict(test_features_multi)
    print("\n多行预测结果:")
    for i, pred in enumerate(predictions_multi):
        print(f"行 {i}: {pred}")
except Exception as e:
    print("预测失败:", str(e))

预测失败: 'NoneType' object is not callable


In [None]:
# 5. 测试完整预测流程
def test_prediction(row):
    try:
        # 准备特征
        features = row[feature_columns].values.reshape(1, -1).astype(np.float32)
        
        # 进行预测
        prediction = model.predict(features)[0]
        
        # 判断信号
        threshold = 0.0005
        signal = "看多" if prediction > threshold else "看空" if prediction < -threshold else "震荡"
        
        return pd.Series({
            'prediction': prediction,
            'signal': signal
        })
    except Exception as e:
        print(f"预测出错: {str(e)}")
        return pd.Series({'prediction': None, 'signal': None})

# 测试前5行数据
test_results = features_df.head().apply(test_prediction, axis=1)
print("测试结果:")
display(test_results)