In [None]:
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge
import tensorflow as tf
from arch import arch_model
import matplotlib.pyplot as plt
from tqdm import tqdm
import os

# Seed NumPy's and TensorFlow's global random generators with one shared value.
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

class AutoEncoder:
    """Dense autoencoder used to compress a feature matrix.

    Encoder: Dense(512) -> Dense(256) -> Dense(encoding_dim), all ReLU.
    Decoder: Dense(512) -> Dense(256) -> Dense(input_dim).

    Args:
        input_dim: Number of input features (width of ``X``).
        encoding_dim: Width of the bottleneck layer returned by ``encode``.
        output_activation: Activation of the reconstruction layer. Defaults
            to ``'linear'`` because the model is trained with an MSE loss on
            standardized features, which are zero-mean and unbounded; a
            ``'sigmoid'`` output (range 0..1) can never reproduce negative
            values or values above one. Pass ``'sigmoid'`` explicitly only
            for inputs scaled to [0, 1].
    """

    def __init__(self, input_dim, encoding_dim, output_activation='linear'):
        self.input_dim = input_dim
        self.encoding_dim = encoding_dim
        self.output_activation = output_activation
        # Dict with two models sharing weights: the full 'autoencoder' and
        # the 'encoder' sub-model that stops at the bottleneck.
        self.autoencoder = self._build_autoencoder()

    def _build_autoencoder(self):
        """Build and compile the network.

        Returns:
            dict: ``{'autoencoder': Model, 'encoder': Model}``. Only the
            autoencoder is compiled (Adam, MSE); the encoder reuses the same
            layers and is used for inference only.
        """
        layers = tf.keras.layers

        # Encoder
        input_layer = layers.Input(shape=(self.input_dim,))
        encoded = layers.Dense(512, activation='relu')(input_layer)
        encoded = layers.Dense(256, activation='relu')(encoded)
        encoded = layers.Dense(self.encoding_dim, activation='relu')(encoded)

        # Decoder
        decoded = layers.Dense(512, activation='relu')(encoded)
        decoded = layers.Dense(256, activation='relu')(decoded)
        decoded = layers.Dense(
            self.input_dim, activation=self.output_activation
        )(decoded)

        autoencoder = tf.keras.Model(input_layer, decoded)
        encoder = tf.keras.Model(input_layer, encoded)

        autoencoder.compile(optimizer='adam', loss='mse')
        return {'autoencoder': autoencoder, 'encoder': encoder}

    def fit(self, X, epochs=5, batch_size=512):
        """Train the autoencoder to reconstruct ``X``.

        Args:
            X: 2-D array of shape ``(n_samples, input_dim)``.
            epochs: Number of passes over ``X``.
            batch_size: Mini-batch size.

        Returns:
            The Keras ``History`` object produced by ``Model.fit``.
        """
        return self.autoencoder['autoencoder'].fit(
            X, X,
            epochs=epochs,
            batch_size=batch_size,
            shuffle=True,
            verbose=1
        )

    def encode(self, X):
        """Return the bottleneck representation of ``X``.

        Args:
            X: 2-D array of shape ``(n_samples, input_dim)``.

        Returns:
            Array of shape ``(n_samples, encoding_dim)``.
        """
        return self.autoencoder['encoder'].predict(X, verbose=0)

class QuantStrategy:
    """Monthly-rebalanced, equal-weight top-N stock back-test.

    Pipeline: ``load_and_process_data`` -> ``generate_signals`` ->
    ``save_results``. Each month an autoencoder + ridge regression is fitted
    on the trailing window of data that ends strictly BEFORE the month, the
    first trading day's cross-section is scored, and the highest-scoring
    stocks are held for the month. A GARCH(1,1) volatility check can
    liquidate the book at month end.

    Args:
        data_path: CSV file with at least the columns ``Symbol``,
            ``TradingDate``, ``Return`` and ``StockPrice``.
        save_dir: Directory for the CSV / PNG outputs (created if missing).
    """

    # Columns that are never used as model features.
    _NON_FEATURE_COLS = ('Symbol', 'TradingDate', 'Return', 'StockPrice')

    def __init__(self, data_path, save_dir):
        self.data_path = data_path
        self.save_dir = save_dir
        self.initial_capital = 10_000_000

        # Initialised here so save_results() is safe before generate_signals().
        self.portfolio_returns = []
        self.order_book = []
        self.positions = {}

        # exist_ok avoids the check-then-create race of os.path.exists().
        os.makedirs(save_dir, exist_ok=True)

    def load_and_process_data(self):
        """Read the CSV and add pairwise interaction features.

        Sets ``self.df`` (the enriched frame) and ``self.feature_cols`` (all
        columns except Symbol / TradingDate / Return / StockPrice).
        """
        print("正在加载数据...")
        self.df = pd.read_csv(self.data_path)

        self.df['TradingDate'] = pd.to_datetime(self.df['TradingDate'])

        # NOTE(review): assumes the first four CSV columns are the
        # non-feature columns (Symbol, TradingDate, Return, StockPrice).
        self.feature_cols = self.df.columns[4:].tolist()
        base_features = self.feature_cols

        print("正在生成交互特征...")
        # Collect every product first and attach them with a single concat;
        # inserting hundreds of columns one by one fragments the frame.
        interactions = {}
        for i, left in enumerate(tqdm(base_features)):
            for right in base_features[i + 1:]:
                interactions[f"interact_{left}_{right}"] = (
                    self.df[left] * self.df[right]
                )
        if interactions:
            self.df = pd.concat(
                [self.df, pd.DataFrame(interactions, index=self.df.index)],
                axis=1,
            )

        self.feature_cols = [
            col for col in self.df.columns
            if col not in self._NON_FEATURE_COLS
        ]

    @staticmethod
    def _select_top_stocks(candidates, predictions, n_stocks=10):
        """Return the ``n_stocks`` best-scored rows, one row per Symbol.

        Args:
            candidates: Frame with a ``Symbol`` column, one row per score.
            predictions: Scores aligned positionally with ``candidates``.
            n_stocks: Maximum number of distinct symbols to return.
        """
        # Negate for a descending, stable ranking (NaN scores sort last).
        order = np.argsort(-np.asarray(predictions, dtype=float), kind='stable')
        ranked = candidates.iloc[order]
        return ranked.drop_duplicates(subset='Symbol', keep='first').head(n_stocks)

    @staticmethod
    def _daily_portfolio_returns(month_data, holdings, n_stocks=10):
        """Equal-weight portfolio return for every trading day in the month.

        Each held symbol contributes ``Return / n_stocks``; a held symbol
        with no row on a given day contributes zero.

        Returns:
            list[dict]: ``{'Date': Timestamp, 'Return': float}`` per trading
            day, in chronological order.
        """
        trading_days = pd.DatetimeIndex(
            month_data['TradingDate'].unique()
        ).sort_values()
        held = month_data[month_data['Symbol'].isin(list(holdings))]
        # One observation per (day, symbol), matching a "first match" lookup.
        held = held.drop_duplicates(subset=['TradingDate', 'Symbol'], keep='first')
        per_day = held.groupby('TradingDate')['Return'].sum() / n_stocks
        per_day = per_day.reindex(trading_days, fill_value=0.0)
        return [
            {'Date': day, 'Return': float(ret)} for day, ret in per_day.items()
        ]

    def _record_order(self, row, action, shares):
        """Append one order to ``self.order_book`` priced from ``row``."""
        self.order_book.append({
            'Date': row['TradingDate'],
            'Symbol': row['Symbol'],
            'Action': action,
            'Price': row['StockPrice'],
            'Shares': shares,
            'Value': shares * row['StockPrice']
        })

    def generate_signals(self, n_stocks=10, lookback_months=12, vol_threshold=0.05):
        """Run the month-by-month back-test.

        Fills ``self.portfolio_returns`` (daily ``{'Date', 'Return'}`` dicts)
        and ``self.order_book`` (BUY / SELL dicts).

        Args:
            n_stocks: Number of stocks held; each gets ``1 / n_stocks`` weight.
            lookback_months: Length of the training window before each month.
            vol_threshold: Daily GARCH volatility (decimal, 0.05 = 5%) above
                which the whole book is liquidated at month end.
        """
        print("正在生成交易信号...")

        self.portfolio_returns = []
        self.order_book = []
        self.positions = {}

        # Group by calendar month via Period keys. Unlike Grouper(freq='M'),
        # whose labels are month-END timestamps, a Period exposes the month
        # START, which is needed to keep the current month out of training.
        monthly_groups = self.df.groupby(self.df['TradingDate'].dt.to_period('M'))

        prev_month_holdings = None
        portfolio_returns_history = []

        for month, month_data in tqdm(monthly_groups):
            if len(month_data) == 0:
                continue
            # Chronological order so iloc[0] / iloc[-1] are first / last day.
            month_data = month_data.sort_values('TradingDate', kind='stable')

            # Training window: [month_start - lookback, month_start).
            month_start = month.start_time
            train_start = month_start - pd.DateOffset(months=lookback_months)
            in_window = (
                (self.df['TradingDate'] >= train_start)
                & (self.df['TradingDate'] < month_start)
            )
            train_data = self.df[in_window]
            if len(train_data) == 0:
                continue

            # Score only the first trading day's cross-section: that is the
            # information available at rebalance time. Rows without a usable
            # price cannot be sized, so they are not candidates.
            # NOTE(review): whether 'Return' on a row is contemporaneous with
            # its features is not visible here - confirm against the data.
            rebalance_day = month_data['TradingDate'].iloc[0]
            candidates = month_data[
                (month_data['TradingDate'] == rebalance_day)
                & (month_data['StockPrice'] > 0)
            ]
            if candidates.empty:
                continue

            # A new Keras model is built every month; drop the previous one's
            # global state so memory does not accumulate across iterations.
            tf.keras.backend.clear_session()

            scaler = StandardScaler()
            X_train = scaler.fit_transform(train_data[self.feature_cols])

            print(f"\n正在训练自编码器 - {month.strftime('%Y-%m')}...")
            autoencoder = AutoEncoder(input_dim=len(self.feature_cols), encoding_dim=60)
            autoencoder.fit(X_train, epochs=5, batch_size=512)
            encoded_features = autoencoder.encode(X_train)

            print("正在进行岭回归分析...")
            ridge = Ridge(alpha=1.0)
            ridge.fit(encoded_features, train_data['Return'])

            X_current = scaler.transform(candidates[self.feature_cols])
            predictions = ridge.predict(autoencoder.encode(X_current))
            selected_stocks = self._select_top_stocks(candidates, predictions, n_stocks)
            selected_symbols = set(selected_stocks['Symbol'])

            # First month: nothing held yet, so nothing to sell.
            previous = prev_month_holdings or {}

            # Sell positions that dropped out of the selection, priced at the
            # symbol's first row this month (skipped if it has no row).
            for symbol, shares in previous.items():
                if symbol in selected_symbols:
                    continue
                rows = month_data[month_data['Symbol'] == symbol]
                if not rows.empty:
                    self._record_order(rows.iloc[0], 'SELL', shares)

            # Re-size every selected stock from the fixed initial capital
            # (no compounding); only newly entered symbols get a BUY order.
            capital_per_stock = self.initial_capital / n_stocks
            holdings = {}
            for _, stock in selected_stocks.iterrows():
                shares = int(capital_per_stock / stock['StockPrice'])
                holdings[stock['Symbol']] = shares
                if stock['Symbol'] not in previous:
                    self._record_order(stock, 'BUY', shares)

            daily_returns = self._daily_portfolio_returns(month_data, holdings, n_stocks)
            portfolio_returns_history.extend(r['Return'] for r in daily_returns)

            # GARCH risk control on the latest 30 daily portfolio returns.
            if len(portfolio_returns_history) > 30:
                # Fit on percent returns: decimal daily returns are badly
                # scaled for the optimiser. Volatility is converted back.
                returns_pct = 100 * np.array(portfolio_returns_history[-30:])
                garch_model = arch_model(returns_pct, vol='Garch', p=1, q=1, rescale=False)
                garch_result = garch_model.fit(disp='off')
                volatility = garch_result.conditional_volatility[-1] / 100

                if volatility > vol_threshold:
                    for symbol, shares in holdings.items():
                        rows = month_data[month_data['Symbol'] == symbol]
                        if not rows.empty:
                            self._record_order(rows.iloc[-1], 'SELL', shares)
                    holdings = {}

            # Assigned AFTER the risk check so a liquidation carries into the
            # next month (otherwise sold positions would be sold again).
            prev_month_holdings = holdings
            self.portfolio_returns.extend(daily_returns)

    def save_results(self):
        """Write returns / orders to CSV and plot cumulative returns.

        Outputs in ``self.save_dir``: ``portfolio_returns.csv``,
        ``order_book.csv`` and ``cumulative_returns.png``.
        """
        print("正在保存结果...")

        # Explicit columns keep the headers (and the 'Date' lookup below)
        # valid even when the back-test produced no rows.
        returns_df = pd.DataFrame(self.portfolio_returns, columns=['Date', 'Return'])
        returns_df.to_csv(os.path.join(self.save_dir, "portfolio_returns.csv"), index=False)

        orders_df = pd.DataFrame(
            self.order_book,
            columns=['Date', 'Symbol', 'Action', 'Price', 'Shares', 'Value'],
        )
        orders_df.to_csv(os.path.join(self.save_dir, "order_book.csv"), index=False)

        returns_df['Date'] = pd.to_datetime(returns_df['Date'])
        returns_df.set_index('Date', inplace=True)

        plt.figure(figsize=(12, 6))
        plt.plot(returns_df.index, (1 + returns_df['Return']).cumprod() - 1)
        plt.title('Cumulative Portfolio Returns')
        plt.xlabel('Date')
        plt.ylabel('Returns')
        plt.grid(True)
        plt.savefig(os.path.join(self.save_dir, "cumulative_returns.png"))
        plt.close()

# Script entry point: run the whole back-test pipeline.
if __name__ == "__main__":
    # Input factor file and the directory that receives the results.
    factor_csv = '/Users/xiaoquanliu/Desktop/merged_factors_processed_filled1.csv'
    results_dir = '/Users/xiaoquanliu/Desktop/Strategy_Results'

    strategy = QuantStrategy(data_path=factor_csv, save_dir=results_dir)

    # The stages depend on each other and must run in this order.
    pipeline = (
        strategy.load_and_process_data,
        strategy.generate_signals,
        strategy.save_results,
    )
    for stage in pipeline:
        stage()

    print("策略回测完成！")


2024-12-04 09:32:53.565664: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


正在加载数据...
正在生成交互特征...


100%|██████████| 37/37 [00:02<00:00, 14.74it/s]


正在生成交易信号...


  0%|          | 0/25 [00:00<?, ?it/s]


正在训练自编码器 - 2022-11...
Epoch 1/5


2024-12-04 09:33:44.625526: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
正在进行岭回归分析...


  4%|▍         | 1/25 [00:03<01:16,  3.17s/it]


正在训练自编码器 - 2022-12...
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
正在进行岭回归分析...


  8%|▊         | 2/25 [00:13<02:55,  7.63s/it]


正在训练自编码器 - 2023-01...
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
正在进行岭回归分析...


 12%|█▏        | 3/25 [00:30<04:22, 11.92s/it]


正在训练自编码器 - 2023-02...
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
正在进行岭回归分析...


 16%|█▌        | 4/25 [00:54<05:46, 16.52s/it]


正在训练自编码器 - 2023-03...
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
正在进行岭回归分析...


 20%|██        | 5/25 [01:29<07:43, 23.17s/it]


正在训练自编码器 - 2023-04...
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
正在进行岭回归分析...


 24%|██▍       | 6/25 [02:15<09:45, 30.81s/it]


正在训练自编码器 - 2023-05...
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
正在进行岭回归分析...


 28%|██▊       | 7/25 [03:11<11:43, 39.11s/it]


正在训练自编码器 - 2023-06...
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
正在进行岭回归分析...


 32%|███▏      | 8/25 [04:13<13:08, 46.40s/it]


正在训练自编码器 - 2023-07...
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
 73/668 [==>...........................] - ETA: 8s - loss: 0.7977