In [None]:
import os
import pandas as pd

# Folder holding one CSV per ticker, plus the screening thresholds
directory = "SP500_10_20"
min_data_length = 2500  # a file must have at least this many rows to be kept
top_stocks_count = 300  # keep the 300 tickers with the highest average volume

# Columns a price file must provide before it can be used below
required_columns = {'Date', 'Close', 'Volume'}

# ticker -> Close series (date-indexed, ascending) for every file passing the screen
stock_data = {}
# One {'ticker', 'avg_volume'} record per accepted file, used for the volume ranking
valid_tickers = []

# os.listdir returns entries in arbitrary order; sorting makes tie-breaking in the
# volume ranking below reproducible across machines and runs.
for file_name in sorted(os.listdir(directory)):
    if not file_name.endswith(".csv"):
        continue

    # Strip only the trailing extension (str.replace would remove every ".csv" occurrence)
    symbol = file_name[:-len(".csv")]
    prices = pd.read_csv(os.path.join(directory, file_name))

    # Skip files that are too short or lack a needed column; checking 'Date' here
    # avoids a KeyError when the index is built.
    if prices.shape[0] < min_data_length or not required_columns.issubset(prices.columns):
        continue

    # Parse dates, index by them and put the rows in chronological order
    prices = (
        prices
        .assign(Date=pd.to_datetime(prices['Date']))
        .set_index('Date')
        .sort_index()
    )

    valid_tickers.append({'ticker': symbol, 'avg_volume': prices['Volume'].mean()})
    stock_data[symbol] = prices['Close']  # only the Close series is needed downstream

# Rank by average volume (descending) and keep the top names
valid_tickers = sorted(valid_tickers, key=lambda x: x['avg_volume'], reverse=True)
top_300_stocks = [ticker['ticker'] for ticker in valid_tickers[:top_stocks_count]]

# Daily simple returns, one column per selected ticker (aligned on the date index)
returns = pd.DataFrame({ticker: stock_data[ticker].pct_change().dropna() for ticker in top_300_stocks})

# Annualised mean return, using 252 trading days per year
mean_returns = returns.mean() * 252

# Show the selected tickers
print(top_300_stocks)


In [None]:
# Annualised covariance matrix of the daily returns (252 trading days per year)
cov_matrix = 252 * returns.cov()

# Show the covariance matrix
print(cov_matrix)


In [None]:
import numpy as np
import pandas as pd
from scipy.optimize import minimize

# Objective for the optimiser: the negated Sharpe ratio of a weighted portfolio
def objective(weights, mean_returns, cov_matrix, risk_free_rate=0.0):
    """Return the negative Sharpe ratio of the portfolio described by ``weights``.

    Parameters
    ----------
    weights : numpy.ndarray
        Portfolio weights, one entry per asset.
    mean_returns : array-like
        Expected return per asset, multiplied element-wise with ``weights``.
    cov_matrix : array-like
        Covariance matrix of asset returns, used in the quadratic form w' C w.
    risk_free_rate : float, optional
        Rate subtracted from the portfolio return before dividing by volatility.

    Returns
    -------
    float
        ``-(portfolio_return - risk_free_rate) / portfolio_volatility``.
        The sign is flipped because the optimiser minimises its objective.
    """
    excess_return = np.sum(weights * mean_returns) - risk_free_rate
    # Quadratic form w' C w, computed in two steps
    cov_times_weights = np.dot(cov_matrix, weights)
    portfolio_variance = np.dot(weights.T, cov_times_weights)
    return -(excess_return / np.sqrt(portfolio_variance))

# Number of assets in the optimisation universe
num_assets = len(mean_returns)

# Starting point: equal weight in every asset
initial_weights = np.ones(num_assets) / num_assets

# Equality constraint: the weights must sum to 1
constraints = {'type': 'eq', 'fun': lambda weights: np.sum(weights) - 1}

# Each weight is bounded to [0, 1]
bounds = tuple((0, 1) for _ in range(num_assets))

# Minimise the negative Sharpe ratio with SLSQP
optimized_result = minimize(objective, initial_weights, args=(mean_returns, cov_matrix),
                            method='SLSQP', bounds=bounds, constraints=constraints)

# minimize() reports failure through the result object instead of raising, so
# surface it here; otherwise a failed run would silently produce the CSV below.
if not optimized_result.success:
    print("WARNING: SLSQP optimisation did not succeed:", optimized_result.message)

# Weights found by the optimiser
optimal_weights = optimized_result.x

# One row per asset: ticker label and its optimised weight
optimal_portfolio = pd.DataFrame({'Stock': mean_returns.index, 'Weight': optimal_weights})

# Number of positions kept in the final portfolio
top_holdings_count = 60

# Keep the highest-weighted positions
top_60 = optimal_portfolio.nlargest(top_holdings_count, 'Weight')

# Rescale the kept weights so they sum to 1 again; assign() returns a new frame
# rather than writing into the nlargest() result.
top_60 = top_60.assign(Weight=top_60['Weight'] / top_60['Weight'].sum())

# Order by weight, largest first
top_60 = top_60.sort_values(by='Weight', ascending=False)

# Show the final holdings
print(top_60)

# Save the holdings to CSV
top_60.to_csv("optimal_portfolio_top60.csv", index=False)
print("最佳投资组合(60 只股票)已保存到 'optimal_portfolio_top60.csv'")
