In [2]:
import os
import warnings

import pandas as pd

# Folder holding one CSV per ticker, plus the screening thresholds
directory = "SP500_10_20"
min_data_length = 2500  # keep only files with at least this many rows
top_stocks_count = 300  # number of highest average-volume tickers to keep

# Columns that the processing below reads from every file
required_columns = {'Date', 'Close', 'Volume'}

# Accepted data: symbol -> Close series, and one ranking record per symbol
stock_data = {}
valid_tickers = []

# sorted() makes the scan order (and so the tie-breaking of the ranking
# below) independent of the order the filesystem happens to list files in.
for file_name in sorted(os.listdir(directory)):
    # splitext removes only the trailing extension, unlike str.replace which
    # would strip every ".csv" occurrence inside the name.
    symbol, extension = os.path.splitext(file_name)
    if extension != ".csv":
        continue

    df = pd.read_csv(os.path.join(directory, file_name))

    # Skip files that are too short or lack a column used below
    # ('Date' included, since it becomes the index).
    if df.shape[0] < min_data_length or not required_columns.issubset(df.columns):
        continue

    df['Date'] = pd.to_datetime(df['Date'])
    df = df.set_index('Date').sort_index()  # chronological order is needed for pct_change

    valid_tickers.append({'ticker': symbol, 'avg_volume': df['Volume'].mean()})
    stock_data[symbol] = df['Close']  # only the Close series is kept

# Rank by average volume (descending) and keep the first `top_stocks_count` symbols
valid_tickers = sorted(valid_tickers, key=lambda x: x['avg_volume'], reverse=True)
top_300_stocks = [entry['ticker'] for entry in valid_tickers[:top_stocks_count]]

# Per-symbol simple returns; the DataFrame constructor aligns the series on
# the union of their dates, so a symbol has NaN where it has no return.
returns = pd.DataFrame({ticker: stock_data[ticker].pct_change().dropna() for ticker in top_300_stocks})

# Annualized mean return (mean of the period returns times 252 trading days)
mean_returns = returns.mean() * 252

# Show the selected symbols
print(top_300_stocks)


['NVDA', 'AAPL', 'BAC', 'TSLA', 'AMZN', 'GOOGL', 'GOOG', 'F', 'AMD', 'CMG', 'T', 'MSFT', 'INTC', 'CSCO', 'PFE', 'MU', 'AVGO', 'WMT', 'HPQ', 'C', 'CMCSA', 'LRCX', 'WFC', 'CSX', 'FCX', 'JPM', 'NFLX', 'ORCL', 'EBAY', 'RF', 'VZ', 'XOM', 'KO', 'MS', 'GM', 'AMAT', 'DAL', 'MRK', 'HAL', 'QCOM', 'KEY', 'GE', 'GILD', 'BSX', 'V', 'MGM', 'HBAN', 'SBUX', 'BMY', 'FTNT', 'GLW', 'KR', 'SCHW', 'DIS', 'PG', 'MO', 'EXC', 'SLB', 'MDLZ', 'AIG', 'WMB', 'ABT', 'NKE', 'JNJ', 'CCL', 'MNST', 'FITB', 'COP', 'NEE', 'USB', 'TJX', 'HST', 'UAL', 'OXY', 'MET', 'CVX', 'NEM', 'LOW', 'LUV', 'GEN', 'TSCO', 'CVS', 'BA', 'PHM', 'VLO', 'RTX', 'LVS', 'ON', 'TXN', 'CRM', 'CTRA', 'PARA', 'HD', 'WBA', 'PCG', 'BK', 'DD', 'JNPR', 'APH', 'VTRS', 'CPRT', 'AES', 'DECK', 'DVN', 'EW', 'CAT', 'TGT', 'MA', 'MDT', 'STX', 'LLY', 'BBY', 'DHI', 'PM', 'PEP', 'IPG', 'MCD', 'AXP', 'SO', 'PPL', 'BAX', 'JCI', 'CF', 'BKR', 'APA', 'MCHP', 'IBM', 'TFC', 'FAST', 'AFL', 'MOS', 'UNH', 'WY', 'CAG', 'BX', 'NI', 'MAS', 'CTSH', 'EA', 'UNP', 'CNP', 'NRG', 

In [3]:
# Covariance of the return columns, scaled by 252 (trading days) to annualize it
cov_matrix = returns.cov().mul(252)

# Show the resulting matrix
print(cov_matrix)


          NVDA      AAPL       BAC      TSLA      AMZN     GOOGL      GOOG  \
NVDA  0.179438  0.056306  0.058765  0.076654  0.055745  0.052066  0.052011   
AAPL  0.056306  0.081002  0.040689  0.050112  0.039745  0.038218  0.038370   
BAC   0.058765  0.040689  0.126671  0.050855  0.035253  0.040070  0.040307   
TSLA  0.076654  0.050112  0.050855  0.306508  0.056784  0.046166  0.046092   
AMZN  0.055745  0.039745  0.035253  0.056784  0.100178  0.046672  0.046858   
...        ...       ...       ...       ...       ...       ...       ...   
LMT   0.030686  0.022579  0.032933  0.020286  0.019298  0.021819  0.021702   
CHD   0.022684  0.016602  0.016397  0.016541  0.015773  0.015127  0.014955   
ACGL  0.032271  0.023893  0.048104  0.025380  0.018641  0.024105  0.023840   
INCY  0.049592  0.033992  0.046742  0.055121  0.043944  0.036400  0.036430   
DVA   0.033737  0.023638  0.037113  0.032389  0.022188  0.024867  0.024887   

             F       AMD       CMG  ...      BIIB       RSG    

In [4]:
import numpy as np
import pandas as pd
from scipy.optimize import minimize

# Objective for the optimizer: the negated Sharpe ratio
def objective(weights, mean_returns, cov_matrix, risk_free_rate=0.0):
    """Return minus the Sharpe ratio of the portfolio given by ``weights``.

    Args:
        weights: 1-D numpy array of portfolio weights, one per asset.
        mean_returns: Expected return per asset, in the same order as ``weights``.
        cov_matrix: Covariance matrix of the asset returns.
        risk_free_rate: Rate subtracted from the portfolio return (default 0.0).

    Returns:
        ``-(portfolio_return - risk_free_rate) / portfolio_volatility``. The
        sign is flipped because scipy's ``minimize`` can only minimize.
    """
    expected_return = np.sum(weights * mean_returns)
    # Portfolio variance is the quadratic form w^T * Sigma * w.
    risk_contribution = np.dot(cov_matrix, weights)
    variance = np.dot(weights.T, risk_contribution)
    volatility = np.sqrt(variance)
    excess_return = expected_return - risk_free_rate
    return -(excess_return / volatility)

# Number of assets being optimized
num_assets = len(mean_returns)

# Starting point: equal weight in every asset
initial_weights = np.ones(num_assets) / num_assets

# Equality constraint: the weights must sum to 1
constraints = ({'type': 'eq', 'fun': lambda weights: np.sum(weights) - 1})

# Each weight is bounded to [0, 1] (no short positions, no leverage)
bounds = tuple((0, 1) for _ in range(num_assets))

# Minimize the negated Sharpe ratio with SLSQP under the bounds and constraint above
optimized_result = minimize(objective, initial_weights, args=(mean_returns, cov_matrix),
                            method='SLSQP', bounds=bounds, constraints=constraints)

# minimize() reports a failed run through the result object instead of
# raising, so surface it here rather than silently saving unconverged weights.
if not optimized_result.success:
    warnings.warn(
        f"Sharpe-ratio optimization did not converge: {optimized_result.message}",
        RuntimeWarning,
    )

# Weights found by the optimizer, in the same order as mean_returns
optimal_weights = optimized_result.x

# One row per stock with its optimized weight
optimal_portfolio = pd.DataFrame({'Stock': mean_returns.index, 'Weight': optimal_weights})

# Keep the 60 largest weights, rescale them to sum to 1, and order them by
# descending weight. Built as one chain so no frame is modified in place.
# NOTE(review): the optimizer can leave most weights at ~0 (the captured run
# shows values around 1e-14), so many of these 60 rows may be numerical noise.
top_60 = (
    optimal_portfolio
    .nlargest(60, 'Weight')
    .assign(Weight=lambda frame: frame['Weight'] / frame['Weight'].sum())
    .sort_values(by='Weight', ascending=False)
)

# Show the final holdings
print(top_60)

# Save the holdings as CSV
top_60.to_csv("optimal_portfolio_top60.csv", index=False)
print("最佳投资组合(60 只股票)已保存到 'optimal_portfolio_top60.csv'")


    Stock        Weight
68    NEE  1.664504e-01
248   KDP  1.083392e-01
3    TSLA  8.918853e-02
190    DG  8.737923e-02
100  CPRT  6.450870e-02
296   CHD  5.788344e-02
199  MTCH  5.598488e-02
110   LLY  4.889064e-02
26   NFLX  4.098841e-02
165  DXCM  4.026625e-02
231  COST  3.836573e-02
212  CTAS  3.377759e-02
262  TTWO  3.303761e-02
4    AMZN  3.165105e-02
281   STZ  2.311203e-02
278  ODFL  1.918895e-02
65   MNST  1.761127e-02
295   LMT  1.267742e-02
0    NVDA  1.156451e-02
1    AAPL  1.141593e-02
147   DHR  7.718220e-03
131   UNH  3.373127e-14
124   APA  3.073170e-14
130   MOS  2.870167e-14
103   DVN  2.753251e-14
24    FCX  2.261670e-14
38    HAL  2.082479e-14
148   IVZ  2.024051e-14
73    OXY  2.001389e-14
145   TPR  1.839277e-14
97   JNPR  1.775772e-14
57    SLB  1.689471e-14
7       F  1.646595e-14
123   BKR  1.599153e-14
195   BEN  1.571421e-14
59    AIG  1.559844e-14
64    CCL  1.502438e-14
219   BWA  1.468597e-14
167   HES  1.374864e-14
208   PRU  1.206633e-14
74    MET  1.205