In [1]:
import os
import pandas as pd

# Folder holding one CSV per ticker, plus the screening thresholds.
directory = "SP500_10_20"
min_data_length = 2500  # keep tickers whose CSV has at least this many rows
top_stocks_count = 300  # how many tickers to keep, ranked by mean volume

# One {'ticker', 'avg_volume'} record per ticker that passes the screen.
valid_tickers = []

# sorted() pins the scan order: os.listdir returns entries in arbitrary
# order, and the stable sort further down would otherwise break ties in
# mean volume differently from one run to the next.
for file_name in sorted(os.listdir(directory)):
    # splitext removes only the trailing extension, whereas str.replace
    # would also delete a ".csv" occurring in the middle of a file name.
    symbol, extension = os.path.splitext(file_name)
    if extension != ".csv":
        continue

    file_path = os.path.join(directory, file_name)
    df = pd.read_csv(file_path)

    # Skip tickers with too short a history.
    if df.shape[0] < min_data_length:
        continue

    # Report and skip files without a 'Volume' column instead of failing
    # with a KeyError in the middle of the scan.
    if 'Volume' not in df.columns:
        print(f"缺少必要列：{symbol}")
        continue

    valid_tickers.append({
        'ticker': symbol,
        'avg_volume': df['Volume'].mean(),
    })

# Rank by mean volume, highest first, and keep the leading tickers.
valid_tickers = sorted(valid_tickers, key=lambda x: x['avg_volume'], reverse=True)
top_300_stocks = [entry['ticker'] for entry in valid_tickers[:top_stocks_count]]

# Show the selected tickers.
print(top_300_stocks)

['NVDA', 'AAPL', 'BAC', 'TSLA', 'AMZN', 'GOOGL', 'GOOG', 'F', 'AMD', 'CMG', 'T', 'MSFT', 'INTC', 'CSCO', 'PFE', 'MU', 'AVGO', 'WMT', 'HPQ', 'C', 'CMCSA', 'LRCX', 'WFC', 'CSX', 'FCX', 'JPM', 'NFLX', 'ORCL', 'EBAY', 'RF', 'VZ', 'XOM', 'KO', 'MS', 'GM', 'AMAT', 'DAL', 'MRK', 'HAL', 'QCOM', 'KEY', 'GE', 'GILD', 'BSX', 'V', 'MGM', 'HBAN', 'SBUX', 'BMY', 'FTNT', 'GLW', 'KR', 'SCHW', 'DIS', 'PG', 'MO', 'EXC', 'SLB', 'MDLZ', 'AIG', 'WMB', 'ABT', 'NKE', 'JNJ', 'CCL', 'MNST', 'FITB', 'COP', 'NEE', 'USB', 'TJX', 'HST', 'UAL', 'OXY', 'MET', 'CVX', 'NEM', 'LOW', 'LUV', 'GEN', 'TSCO', 'CVS', 'BA', 'PHM', 'VLO', 'RTX', 'LVS', 'ON', 'TXN', 'CRM', 'CTRA', 'PARA', 'HD', 'WBA', 'PCG', 'BK', 'DD', 'JNPR', 'APH', 'VTRS', 'CPRT', 'AES', 'DECK', 'DVN', 'EW', 'CAT', 'TGT', 'MA', 'MDT', 'STX', 'LLY', 'BBY', 'DHI', 'PM', 'PEP', 'IPG', 'MCD', 'AXP', 'SO', 'PPL', 'BAX', 'JCI', 'CF', 'BKR', 'APA', 'MCHP', 'IBM', 'TFC', 'FAST', 'AFL', 'MOS', 'UNH', 'WY', 'CAG', 'BX', 'NI', 'MAS', 'CTSH', 'EA', 'UNP', 'CNP', 'NRG', 

In [2]:
import pandas as pd
import os

# Folder that holds one CSV per ticker.
directory = "SP500_10_20"

# Maps ticker -> Series of 'Close' values indexed by parsed 'Date'.
stock_data = {}

for ticker in top_300_stocks:
    file_path = os.path.join(directory, f"{ticker}.csv")

    # Guard: the CSV for this ticker must exist.
    if not os.path.exists(file_path):
        print(f"文件未找到：{ticker}")
        continue

    df = pd.read_csv(file_path)

    # Guard: both 'Date' and 'Close' are needed to build the series.
    if not {'Date', 'Close'}.issubset(df.columns):
        print(f"缺少必要列：{ticker}")
        continue

    # Parse the dates, make them the index, and keep the closing prices.
    df['Date'] = pd.to_datetime(df['Date'])
    df = df.set_index('Date')
    stock_data[ticker] = df['Close']

# Show which tickers were loaded.
stock_data.keys()


dict_keys(['NVDA', 'AAPL', 'BAC', 'TSLA', 'AMZN', 'GOOGL', 'GOOG', 'F', 'AMD', 'CMG', 'T', 'MSFT', 'INTC', 'CSCO', 'PFE', 'MU', 'AVGO', 'WMT', 'HPQ', 'C', 'CMCSA', 'LRCX', 'WFC', 'CSX', 'FCX', 'JPM', 'NFLX', 'ORCL', 'EBAY', 'RF', 'VZ', 'XOM', 'KO', 'MS', 'GM', 'AMAT', 'DAL', 'MRK', 'HAL', 'QCOM', 'KEY', 'GE', 'GILD', 'BSX', 'V', 'MGM', 'HBAN', 'SBUX', 'BMY', 'FTNT', 'GLW', 'KR', 'SCHW', 'DIS', 'PG', 'MO', 'EXC', 'SLB', 'MDLZ', 'AIG', 'WMB', 'ABT', 'NKE', 'JNJ', 'CCL', 'MNST', 'FITB', 'COP', 'NEE', 'USB', 'TJX', 'HST', 'UAL', 'OXY', 'MET', 'CVX', 'NEM', 'LOW', 'LUV', 'GEN', 'TSCO', 'CVS', 'BA', 'PHM', 'VLO', 'RTX', 'LVS', 'ON', 'TXN', 'CRM', 'CTRA', 'PARA', 'HD', 'WBA', 'PCG', 'BK', 'DD', 'JNPR', 'APH', 'VTRS', 'CPRT', 'AES', 'DECK', 'DVN', 'EW', 'CAT', 'TGT', 'MA', 'MDT', 'STX', 'LLY', 'BBY', 'DHI', 'PM', 'PEP', 'IPG', 'MCD', 'AXP', 'SO', 'PPL', 'BAX', 'JCI', 'CF', 'BKR', 'APA', 'MCHP', 'IBM', 'TFC', 'FAST', 'AFL', 'MOS', 'UNH', 'WY', 'CAG', 'BX', 'NI', 'MAS', 'CTSH', 'EA', 'UNP', 'CNP

In [3]:
import pandas as pd

# Per-ticker percentage change of the loaded series, with the NaN rows
# (including the first row, which has no predecessor) dropped.
returns_dict = dict(
    (symbol, prices.pct_change().dropna())
    for symbol, prices in stock_data.items()
)

# Join all series side by side in a single concat call; the dict keys
# become the column labels.
returns = pd.concat(returns_dict, axis="columns")

# Print the first rows of the combined returns table.
print(returns.head(5))


                NVDA      AAPL       BAC      TSLA      AMZN     GOOGL  \
Date                                                                     
2011-01-03  0.027273  0.021732  0.063718 -0.000375  0.023444  0.017476   
2011-01-04 -0.003160  0.005219  0.003524  0.001878  0.004288 -0.003690   
2011-01-05  0.076728  0.008180  0.018258  0.005999  0.013026  0.011543   
2011-01-06  0.138398 -0.000809 -0.004138  0.039135 -0.008324  0.007273   
2011-01-07  0.027936  0.007162 -0.013158  0.012912 -0.001991  0.004792   

                GOOG         F       AMD       CMG  ...      BIIB       RSG  \
Date                                                ...                       
2011-01-03  0.017476  0.027398  0.035452  0.051585  ...  0.002237  0.006363   
2011-01-04 -0.003690  0.007536  0.035419 -0.005634  ... -0.011905 -0.004659   
2011-01-05  0.011543  0.029344  0.015963 -0.006161  ...  0.004217 -0.010364   
2011-01-06  0.007273  0.018446 -0.024691  0.024887  ...  0.008698  0.006419   
2011-01

In [4]:
# Mean return of each ticker: column-wise average of the returns table.
mean_returns = returns.mean(axis=0)

# Display the resulting Series as the cell output.
mean_returns

NVDA    0.001790
AAPL    0.001195
BAC     0.000625
TSLA    0.002541
AMZN    0.001353
          ...   
LMT     0.000863
CHD     0.000785
ACGL    0.000628
INCY    0.001063
DVA     0.000621
Length: 300, dtype: float64

In [5]:
# Covariance of returns between every pair of tickers.
# NOTE(review): DataFrame.cov excludes missing values pair by pair, so if
# the tickers do not all cover the same dates each entry is estimated on a
# different sample -- confirm that this is intended.
cov_matrix = returns.cov(min_periods=None)

# Display the covariance matrix as the cell output.
cov_matrix

Unnamed: 0,NVDA,AAPL,BAC,TSLA,AMZN,GOOGL,GOOG,F,AMD,CMG,...,BIIB,RSG,PFG,AEE,GD,LMT,CHD,ACGL,INCY,DVA
NVDA,0.000712,0.000223,0.000233,0.000304,0.000221,0.000207,0.000206,0.000188,0.000480,0.000166,...,0.000177,0.000115,0.000227,0.000095,0.000148,0.000122,0.000090,0.000128,0.000197,0.000134
AAPL,0.000223,0.000321,0.000161,0.000199,0.000158,0.000152,0.000152,0.000125,0.000225,0.000109,...,0.000116,0.000082,0.000156,0.000067,0.000098,0.000090,0.000066,0.000095,0.000135,0.000094
BAC,0.000233,0.000161,0.000503,0.000202,0.000140,0.000159,0.000160,0.000255,0.000264,0.000137,...,0.000141,0.000136,0.000353,0.000095,0.000184,0.000131,0.000065,0.000191,0.000185,0.000147
TSLA,0.000304,0.000199,0.000202,0.001216,0.000225,0.000183,0.000183,0.000198,0.000341,0.000194,...,0.000149,0.000090,0.000199,0.000066,0.000114,0.000081,0.000066,0.000101,0.000219,0.000129
AMZN,0.000221,0.000158,0.000140,0.000225,0.000398,0.000185,0.000186,0.000111,0.000232,0.000131,...,0.000132,0.000062,0.000128,0.000057,0.000089,0.000077,0.000063,0.000074,0.000174,0.000088
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
LMT,0.000122,0.000090,0.000131,0.000081,0.000077,0.000087,0.000086,0.000103,0.000121,0.000059,...,0.000097,0.000085,0.000144,0.000084,0.000135,0.000178,0.000064,0.000100,0.000101,0.000083
CHD,0.000090,0.000066,0.000065,0.000066,0.000063,0.000060,0.000059,0.000052,0.000089,0.000047,...,0.000072,0.000064,0.000073,0.000076,0.000057,0.000064,0.000159,0.000062,0.000075,0.000050
ACGL,0.000128,0.000095,0.000191,0.000101,0.000074,0.000096,0.000095,0.000144,0.000133,0.000083,...,0.000085,0.000107,0.000199,0.000100,0.000128,0.000100,0.000062,0.000228,0.000087,0.000098
INCY,0.000197,0.000135,0.000185,0.000219,0.000174,0.000144,0.000145,0.000147,0.000244,0.000117,...,0.000271,0.000084,0.000177,0.000058,0.000120,0.000101,0.000075,0.000087,0.000813,0.000120


In [6]:
import numpy as np
import pandas as pd
from scipy.optimize import minimize

# Objective: the negative Sharpe ratio, so that minimising it maximises the Sharpe ratio.
def objective(weights, mean_returns, cov_matrix, risk_free_rate=0.0):
    """Return the negative Sharpe ratio of a weighted portfolio.

    Parameters
    ----------
    weights : array-like
        Portfolio weights, one per asset. Lists and tuples are accepted as
        well as NumPy arrays.
    mean_returns : array-like
        Mean return of each asset, in the same order as ``weights``.
    cov_matrix : array-like
        Covariance matrix of the asset returns.
    risk_free_rate : float, optional
        Rate subtracted from the portfolio return; defaults to 0.0.

    Returns
    -------
    float
        ``-(portfolio_return - risk_free_rate) / portfolio_volatility``.
    """
    # Coerce first: a plain list has no ``.T`` and cannot be multiplied
    # element-wise. A float ndarray passes through unchanged.
    weights = np.asarray(weights, dtype=float)
    portfolio_return = np.sum(weights * mean_returns)
    portfolio_volatility = np.sqrt(np.dot(weights.T, np.dot(cov_matrix, weights)))
    sharpe_ratio = (portfolio_return - risk_free_rate) / portfolio_volatility
    return -sharpe_ratio  # negated because scipy's minimize only minimises

# Number of assets in the universe.
num_assets = len(mean_returns)

# Starting point: the equal-weight portfolio.
initial_weights = np.ones(num_assets) / num_assets

# Equality constraint: the weights must sum to 1.
constraints = {'type': 'eq', 'fun': lambda weights: np.sum(weights) - 1}

# Each weight is bounded to the interval [0, 1].
bounds = tuple((0, 1) for _ in range(num_assets))

# Run the optimisation.
optimized_result = minimize(objective, initial_weights, args=(mean_returns, cov_matrix),
                            method='SLSQP', bounds=bounds, constraints=constraints)

# minimize() still returns its last iterate when the solver stops without
# converging, so report that instead of silently saving those weights.
if not optimized_result.success:
    print(f"WARNING: optimizer did not converge: {optimized_result.message}")

# Weights found by the optimiser.
optimal_weights = optimized_result.x

# One row per asset: ticker label and its weight.
optimal_portfolio = pd.DataFrame({'Stock': mean_returns.index, 'Weight': optimal_weights})

# Keep the 60 largest weights.
# NOTE(review): in the recorded run many of the kept weights are around
# 1e-16, so fewer than 60 names may carry a meaningful position.
top_60 = optimal_portfolio.nlargest(60, 'Weight')

# Rescale the kept weights so that they sum to 1 again.
top_60 = top_60.assign(Weight=top_60['Weight'] / top_60['Weight'].sum())

# Order by weight, largest first.
top_60 = top_60.sort_values(by='Weight', ascending=False)

# Show the final holdings.
print(top_60)

# Save the holdings to CSV.
top_60.to_csv("optimal_portfolio_top60.csv", index=False)
print("最佳投资组合(60 只股票)已保存到 'optimal_portfolio_top60.csv'")


     Stock        Weight
68     NEE  1.657148e-01
248    KDP  1.081388e-01
3     TSLA  8.918700e-02
190     DG  8.576817e-02
100   CPRT  6.491071e-02
296    CHD  5.982488e-02
199   MTCH  5.605196e-02
110    LLY  4.867522e-02
26    NFLX  4.104926e-02
165   DXCM  4.017787e-02
231   COST  3.833304e-02
212   CTAS  3.428026e-02
262   TTWO  3.288619e-02
4     AMZN  3.194626e-02
281    STZ  2.350488e-02
278   ODFL  1.888315e-02
65    MNST  1.816488e-02
295    LMT  1.245290e-02
0     NVDA  1.126302e-02
1     AAPL  1.125345e-02
147    DHR  7.533334e-03
131    UNH  2.975186e-16
289    WEC  2.784502e-16
12    INTC  2.449687e-16
73     OXY  2.308673e-16
228    CMS  2.259206e-16
43     BSX  2.127645e-16
69     USB  2.116329e-16
44       V  2.072690e-16
242    HRL  1.894948e-16
30      VZ  1.748977e-16
45     MGM  1.691186e-16
42    GILD  1.609092e-16
64     CCL  1.584460e-16
62     NKE  1.577379e-16
166    PGR  1.566232e-16
14     PFE  1.548783e-16
197    AEP  1.515653e-16
118     SO  1.483494e-16
