In [None]:
import os
import pandas as pd

# Data folder (one CSV per ticker) and the screening thresholds.
directory = "SP500_10_20"
min_data_length = 2500  # a file qualifies when it has at least this many rows
top_stocks_count = 300  # how many of the highest-volume tickers to keep

# One record per qualifying ticker: {'ticker': <file stem>, 'avg_volume': <mean volume>}.
valid_tickers = []

# Scan the folder in sorted order so the outcome does not depend on the
# filesystem's listing order (which would otherwise decide ties in the ranking).
for file_name in sorted(os.listdir(directory)):
    # splitext removes only the trailing extension; str.replace(".csv", "")
    # would also delete ".csv" appearing in the middle of a file name.
    ticker, extension = os.path.splitext(file_name)
    if extension != ".csv":
        continue

    df = pd.read_csv(os.path.join(directory, file_name))

    # Skip tickers with too little history.
    if df.shape[0] < min_data_length:
        continue

    # Skip (and report) files without a volume column instead of aborting the scan.
    if 'Volume' not in df.columns:
        print(f"缺少必要列：{ticker}")
        continue

    # The loading step of this notebook discards the first two rows of these
    # files, so the column may hold non-numeric entries; coerce them to NaN so
    # that mean() averages the numeric values only.
    avg_volume = pd.to_numeric(df['Volume'], errors='coerce').mean()

    # A NaN average would make the ordering below undefined.
    if pd.isna(avg_volume):
        print(f"成交量数据无效：{ticker}")
        continue

    valid_tickers.append({
        'ticker': ticker,
        'avg_volume': avg_volume
    })

# Rank by average volume, highest first, and keep the top `top_stocks_count`.
valid_tickers.sort(key=lambda record: record['avg_volume'], reverse=True)
top_300_stocks = [record['ticker'] for record in valid_tickers[:top_stocks_count]]

# Show the selected tickers.
print(top_300_stocks)

In [None]:
import pandas as pd
import os

# Folder holding one CSV per ticker.
directory = "SP500_10_20"

# Maps ticker -> Series of closing prices indexed by date.
stock_data = {}

for ticker in top_300_stocks:
    file_path = os.path.join(directory, f"{ticker}.csv")
    if not os.path.exists(file_path):
        print(f"文件未找到：{ticker}")
        continue

    df = pd.read_csv(file_path)

    # Discard the first two rows and renumber the rest. Positional slicing
    # behaves like drop([0, 1]) on a freshly read file but does not raise when
    # the file has fewer than two rows.
    df = df.iloc[2:].reset_index(drop=True)

    # Both 'Date' and 'Close' are required to build the price series.
    if 'Date' not in df.columns or 'Close' not in df.columns:
        print(f"缺少必要列：{ticker}")
        continue

    df['Date'] = pd.to_datetime(df['Date'])
    close = df.set_index('Date')['Close']

    # The discarded rows can leave the column with object dtype; convert to
    # numbers (unparseable entries become NaN) so that pct_change() works
    # downstream. sort_index() guarantees chronological order for the returns.
    stock_data[ticker] = pd.to_numeric(close, errors='coerce').sort_index()

# Show which tickers were loaded.
stock_data.keys()


In [None]:
# Daily simple returns for every ticker, one column per ticker.
# Building the frame in one step from a dict of Series aligns the columns on
# the union of all dates. Inserting columns one by one into an empty DataFrame
# would instead pin the index to the first ticker's dates and drop any date
# that ticker lacks, and it fragments the frame when there are many columns.
returns = pd.DataFrame({
    ticker: prices.pct_change().dropna()
    for ticker, prices in stock_data.items()
})

# Preview the first rows of the returns table.
returns.head()

In [None]:
# Average daily return of each ticker (column-wise mean of the returns table).
mean_returns = returns.mean(axis=0)

# Display the per-ticker averages.
mean_returns

In [None]:
# Covariance matrix of the daily returns, one row and one column per ticker.
cov_matrix = returns.cov()

# Display the covariance matrix.
cov_matrix

In [None]:
import numpy as np
from scipy.optimize import minimize

# Objective for the optimizer: the negative Sharpe ratio, so that minimizing
# this function maximizes the Sharpe ratio.
def objective(weights, mean_returns, cov_matrix, risk_free_rate=0.0):
    """Return the negative Sharpe ratio of a weighted portfolio.

    Args:
        weights: Portfolio weights, one per asset (any 1-D array-like).
        mean_returns: Mean return of each asset, in the same order as
            ``weights`` (array-like or pandas Series; used positionally).
        cov_matrix: Covariance matrix of the asset returns (2-D array-like or
            pandas DataFrame; used positionally).
        risk_free_rate: Return subtracted from the portfolio return before
            dividing by volatility. Defaults to 0.0.

    Returns:
        float: ``-(portfolio_return - risk_free_rate) / portfolio_volatility``.
        When the portfolio variance is zero, negative, or not finite the ratio
        is undefined, and 0.0 is returned so the optimizer never receives
        inf/NaN from a division by zero or a square root of a negative number.
    """
    # Convert once to plain float arrays: this accepts lists as well as
    # pandas objects and avoids index alignment on every evaluation.
    weights = np.asarray(weights, dtype=float)
    expected = np.asarray(mean_returns, dtype=float)
    covariance = np.asarray(cov_matrix, dtype=float)

    portfolio_return = float(weights @ expected)
    portfolio_variance = float(weights @ covariance @ weights)

    # Degenerate risk estimate: report a neutral Sharpe ratio.
    if not np.isfinite(portfolio_variance) or portfolio_variance <= 0.0:
        return 0.0

    sharpe_ratio = (portfolio_return - risk_free_rate) / np.sqrt(portfolio_variance)
    return float(-sharpe_ratio)

# Starting point for the optimizer: an equally weighted portfolio.
num_assets = len(mean_returns)
initial_weights = np.full(num_assets, 1.0) / num_assets

# Equality constraint: the weights must sum to 1.
constraints = {'type': 'eq', 'fun': lambda w: np.sum(w) - 1}

# Box bounds: every individual weight stays between 0 and 1.
bounds = ((0, 1),) * num_assets

In [None]:
# Run the constrained optimization (SLSQP) from the equal-weight starting point.
optimized_result = minimize(objective, initial_weights, args=(mean_returns, cov_matrix),
                            method='SLSQP', bounds=bounds, constraints=constraints)

# A run that did not converge still returns weights in `.x`; report it so the
# result is not mistaken for an optimum.
if not optimized_result.success:
    print(f"优化未收敛：{optimized_result.message}")

# Weights found by the optimizer, in the same order as `mean_returns`.
optimal_weights = optimized_result.x

# Display the raw weight vector.
optimal_weights

In [None]:
# Normalized weights at or below this threshold are treated as zero. The
# optimizer leaves floating-point noise (tiny positive values) on assets it
# effectively excludes, which a strict `> 0` test would keep.
weight_tolerance = 1e-8

# Build the portfolio table: one row per ticker with its share of the portfolio.
optimal_portfolio = (
    pd.DataFrame({'Stock': mean_returns.index, 'Weight': optimal_weights})
    # Scale the weights so that they sum to 1.
    .assign(Weight=lambda frame: frame['Weight'] / frame['Weight'].sum())
    # Keep only the tickers that actually receive an allocation.
    .loc[lambda frame: frame['Weight'] > weight_tolerance]
    # Rescale after filtering so the reported weights still sum to 1.
    .assign(Weight=lambda frame: frame['Weight'] / frame['Weight'].sum())
    # Largest positions first.
    .sort_values(by='Weight', ascending=False)
)

# Display the portfolio.
optimal_portfolio

In [None]:
# Persist the selected portfolio as a CSV file, leaving out the DataFrame index.
optimal_portfolio.to_csv(path_or_buf='optimal_portfolio.csv', index=False)

# Confirm where the file was written.
print("最佳投资组合已保存到 'optimal_portfolio.csv'")