In [1]:
import pandas as pd

# 读取合并后的数据
file_path = "/home/jesse/Projects/Self_Learning/RL_Testing/close_prices.csv"
df = pd.read_csv(file_path, parse_dates=["date"], index_col="date")

# 计算累计收益率 (涨幅) = (最终价格 / 初始价格) - 1
returns = df.iloc[-1] / df.iloc[0] - 1

# 选取涨幅最高的 30 只股票
top_30_stocks = returns.nlargest(30).index.tolist()
df_top30 = df[top_30_stocks]

# 保存选出的30只股票数据
df_top30.to_csv("/home/jesse/Projects/Self_Learning/RL_Testing/top30_stocks.csv")

# 打印前 5 行数据
df_top30.head()


Unnamed: 0_level_0,CELH,DTST,RDNT,IESC,AXON,TSLA,NVDA,SANG,AMD,HIVE,...,CWST,COKE,MELI,CAMT,GRVY,NFLX,CORT,ATLC,SAIA,INOD
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2014-01-02,0.35,0.1,1.62,5.45,15.38,10.0067,3.74109,0.19,3.95,0.15,...,5.81,67.7245,101.73,4.3564,3.84,51.831,3.37,3.59,30.52,2.6
2014-01-03,0.37,0.1,1.61,5.9,15.67,9.97067,3.69627,0.19,4.0,0.15,...,5.84,68.6975,102.23,4.1962,3.92,51.871,3.39,3.36,31.14,2.6
2014-01-06,0.37,0.1,1.6,6.03,15.64,9.8,3.74609,0.23,4.13,0.15,...,5.85,68.1331,99.82,3.9981,3.96,51.367,3.4,3.32,30.22,2.55
2014-01-07,0.42,0.1,1.61,5.62,15.91,9.95733,3.80653,0.23,4.18,0.15,...,5.77,69.9662,97.489,4.1112,3.84,48.5,3.58,3.29,30.93,2.6
2014-01-08,0.38,0.1,1.57,5.86,17.93,10.0853,3.85834,0.23,4.18,0.15,...,5.64,69.1301,97.842,4.0735,3.92,48.713,3.55,3.26,31.79,2.63


In [2]:
import numpy as np

# 计算每日收益率
daily_returns = df_top30.pct_change().dropna()

# 计算均值收益率（年化）
mean_returns = daily_returns.mean() * 252

# 计算协方差矩阵（年化）
cov_matrix = daily_returns.cov() * 252


  daily_returns = df_top30.pct_change().dropna()


In [3]:
import scipy.optimize as sco

# 设定随机投资组合
num_assets = len(top_30_stocks)
weights = np.random.random(num_assets)
weights /= np.sum(weights)

# 定义目标函数（最小化负的夏普比率）
def negative_sharpe(weights, mean_returns, cov_matrix, risk_free_rate=0.02):
    portfolio_return = np.dot(weights, mean_returns)
    portfolio_volatility = np.sqrt(np.dot(weights.T, np.dot(cov_matrix, weights)))
    sharpe_ratio = (portfolio_return - risk_free_rate) / portfolio_volatility
    return -sharpe_ratio  # 负夏普比率（因为要最小化）

# 约束条件：所有权重之和为1
constraints = ({'type': 'eq', 'fun': lambda x: np.sum(x) - 1})

# 设定边界：每只股票的权重在 0~1 之间
bounds = tuple((0, 1) for _ in range(num_assets))

# 使用优化器求解
opt_result = sco.minimize(negative_sharpe, weights, args=(mean_returns, cov_matrix), 
                          method="SLSQP", bounds=bounds, constraints=constraints)

# 获取最佳权重
optimal_weights = opt_result.x

# 输出结果
portfolio_allocation = pd.DataFrame({
    "Stock": top_30_stocks,
    "Weight": optimal_weights
})

# 按权重排序
portfolio_allocation = portfolio_allocation.sort_values(by="Weight", ascending=False)

# 保存结果
portfolio_allocation.to_csv("/home/jesse/Projects/Self_Learning/RL_Testing/optimal_portfolio.csv", index=False)

# 显示前 10 只股票的权重
portfolio_allocation.head(10)


Unnamed: 0,Stock,Weight
21,COKE,0.137809
12,SMID,0.090187
17,PDEX,0.081572
20,CWST,0.077179
0,CELH,0.067244
11,ZYXI,0.055075
3,IESC,0.054986
6,NVDA,0.050808
24,GRVY,0.049271
15,KRMD,0.046381
