In [8]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os

In [41]:
# Directory that holds the per-year CSV files; load_data joins file names onto it.
data_folder = "./沪深300成分股的数据/"

In [10]:
def winsorize(series, lower_percentile=5, upper_percentile=95):
    """Clip values to the range spanned by two percentiles of the data.

    Args:
        series: Array-like of numbers (NumPy array, pandas Series, list).
        lower_percentile: Percentile (0-100) used as the lower clipping limit.
        upper_percentile: Percentile (0-100) used as the upper clipping limit.

    Returns:
        The result of ``np.clip`` on ``series``: values below the lower limit
        are raised to it and values above the upper limit are lowered to it.
        NaN entries are ignored when the limits are computed and stay NaN in
        the output. Empty input is returned unchanged.
    """
    # Percentiles of an empty sample are undefined; nothing to clip.
    if np.size(series) == 0:
        return series

    # nanpercentile skips missing values; with plain np.percentile one NaN
    # turns both limits into NaN and the clip stops winsorizing.
    lower_limit, upper_limit = np.nanpercentile(
        series, [lower_percentile, upper_percentile]
    )
    return np.clip(series, lower_limit, upper_limit)

In [34]:
# Loader: reads the constituent list and the daily bars for one year and merges them.
def load_data(year):
    """Return one year's daily bars with each stock's weight attached.

    Args:
        year: Year inserted into the two CSV file names under ``data_folder``.

    Returns:
        DataFrame produced by merging the daily-bar file with the ``code`` and
        ``weight`` columns of the constituent file on ``code`` (pandas' default
        inner join, so bars whose code is not in the constituent file are dropped).
    """
    constituents = pd.read_csv(
        os.path.join(data_folder, f'hs300stocks_{year}_utf-8.csv')
    )
    bars = pd.read_csv(
        os.path.join(data_folder, f'hs300stocks_kdata_{year}_utf-8.csv')
    )

    # Only the weight is taken from the constituent file.
    return bars.merge(constituents[['code', 'weight']], on='code')

In [19]:
def calculate_daily_returns(data):
    """Add per-stock simple daily returns and drop rows that have none.

    The previous close is taken from the preceding row of the same ``code``,
    so rows are assumed to be in chronological order within each code
    (assumption; the function does not sort).

    Args:
        data: DataFrame with at least ``code`` and ``close`` columns. It is
            not modified.

    Returns:
        A new DataFrame with the original columns plus ``prev_close`` and
        ``daily_return`` ((close - prev_close) / prev_close). Rows whose
        ``daily_return`` is NaN, such as the first row of every code, are
        removed.
    """
    prev_close = data.groupby('code')['close'].shift(1)

    # assign() builds a new frame, so the caller's DataFrame keeps its columns.
    enriched = data.assign(
        prev_close=prev_close,
        daily_return=(data['close'] - prev_close) / prev_close,
    )

    # Explicit copy so later column assignments on the result do not raise
    # chained-assignment warnings.
    return enriched.dropna(subset=['daily_return']).copy()

In [20]:
# Weighted index return: per-date sum of daily_return * weight.
def calculate_weighted_returns(data):
    """Aggregate stock returns into one weighted return per date.

    Args:
        data: DataFrame with ``time``, ``daily_return`` and ``weight`` columns.
            It is not modified.

    Returns:
        Series named ``weighted_return`` indexed by ``time``; each value is the
        sum over that date's rows of ``daily_return * weight``.

    Note:
        Weights are used exactly as stored. They are not rescaled to sum to 1
        per date, so the scale of the result follows the scale of ``weight``.
    """
    # Compute the product as a standalone Series instead of writing a new
    # column into the caller's frame.
    weighted = (data['daily_return'] * data['weight']).rename('weighted_return')
    daily_index_returns = weighted.groupby(data['time']).sum()
    return daily_index_returns

In [21]:
# Yearly figure: mean of the winsorized daily index returns.
def calculate_annual_average_return(daily_index_returns):
    """Return the mean of the daily index returns after winsorizing them.

    Args:
        daily_index_returns: Series (or array-like) of daily index returns.

    Returns:
        The arithmetic mean of the values after ``winsorize`` has clipped them
        with its default percentiles. Despite the function name, this is an
        average daily return; nothing is compounded or annualized here.
    """
    # Clip the tails first so a few extreme days do not dominate the mean.
    return np.mean(winsorize(daily_index_returns))

In [2]:
# Entry point: runs the load -> returns -> weighting -> averaging pipeline per year.
def main():
    """Print the winsorized mean daily index return for 2014-2024 and their average.

    For every year the data is loaded, per-stock daily returns are computed,
    combined into weighted index returns and averaged. The per-year values and
    the mean of those values are printed; nothing is returned.
    """
    annual_returns = {}  # year -> winsorized mean of that year's daily index returns

    for year in range(2014, 2025):  # 2014 through 2024 inclusive
        print(f"正在处理{year}年的数据...")
        daily_index_returns = calculate_weighted_returns(
            calculate_daily_returns(load_data(year))
        )
        annual_returns[year] = calculate_annual_average_return(daily_index_returns)

    # Unweighted mean across the yearly figures.
    overall_average_return = np.mean([value for value in annual_returns.values()])

    print("\n各年度加权平均收益率：")
    for year, return_value in annual_returns.items():
        print(f"{year}年: {return_value:.4f}")
    print(f"\n2014-2024年总体平均收益率：{overall_average_return:.4f}")

In [47]:
# Load the 2024 data without the open_interest column and preview the first rows.
data = load_data(2024).drop(columns="open_interest")
data.head()

In [17]:
import pandas as pd

# Load the 2024 data files to explore their structure.
# Paths are built from data_folder rather than an absolute, machine-specific
# directory, so the cell runs wherever the notebook and its data live.
file_path_stocks_info = os.path.join(data_folder, 'hs300stocks_2024.csv')
file_path_kdata = os.path.join(data_folder, 'hs300stocks_kdata_2024.csv')

# Load the CSV files
stocks_info = pd.read_csv(file_path_stocks_info)
kdata = pd.read_csv(file_path_kdata)

# Display the first few rows of each file to understand their structure
stocks_info.head(), kdata.head()


In [51]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
import os

# Directory that holds the per-year CSV files; load_data joins file names onto it.
data_folder = './沪深300成分股的数据/'

def load_data(year):
    """Return one year's daily bars with each stock's weight attached.

    Args:
        year: Year inserted into the two CSV file names under ``data_folder``.

    Returns:
        DataFrame of the daily-bar file left-joined with the ``code`` and
        ``weight`` columns of the constituent file on ``code``.
    """
    constituents = pd.read_csv(os.path.join(data_folder, f'hs300stocks_{year}.csv'))
    bars = pd.read_csv(os.path.join(data_folder, f'hs300stocks_kdata_{year}.csv'))

    # Left join keeps every bar; a code missing from the constituent file
    # ends up with a NaN weight.
    return bars.merge(constituents[['code', 'weight']], how='left', on='code')

def min_max_scaling(series):
    """Rescale a Series linearly so its minimum maps to 0 and its maximum to 1.

    Args:
        series: Numeric pandas Series.

    Returns:
        Series with the same index as ``series``. If every value is identical
        the range is zero and a Series of 0.0 is returned instead of dividing
        by zero.
    """
    lowest, highest = series.min(), series.max()
    if highest == lowest:
        # Reuse the input's index and name: a fresh RangeIndex would misalign
        # (and turn into NaN) when the result is assigned back to a frame
        # that is indexed differently.
        return pd.Series(0.0, index=series.index, name=series.name)
    return (series - lowest) / (highest - lowest)

def calculate_liquidity_index(stocks_data):
    """Summarise market liquidity per calendar year from daily bar data.

    Steps: sum ``volume`` and ``amount`` over all stocks for each date, derive
    a turnover proxy (each date's share of the total volume), min-max scale
    the three daily series, then average them per year.

    Args:
        stocks_data: DataFrame with ``time``, ``volume`` and ``amount``
            columns. It is not modified.

    Returns:
        DataFrame with one row per calendar year and the columns ``Year``,
        ``AverageVolume``, ``AverageAmount``, ``AverageTurnoverRate`` and
        ``MarketLiquidity`` (row-wise mean of the three averages).

    Raises:
        KeyError: If the ``time`` column is missing.
        ValueError: If any ``time`` value cannot be parsed as a date.

    Note:
        ``TurnoverRate`` is the daily volume divided by the total volume, not
        a true turnover rate (no share-count data is used). Because it is a
        positive multiple of ``volume``, its min-max scaled values equal those
        of ``volume`` whenever the total volume is positive.
    """
    if 'time' not in stocks_data.columns:
        raise KeyError("数据中缺少 'time' 列，请检查数据格式。")

    # Parse into a local Series so the caller's frame keeps its original column.
    trade_dates = pd.to_datetime(stocks_data['time'], errors='coerce')

    # errors='coerce' turns unparseable values into NaT; treat that as an error.
    if trade_dates.isnull().any():
        raise ValueError("日期格式不正确，无法转换为日期。请检查数据中的 'time' 列。")

    # Market-wide totals per trading date.
    grouped_data = (
        stocks_data.groupby(trade_dates)[['volume', 'amount']].sum().reset_index()
    )

    # Turnover proxy: share of the period's total volume traded on each date.
    total_volume = grouped_data['volume'].sum()
    grouped_data['TurnoverRate'] = (
        grouped_data['volume'] / total_volume if total_volume > 0 else 0
    )

    # Min-max scale each daily series to [0, 1].
    metric_columns = ['volume', 'amount', 'TurnoverRate']
    grouped_data[metric_columns] = grouped_data[metric_columns].apply(min_max_scaling)

    # Average the scaled series per calendar year; after reset_index the
    # 'time' column holds integer years.
    yearly_liquidity = (
        grouped_data.groupby(grouped_data['time'].dt.year)[metric_columns]
        .mean()
        .reset_index()
    )

    # Overall liquidity: mean of the three yearly averages.
    yearly_liquidity['MarketLiquidity'] = yearly_liquidity[metric_columns].mean(axis=1)

    liquidity_df = pd.DataFrame({
        # The grouping key is already the year. Passing it through
        # pd.to_datetime would read the integer as nanoseconds since the
        # epoch and report 1970 for every row.
        'Year': yearly_liquidity['time'].astype(int),
        'AverageVolume': yearly_liquidity['volume'],
        'AverageAmount': yearly_liquidity['amount'],
        'AverageTurnoverRate': yearly_liquidity['TurnoverRate'],
        'MarketLiquidity': yearly_liquidity['MarketLiquidity']
    })

    return liquidity_df

def visualize_liquidity_index(all_liquidity_df):
    """Draw the yearly liquidity indicators as line charts on one figure.

    Args:
        all_liquidity_df: DataFrame with ``Year``, ``AverageVolume``,
            ``AverageAmount``, ``AverageTurnoverRate`` and ``MarketLiquidity``
            columns.
    """
    years = all_liquidity_df['Year']

    # (column, legend label) pairs, in drawing order.
    indicator_labels = (
        ('AverageVolume', '平均成交量'),
        ('AverageAmount', '平均成交额'),
        ('AverageTurnoverRate', '平均换手率'),
        ('MarketLiquidity', '市场流动性'),
    )

    plt.figure(figsize=(12, 8))
    for column, legend_label in indicator_labels:
        plt.plot(years, all_liquidity_df[column], marker='o', label=legend_label)

    plt.title('2014-2024年市场流动性指标')
    plt.xlabel('年份')
    plt.ylabel('流动性指标（标准化后）')
    plt.xticks(years)  # one tick per year
    plt.legend()
    plt.grid()
    plt.show()

# Main program: compute the liquidity indicators for every year, print them, then plot.
if __name__ == "__main__":
    # Collect the per-year frames and concatenate once. Growing a DataFrame
    # inside the loop copies it on every iteration, and starting from an empty
    # frame relies on concat behaviour that pandas has deprecated.
    yearly_frames = []
    for year in range(2014, 2025):
        stocks_data = load_data(year)
        liquidity_df = calculate_liquidity_index(stocks_data)
        yearly_frames.append(liquidity_df)

        # Print this year's indicators.
        print(f"{year} 年的平均成交量: {liquidity_df['AverageVolume'].mean():.2f}, "
              f"平均成交额: {liquidity_df['AverageAmount'].mean():.2f}, "
              f"平均换手率: {liquidity_df['AverageTurnoverRate'].mean():.4f}, "
              f"市场流动性: {liquidity_df['MarketLiquidity'].mean():.4f}")

    all_liquidity_df = (
        pd.concat(yearly_frames, ignore_index=True) if yearly_frames else pd.DataFrame()
    )

    # Only plot when there is something to show.
    if not all_liquidity_df.empty:
        visualize_liquidity_index(all_liquidity_df)
    else:
        print("没有有效的流动性指标数据可供绘图。")


In [1]:
import pandas as pd
import numpy as np
from scipy.optimize import minimize

# Read the 2014 daily price data.
# Errors are raised rather than calling exit(): in a notebook exit() shuts the
# kernel down, whereas an exception only stops this cell and keeps the message.
try:
    price_data = pd.read_csv(
        './沪深300成分股的数据/hs300stocks_kdata_2014.csv')
except FileNotFoundError as err:
    raise FileNotFoundError("文件 hs300stocks_kdata_2014.csv 未找到，请检查路径是否正确。") from err

# Convert the "time" column to dates (date part only) and index by (time, code).
price_data['time'] = pd.to_datetime(price_data['time']).dt.date
price_data = price_data.set_index(['time', 'code'])

# Return matrix: one row per date, one column per stock code.
# dropna() removes every date on which at least one stock has no return.
returns = price_data['close'].unstack(level=1).pct_change(fill_method=None).dropna()

# Read the weight data.
try:
    weights_data = pd.read_csv(
        r'./沪深300成分股的数据/hs300stocks_2014.csv')
except FileNotFoundError as err:
    raise FileNotFoundError("文件 hs300stocks_2014.csv 未找到，请检查路径是否正确。") from err

# The weight file must provide 'code' and 'weight' columns.
if 'code' not in weights_data.columns or 'weight' not in weights_data.columns:
    raise KeyError("请检查权重数据的列名，确保其分别为 'code' 和 'weight'")

# Index by stock code and align the weights to the column order of `returns`
# (.loc raises KeyError if a code in `returns` has no weight).
weights_data = weights_data.set_index('code')
weights_data = weights_data.loc[returns.columns]
# Normalise into a new Series so weights_data itself is not modified in place.
stock_weights = weights_data['weight'] / weights_data['weight'].sum()

# Sanity check: exactly one weight per return column (duplicate codes in the
# weight file would break this).
if len(stock_weights) != returns.shape[1]:
    raise ValueError("权重数据的长度与收益率数据的列数不匹配，请检查数据的一致性。")

# Risk-free rate. `returns` holds daily returns, so the rate must be per day too.
# 0.03 is taken to be an annual rate (assumption - confirm) and is converted
# with 252 trading days; left as 0.03 per day it dwarfs every daily mean return
# and the Sharpe ratio would reward volatility.
TRADING_DAYS_PER_YEAR = 252
annual_risk_free_rate = 0.03
risk_free_rate = annual_risk_free_rate / TRADING_DAYS_PER_YEAR


# Objective for the optimiser: negative Sharpe ratio plus a max-drawdown penalty.
def sharpe_ratio_with_drawdown_penalty(weights, returns, risk_free_rate, max_drawdown=0.7, penalty_factor=100):
    """Return the negative Sharpe ratio, penalised when drawdown exceeds a limit.

    Args:
        weights: 1-D NumPy array of portfolio weights, one per column of ``returns``.
        returns: DataFrame of periodic simple returns (rows = periods,
            columns = assets).
        risk_free_rate: Risk-free rate per period, in the same periodicity as
            ``returns``.
        max_drawdown: Largest drawdown (as a fraction of the running peak)
            tolerated without penalty.
        penalty_factor: Multiplier applied to the drawdown excess.

    Returns:
        ``-sharpe + penalty_factor * max(0, observed_max_drawdown - max_drawdown)``.
        The value is meant to be minimised.
    """
    mean_returns = returns.mean()
    cov_matrix = returns.cov()
    portfolio_return = np.dot(weights, mean_returns)
    portfolio_volatility = np.sqrt(np.dot(weights.T, np.dot(cov_matrix, weights)))
    sharpe_ratio = (portfolio_return - risk_free_rate) / portfolio_volatility

    # Maximum drawdown is measured on the wealth index (growth of 1 unit), not
    # on cumulative return (wealth - 1): the latter starts near 0 and can be
    # negative, so dividing by its running peak inflates or flips the ratio.
    portfolio_returns = returns.dot(weights)
    wealth = (1 + portfolio_returns).cumprod()
    # The running peak starts at the initial wealth of 1, so a loss in the
    # first period counts as a drawdown.
    peak = wealth.cummax().clip(lower=1.0)
    drawdown = (peak - wealth) / peak
    max_drawdown_observed = drawdown.max()

    # Penalise only the part of the drawdown above the allowed maximum.
    penalty = penalty_factor * max(0, max_drawdown_observed - max_drawdown)
    return -sharpe_ratio + penalty


# Constraint: the weights must sum to 1.
constraints = [{'type': 'eq', 'fun': lambda weights: np.sum(weights) - 1}]

# Cap each stock at 5% to force diversification. With this cap the sum-to-one
# constraint can only be met when there are at least 20 stocks.
weight_limit = 0.05
bounds = tuple((0, weight_limit) for _ in range(len(stock_weights)))

# Start from the normalised index weights.
# NOTE(review): index weights above weight_limit put the start outside the
# bounds; confirm the solver copes with that.
initial_weights = stock_weights.values

# Optimise.
optimized = minimize(sharpe_ratio_with_drawdown_penalty, initial_weights, args=(returns, risk_free_rate),
                     method='SLSQP', bounds=bounds, constraints=constraints)

# Report the result.
if optimized.success:
    optimized_weights = optimized.x
    expected_return = np.dot(optimized_weights, returns.mean())
    volatility = np.sqrt(np.dot(optimized_weights.T, np.dot(returns.cov(), optimized_weights)))

    # Maximum drawdown of the optimised portfolio, measured on the wealth
    # index (growth of 1 unit) against its running peak. Dividing cumulative
    # return (wealth - 1) by its own peak misstates the drawdown because that
    # peak can be close to zero or negative.
    portfolio_returns = returns.dot(optimized_weights)
    wealth = (portfolio_returns + 1).cumprod()
    cumulative_returns = wealth - 1
    peak = wealth.cummax().clip(lower=1.0)  # peak starts at the initial wealth of 1
    drawdown = (peak - wealth) / peak
    max_drawdown_observed = drawdown.max()

    print("Optimized Portfolio Weights:", optimized_weights)
    print("Expected Portfolio Return:", expected_return)
    print("Portfolio Volatility:", volatility)
    print("Max Drawdown:", max_drawdown_observed)
else:
    print("Optimization failed:", optimized.message)