In [1]:
# Point the working directory at the raw-data folder so that later
# relative file reads (e.g. the CSV load) resolve against it.
# NOTE(review): this is a machine-specific absolute path — consider making it
# configurable before sharing the notebook.
import os

RAW_DATA_DIR = r'F:\Study\Study Files\05大三上\金融统计分析\实验课1\1_Rawdata'
os.chdir(RAW_DATA_DIR)

# Echo the directory that is actually in effect as a sanity check.
current_path = os.getcwd()
print(current_path)

import pandas as pd
import numpy as np
import datetime as dt
import matplotlib.pyplot as plt
import seaborn as sns

# Load the monthly trading file, discard the market-type column and order the
# rows by stock code, then by trading month.
data = (
    pd.read_csv('TRD_Mnth.csv')
    .drop(columns=['Markettype'])
    .sort_values(['Stkcd', 'Trdmnt'])
)

# Parse the trading month into real timestamps for date arithmetic later on.
data['Trdmnt'] = pd.to_datetime(data['Trdmnt'])

# Back-test window: calendar year 2002, both end points inclusive.
in_backtest_window = data['Trdmnt'].between('2002-01-01', '2002-12-31')
data_trade = data[in_backtest_window]

# 3x3 grid of strategy returns: rows are indexed by formation period and
# columns by holding period (1, 3 and 6 months each).
result = pd.DataFrame(index=[1,3,6], columns=[1,3,6])

# Distinct year-month periods in the back-test window; each one is a
# candidate portfolio-formation point.
trade_periods = data_trade['Trdmnt'].dt.to_period('M')
unique_year_month = trade_periods.unique()

F:\Study\Study Files\05大三上\金融统计分析\实验课1\1_Rawdata


In [2]:
# Show the distinct year-month periods that make up the back-test window.
print(unique_year_month)

<PeriodArray>
['2002-01', '2002-02', '2002-03', '2002-04', '2002-05', '2002-06', '2002-07',
 '2002-08', '2002-09', '2002-10', '2002-11', '2002-12']
Length: 12, dtype: period[M]


In [3]:
# Momentum-strategy parameters, both expressed in months.
form_period = 3
hold_period = 3

# Accumulates one long-short strategy return per evaluated month.
all_returns = []

# Work with the first month of the back-test window.
current_ym = unique_year_month[0]

# Anchor date: the period converted to a timestamp (start of that month).
end_date = pd.to_datetime(current_ym.to_timestamp())

# The holding window closes `hold_period` months after the anchor ...
hold_end_date = end_date + pd.DateOffset(months=hold_period)
# ... and the formation window opens `form_period` months before it.
start_date = end_date - pd.DateOffset(months=form_period)

# Print every derived date next to its label for a quick visual check.
for label, value in (
    ('current_ym 是', current_ym),
    ('end_date 是', end_date),
    ('start_date 是', start_date),
    ('hold_end_date 是', hold_end_date),
):
    print(label, value)


current_ym 是 2002-01
end_date 是 2002-01-01 00:00:00
start_date 是 2001-10-01 00:00:00
hold_end_date 是 2002-04-01 00:00:00


In [5]:
# Select the rows that fall inside the holding window (end_date, hold_end_date]
# and summarise them to sanity-check the date filter.
trade_month = data['Trdmnt']
mask = trade_month.gt(end_date) & trade_month.le(hold_end_date)

data_mask = data[mask]

data_mask.describe()

Unnamed: 0,Stkcd,Trdmnt,Msmvosd,Mretwd
count,3767.0,3767,3767.0,3756.0
mean,356133.289089,2002-03-02 00:57:20.403504128,1085568.0,0.05398
min,1.0,2002-02-01 00:00:00,35880.0,-0.649533
25%,751.5,2002-02-01 00:00:00,590790.4,0.012112
50%,600103.0,2002-03-01 00:00:00,855200.0,0.042991
75%,600637.5,2002-04-01 00:00:00,1282185.0,0.085148
max,900957.0,2002-04-01 00:00:00,16207660.0,1.234496
std,307833.373885,,960275.0,0.073785


In [None]:


# --- Formation period -------------------------------------------------------
# Cumulative (compounded) return of every stock over [start_date, end_date).
mask = (data['Trdmnt'] >= start_date) & (data['Trdmnt'] < end_date)
form_returns = data.loc[mask].groupby('Stkcd')['Mretwd'].apply(lambda x: (1 + x).prod() - 1)

# --- Holding period ---------------------------------------------------------
# Rows inside the holding window (end_date, hold_end_date]. The same slice is
# used both for the availability filter and for the holding-return computation.
mask = (data['Trdmnt'] > end_date) & (data['Trdmnt'] <= hold_end_date)
hold_data = data.loc[mask]
available_stocks = hold_data['Stkcd'].unique()

# Keep only stocks that have at least one row in the holding window.
form_returns = form_returns[form_returns.index.isin(available_stocks)]

# Winners / losers: the 10 best and 10 worst formation-period performers among
# the remaining stocks.
winners = form_returns.nlargest(10).index
losers = form_returns.nsmallest(10).index

# Geometric-mean monthly return of each stock over the holding window.
# The grouping is by stock only: grouping by (stock, month) would make every
# group a single observation, so the root would be applied to one month's
# gross return instead of to the compounded holding-period return.
hold_returns = hold_data.groupby('Stkcd')['Mretwd'].apply(
    lambda x: (1 + x).prod() ** (1 / hold_period) - 1
)

winner_returns = hold_returns.loc[winners].mean()
loser_returns = hold_returns.loc[losers].mean()

# Strategy return: long the winners, short the losers.
strategy_return = winner_returns - loser_returns

all_returns.append(strategy_return)

# Drop missing strategy returns before aggregating.
all_returns_clean = [x for x in all_returns if not np.isnan(x)]

# Gross returns (1 + r) of every recorded strategy return.
cumulative_returns = [x + 1 for x in all_returns_clean]

if cumulative_returns:
    # Geometric mean of the monthly strategy returns: n-th root of the
    # compounded gross return. Without the root, the compounded total would be
    # annualised as if it were a single month's return.
    geometric_mean = np.prod(cumulative_returns) ** (1 / len(cumulative_returns)) - 1
    # Annualise the average monthly return.
    annual_returns = (geometric_mean + 1) ** 12 - 1
else:
    # No valid observation: report "missing" rather than a misleading 0%.
    geometric_mean = np.nan
    annual_returns = np.nan

# Store the annualised return in the (formation period, holding period) cell.
result.loc[form_period, hold_period] = annual_returns


In [None]:
# Draw the formation/holding return grid as an annotated, zero-centred heatmap
# and label it through the Axes object that seaborn returns.
heatmap_ax = sns.heatmap(
    result.astype(float),
    annot=True,
    fmt=".2%",
    cmap="RdYlGn",
    center=0,
)
heatmap_ax.set_title("Momentum Strategy Returns (Jegadeesh and Titman, 1993)")
heatmap_ax.set_xlabel("Holding Period (months)")
heatmap_ax.set_ylabel("Formation Period (months)")
plt.show()