In [1]:
# Make the raw-data folder the working directory so that the relative file
# names used by later cells resolve against it.
# NOTE(review): this is an absolute, machine-specific path; the notebook will
# only run where this folder exists.
import os

RAW_DATA_DIR = r'F:\Study\Study Files\05大三上\金融统计分析\实验课1\1_Rawdata'
os.chdir(RAW_DATA_DIR)

In [2]:
import pandas as pd
import numpy as np
import datetime as dt
import matplotlib.pyplot as plt
import seaborn as sns

# Load the monthly trading file from the working directory and discard the
# 'Markettype' column, which the rest of the notebook never reads.
data = pd.read_csv('TRD_Mnth.csv').drop(columns=['Markettype'])

In [3]:
# Formation-period lengths J, in months; the back-test loops use each value as
# a month offset when building the ranking window.
forming_month_j = [1, 3, 6]
# Holding-period lengths K, in months; also used as the column labels of the
# result grid.
holding_month_k = [1, 3, 6]
# Number of stocks placed in each of the winner and loser portfolios
# (passed to nlargest / nsmallest).
winners_and_losers_count = 10

In [None]:
# Parse the month column BEFORE sorting so that the ordering (and every date
# comparison below) is chronological rather than lexical on the raw strings.
data['Trdmnt'] = pd.to_datetime(data['Trdmnt'])
data = data.sort_values(['Stkcd', 'Trdmnt'])

# Back-test window: rows dated within calendar year 2021.
data_trade = data[(data['Trdmnt'] >= '2021-01-01') & (data['Trdmnt'] <= '2021-12-31')]

# Grid that receives one annualised return per (formation J, holding K) pair;
# rows are formation periods, columns are holding periods.
result = pd.DataFrame(index=forming_month_j, columns=holding_month_k)

# Distinct months of the back-test window, used as the evaluation points.
# Series.unique() returns values in order of first appearance (which depends on
# the first stock's rows) and yields a bare extension array, so wrap the result
# in a PeriodIndex and sort it: iteration is then chronological, [-1] is the
# latest month, and index operations such as .union() are available.
unique_year_month = pd.PeriodIndex(
    data_trade['Trdmnt'].dt.to_period('M').unique()
).sort_values()

1

In [4]:
# Momentum back-test, version 1.
# For every (formation J, holding K) pair and every month in unique_year_month:
#   1. rank stocks by their compounded 'Mretwd' over the J months ending at
#      that month,
#   2. take the top / bottom `winners_and_losers_count` names,
#   3. record the K-month winner-minus-loser return over the following months.
# result.loc[J, K] receives the geometric-mean K-month spread, annualised.

# Latest month present in the data; loop-invariant, so it is computed once.
last_data_month = data['Trdmnt'].max()

for form_period in forming_month_j:
    for hold_period in holding_month_k:

        all_returns = []

        # Visit every evaluation month.
        for current_ym in unique_year_month:

            # Period.to_timestamp() gives the start of the month.
            end_date = pd.to_datetime(current_ym.to_timestamp())
            start_date = end_date - pd.DateOffset(months=form_period)
            hold_end_date = end_date + pd.DateOffset(months=hold_period)

            # Skip months whose holding window would run past the data.
            if hold_end_date > last_data_month:
                continue

            # Formation window (start_date, end_date]: compound each stock's
            # monthly returns to get its formation-period return.
            mask = (data['Trdmnt'] > start_date) & (data['Trdmnt'] <= end_date)
            form_returns = data.loc[mask].groupby('Stkcd')['Mretwd'].apply(lambda x: (1 + x).prod() - 1)

            # Keep only stocks that have at least one row in the holding window.
            mask = (data['Trdmnt'] > end_date) & (data['Trdmnt'] <= hold_end_date)
            available_stocks = data.loc[mask]['Stkcd'].unique()
            form_returns = form_returns[form_returns.index.isin(available_stocks)]

            winners = form_returns.nlargest(winners_and_losers_count).index
            losers = form_returns.nsmallest(winners_and_losers_count).index

            # Equal-weighted mean return of each leg in every holding month.
            winners_returns = []
            losers_returns = []
            for m in range(1, hold_period + 1):
                month_rows = data.loc[data['Trdmnt'] == (end_date + pd.DateOffset(months=m))]
                winners_returns.append(month_rows.loc[month_rows['Stkcd'].isin(winners), 'Mretwd'].mean())
                losers_returns.append(month_rows.loc[month_rows['Stkcd'].isin(losers), 'Mretwd'].mean())

            # Compound the monthly leg returns into a holding-period return.
            winner_return = np.prod([1 + r for r in winners_returns]) - 1
            loser_return = np.prod([1 + r for r in losers_returns]) - 1

            # Strategy return: long winners, short losers.
            strategy_return = winner_return - loser_return
            all_returns.append(strategy_return)

        # Drop NaN observations (months in which a leg had no data).
        all_returns_clean = [x for x in all_returns if not np.isnan(x)]

        if all_returns_clean:
            # Geometric mean of the K-month spreads: the n-th root of the
            # compounded growth, not the compounded growth itself.
            # (If the compounded growth is negative the root is undefined and
            # numpy yields NaN.)
            compounded_growth = np.prod([1 + x for x in all_returns_clean])
            geometric_mean = compounded_growth ** (1 / len(all_returns_clean)) - 1
            # Each observation spans hold_period months, so a year contains
            # 12 / hold_period of them.
            annual_returns = (1 + geometric_mean) ** (12 / hold_period) - 1
        else:
            # No usable observation: report NaN rather than a spurious 0%.
            annual_returns = np.nan

        result.loc[form_period, hold_period] = annual_returns



In [None]:
# Draw the J x K grid of annualised returns as an annotated heatmap with a
# diverging palette centred on zero, then label it through the returned Axes.
ax = sns.heatmap(
    result.astype(float),
    annot=True,
    fmt=".2%",
    cmap="vlag",
    center=0,
)
ax.set_title("Momentum Strategy Returns (Jegadeesh and Titman, 1993)")
ax.set_xlabel("Holding Period (months)")
ax.set_ylabel("Formation Period (months)")
# plt.savefig("v2-2012.png")  # uncomment to write the figure to disk
plt.show()

2

In [None]:
# Momentum back-test, version 2.
# Same ranking step as version 1, but the last `hold_period` winner / loser
# lists are kept in a rolling queue and each evaluation month contributes the
# average one-month spread across the queued cohorts.
# result.loc[J, K] receives the geometric-mean monthly spread, annualised.

# Latest month present in the data; loop-invariant, so it is computed once.
last_data_month = data['Trdmnt'].max()

for form_period in forming_month_j:
    for hold_period in holding_month_k:

        all_returns = []
        past_winners = []
        past_losers = []

        # Visit every evaluation month.
        for current_ym in unique_year_month:

            end_date = pd.to_datetime(current_ym.to_timestamp())
            start_date = end_date - pd.DateOffset(months=form_period)
            hold_end_date = end_date + pd.DateOffset(months=hold_period)

            # Skip months whose holding window would run past the data.
            if hold_end_date > last_data_month:
                continue

            # Formation window (start_date, end_date]: compounded return per stock.
            mask = (data['Trdmnt'] > start_date) & (data['Trdmnt'] <= end_date)
            form_returns = data.loc[mask].groupby('Stkcd')['Mretwd'].apply(lambda x: (1 + x).prod() - 1)

            # Keep only stocks that have at least one row in the holding window.
            mask = (data['Trdmnt'] > end_date) & (data['Trdmnt'] <= hold_end_date)
            available_stocks = data.loc[mask]['Stkcd'].unique()
            form_returns = form_returns[form_returns.index.isin(available_stocks)]

            winners = form_returns.nlargest(winners_and_losers_count).index.tolist()
            losers = form_returns.nsmallest(winners_and_losers_count).index.tolist()

            past_winners.append(winners)
            past_losers.append(losers)

            # Keep at most `hold_period` cohorts; the oldest one drops out.
            if len(past_winners) > hold_period:
                past_winners.pop(0)
                past_losers.pop(0)

            all_winner_returns = []
            all_loser_returns = []

            # NOTE(review): cohort i (oldest first) is priced in month
            # end_date + (i + 1), so the cohorts averaged below are priced in
            # different calendar months. Overlapping-portfolio momentum tests
            # usually price every live cohort in the same month - confirm this
            # alignment is intended.
            for i in range(len(past_winners)):
                current_month = end_date + pd.DateOffset(months=i + 1)
                current_month_data = data.loc[data['Trdmnt'] == current_month]

                all_winner_returns.append(
                    current_month_data.loc[current_month_data['Stkcd'].isin(past_winners[i]), 'Mretwd'].mean()
                )
                all_loser_returns.append(
                    current_month_data.loc[current_month_data['Stkcd'].isin(past_losers[i]), 'Mretwd'].mean()
                )

            # Arithmetic mean across the queued cohorts (NaN if any cohort has
            # no data in its month).
            winner_return = np.mean(all_winner_returns)
            loser_return = np.mean(all_loser_returns)

            # Strategy return: long winners, short losers.
            strategy_return = winner_return - loser_return
            all_returns.append(strategy_return)

        # Drop NaN observations.
        all_returns_clean = [x for x in all_returns if not np.isnan(x)]

        if all_returns_clean:
            # Geometric mean of the monthly spreads: the n-th root of the
            # compounded growth, not the compounded growth itself.
            # (A negative compounded growth yields NaN.)
            compounded_growth = np.prod([1 + x for x in all_returns_clean])
            geometric_mean = compounded_growth ** (1 / len(all_returns_clean)) - 1
            # Observations are one-month returns, so twelve make a year.
            annual_returns = (1 + geometric_mean) ** 12 - 1
        else:
            # No usable observation: report NaN rather than a spurious 0%.
            annual_returns = np.nan

        result.loc[form_period, hold_period] = annual_returns

# Show the result grid.
print(result)


3

In [None]:
# Momentum back-test, version 3.
# Each month is priced with the most recently formed winner / loser lists, and
# a new pair of lists is formed only in months that belong to the back-test
# window. Two months are appended after the window so that the last cohorts
# still get priced.
# TODO: the extension is fixed at two months (the author's note ties this to
# K = 3); it still needs to be generalised to other holding periods.
# NOTE(review): only past_winners[-1] / past_losers[-1] are ever priced, so
# hold_period influences nothing but the length of the (unused) history here.

# Work on a sorted PeriodIndex: a bare period array has no .union(), and the
# order returned by Series.unique() is not guaranteed to be chronological.
trade_months = pd.PeriodIndex(unique_year_month).sort_values()
last_trade_month = trade_months[-1]
extra_two_months = pd.PeriodIndex([last_trade_month + 1, last_trade_month + 2], freq='M')
full_year_month = trade_months.union(extra_two_months)

for form_period in forming_month_j:
    for hold_period in holding_month_k:

        all_returns = []
        past_winners = []
        past_losers = []

        # Visit every month of the window plus the two extra months.
        for current_ym in full_year_month:

            # 'Trdmnt' holds timestamps, so compare against the month-start
            # timestamp; comparing with the Period itself never matches.
            end_date = pd.to_datetime(current_ym.to_timestamp())

            # Price the month with the most recently formed portfolios.
            winners = past_winners[-1] if past_winners else []
            losers = past_losers[-1] if past_losers else []

            mask = (data['Trdmnt'] == end_date)
            current_winner_return = data.loc[mask & data['Stkcd'].isin(winners), 'Mretwd'].mean()
            current_loser_return = data.loc[mask & data['Stkcd'].isin(losers), 'Mretwd'].mean()

            # Form new portfolios only inside the back-test window.
            if current_ym in trade_months:
                start_date = end_date - pd.DateOffset(months=form_period)

                # Formation window (start_date, end_date]: compounded return per stock.
                mask = (data['Trdmnt'] > start_date) & (data['Trdmnt'] <= end_date)
                form_returns = data.loc[mask].groupby('Stkcd')['Mretwd'].apply(lambda x: (1 + x).prod() - 1)

                # Keep only stocks that still have rows after the formation month.
                available_stocks = data.loc[data['Trdmnt'] > end_date]['Stkcd'].unique()
                form_returns = form_returns[form_returns.index.isin(available_stocks)]

                winners = form_returns.nlargest(winners_and_losers_count).index.tolist()
                losers = form_returns.nsmallest(winners_and_losers_count).index.tolist()

                past_winners.append(winners)
                past_losers.append(losers)

                # Cap the history at `hold_period` cohorts.
                if len(past_winners) > hold_period:
                    past_winners.pop(0)
                    past_losers.pop(0)

            # Strategy return: long winners, short losers (NaN while no
            # portfolio exists yet or when the month has no data).
            strategy_return = current_winner_return - current_loser_return
            all_returns.append(strategy_return)

        # Drop NaN observations.
        all_returns_clean = [x for x in all_returns if not np.isnan(x)]

        if all_returns_clean:
            # Geometric mean of the monthly spreads: the n-th root of the
            # compounded growth, not the compounded growth itself.
            # (A negative compounded growth yields NaN.)
            compounded_growth = np.prod([1 + x for x in all_returns_clean])
            geometric_mean = compounded_growth ** (1 / len(all_returns_clean)) - 1
            # Observations are one-month returns, so twelve make a year.
            annual_returns = (1 + geometric_mean) ** 12 - 1
        else:
            # No usable observation: report NaN rather than a spurious 0%.
            annual_returns = np.nan

        result.loc[form_period, hold_period] = annual_returns


4

In [None]:

# Momentum back-test, version 4 (overlapping portfolios).
# In every month the cohorts formed in the previous `hold_period` months are
# priced and their one-month spreads are averaged; only afterwards is a new
# cohort formed from data up to and including that month.
# result.loc[J, K] receives the geometric-mean monthly spread, annualised.

# Months of the back-test window as a sorted PeriodIndex. The loop below needs
# Period arithmetic (+1), .union() and .to_timestamp(), none of which work on
# the raw datetime values returned by data_trade['Trdmnt'].unique().
unique_year_month = pd.PeriodIndex(
    data_trade['Trdmnt'].dt.to_period('M').unique()
).sort_values()

# NOTE(review): the window is extended by a fixed two months regardless of
# hold_period - confirm whether it should scale with K.
last_trade_month = unique_year_month[-1]
extra_two_months = pd.PeriodIndex([last_trade_month + 1, last_trade_month + 2], freq='M')
full_year_month = unique_year_month.union(extra_two_months)

for form_period in forming_month_j:
    for hold_period in holding_month_k:

        all_returns = []
        past_winners = []
        past_losers = []

        # Visit every month of the window plus the two extra months.
        for current_ym in full_year_month:

            # 'Trdmnt' holds timestamps, so work with the month-start timestamp;
            # comparing the column with the Period itself never matches.
            end_date = pd.to_datetime(current_ym.to_timestamp())
            start_date = end_date - pd.DateOffset(months=form_period)

            # --- 1. Price this month with the cohorts formed in EARLIER months.
            # This must happen before the new cohort is added: that cohort is
            # ranked on returns that include this very month, so pricing it
            # here would be look-ahead bias.
            current_month_data = data.loc[data['Trdmnt'] == end_date]
            current_winner_returns = [
                current_month_data.loc[current_month_data['Stkcd'].isin(winners_month), 'Mretwd'].mean()
                for winners_month in past_winners
            ]
            current_loser_returns = [
                current_month_data.loc[current_month_data['Stkcd'].isin(losers_month), 'Mretwd'].mean()
                for losers_month in past_losers
            ]

            # Arithmetic mean across cohorts, ignoring cohorts without data.
            # Series.mean() skips NaN and returns NaN for an empty list, which
            # covers the first month when no cohort exists yet.
            current_winner_return = pd.Series(current_winner_returns, dtype=float).mean()
            current_loser_return = pd.Series(current_loser_returns, dtype=float).mean()

            # Strategy return: long winners, short losers.
            strategy_return = current_winner_return - current_loser_return
            all_returns.append(strategy_return)

            # --- 2. Form this month's cohort; it is first priced next month.
            # Formation window (start_date, end_date]: compounded return per stock.
            mask = (data['Trdmnt'] > start_date) & (data['Trdmnt'] <= end_date)
            form_returns = data.loc[mask].groupby('Stkcd')['Mretwd'].apply(lambda x: (1 + x).prod() - 1)

            # Keep only stocks that still have rows after the formation month.
            available_stocks = data.loc[data['Trdmnt'] > end_date]['Stkcd'].unique()
            form_returns = form_returns[form_returns.index.isin(available_stocks)]

            winners = form_returns.nlargest(winners_and_losers_count).index.tolist()
            losers = form_returns.nsmallest(winners_and_losers_count).index.tolist()

            past_winners.append(winners)
            past_losers.append(losers)

            # Keep at most `hold_period` cohorts; the oldest one drops out.
            if len(past_winners) > hold_period:
                past_winners.pop(0)
                past_losers.pop(0)

        # Drop NaN observations.
        all_returns_clean = [x for x in all_returns if not np.isnan(x)]

        if all_returns_clean:
            # Geometric mean of the monthly spreads: the n-th root of the
            # compounded growth, not the compounded growth itself.
            # (A negative compounded growth yields NaN.)
            compounded_growth = np.prod([1 + x for x in all_returns_clean])
            geometric_mean = compounded_growth ** (1 / len(all_returns_clean)) - 1
            # Observations are one-month returns, so twelve make a year.
            annual_returns = (1 + geometric_mean) ** 12 - 1
        else:
            # No usable observation: report NaN rather than a spurious 0%.
            annual_returns = np.nan

        result.loc[form_period, hold_period] = annual_returns