Based on the papers:

@article{wood2021trading,
  title={Trading with the Momentum Transformer: An Intelligent and Interpretable Architecture},
  author={Wood, Kieran and Giegerich, Sven and Roberts, Stephen and Zohren, Stefan},
  journal={arXiv preprint arXiv:2112.08534},
  year={2021}
}

@article {Wood111,
	author = {Wood, Kieran and Roberts, Stephen and Zohren, Stefan},
	title = {Slow Momentum with Fast Reversion: A Trading Strategy Using Deep Learning and Changepoint Detection},
	volume = {4},
	number = {1},
	pages = {111--129},
	year = {2022},
	doi = {10.3905/jfds.2021.1.081},
	publisher = {Institutional Investor Journals Umbrella},
	issn = {2640-3943},
	URL = {https://jfds.pm-research.com/content/4/1/111},
	eprint = {https://jfds.pm-research.com/content/4/1/111.full.pdf},
	journal = {The Journal of Financial Data Science}
}

In [None]:
!pip install empyrical-reloaded

In [8]:
from empyrical import (sharpe_ratio, max_drawdown, downside_risk, annual_return, annual_volatility,)
from typing import Dict, List, Optional, Tuple, Union
import pandas as pd
import numpy as np
import os

import warnings
warnings.filterwarnings('ignore')

In [9]:
def calc_returns(srs: pd.Series, day_offset: int = 1):
    """Simple return of a series over ``day_offset`` rows.

    Args:
        srs: Series of prices (or any level series).
        day_offset: Number of rows to look back when forming the return.

    Returns:
        Series of ``srs / srs.shift(day_offset) - 1``; the first
        ``day_offset`` entries are NaN because no lagged value exists.
    """
    lagged = srs.shift(day_offset)
    return srs / lagged - 1

In [10]:
def calc_daily_vol(daily_returns, span: int = 60):
    """Exponentially weighted standard deviation of daily returns.

    Args:
        daily_returns: Series of daily returns.
        span: EWM span, also used as ``min_periods`` (defaults to 60).

    Returns:
        Series of volatility estimates. Rows before ``span`` observations are
        available are back-filled with the first valid estimate, so those
        warm-up rows use information from later dates. If the input has fewer
        than ``span`` observations the result is all NaN.
    """
    # .bfill() replaces fillna(method="bfill"), which recent pandas versions deprecate.
    return daily_returns.ewm(span=span, min_periods=span).std().bfill()

In [11]:
def calc_vol_scaled_returns(daily_returns, daily_vol=None, target_vol=0.15):
    """Scale daily returns to a target annualised volatility.

    Args:
        daily_returns: Series of daily returns.
        daily_vol: Optional series of daily volatility estimates. When it is
            ``None`` or empty, it is computed with ``calc_daily_vol``.
        target_vol: Annualised volatility target the returns are scaled to
            (defaults to 0.15).

    Returns:
        ``daily_returns * target_vol / annualised_vol`` where the volatility is
        lagged by one row, so each return is scaled by the previous row's
        estimate. The first row is therefore NaN.
    """
    # A None sentinel avoids building a shared pd.Series default at definition time.
    if daily_vol is None or not len(daily_vol):
        daily_vol = calc_daily_vol(daily_returns)
    annualized_vol = daily_vol * np.sqrt(252)  # 252 trading days per year
    return daily_returns * target_vol / annualized_vol.shift(1)

In [12]:
class MACDStrat:
    """Volatility-normalised MACD trend signals over several timescale pairs."""

    def __init__(self, trend_combinations: Optional[List[Tuple[float, float]]] = None):
        """Store the (short, long) timescale pairs used for the signals.

        Args:
            trend_combinations: List of ``(short_timescale, long_timescale)``
                pairs. Defaults to ``[(8, 24), (16, 48), (32, 96)]``.
        """
        if trend_combinations is None:
            self.trend_combinations = [(8, 24), (16, 48), (32, 96)]
        else:
            self.trend_combinations = trend_combinations

    @staticmethod
    def calc_signal(prices: pd.Series, short_timescale: int, long_timescale: int):
        """Compute a normalised MACD signal for one pair of timescales.

        Args:
            prices: Series of prices.
            short_timescale: Timescale of the fast exponential average.
            long_timescale: Timescale of the slow exponential average.

        Returns:
            Series with the difference of the two exponential averages, divided
            by the 63-row rolling standard deviation of prices and then by the
            252-row rolling standard deviation of that ratio. Warm-up rows of
            both rolling windows are back-filled with the first valid value.
        """

        def calc_halflife(timescale):
            # Half-life of an exponential average whose decay factor is 1 - 1/timescale.
            return np.log(0.5) / np.log(1 - 1 / timescale)

        fast_average = prices.ewm(halflife=calc_halflife(short_timescale)).mean()
        slow_average = prices.ewm(halflife=calc_halflife(long_timescale)).mean()
        macd = fast_average - slow_average

        # .bfill() replaces fillna(method="bfill"), which recent pandas versions deprecate.
        q = macd / prices.rolling(63).std().bfill()  # standardise MACD by price volatility
        return q / q.rolling(252).std().bfill()


In [None]:
def read_changepoint_file(file_path: str, lookback_window_length: int):
    """Load one changepoint CSV and add a normalised changepoint location.

    Args:
        file_path: Path of the CSV file. Its first column is used as the index
            and parsed as dates; the file must contain ``t`` and
            ``cp_location`` columns.
        lookback_window_length: Window length used to normalise the distance
            between ``t`` and ``cp_location``.

    Returns:
        DataFrame with missing values forward-filled, rows that are still
        incomplete (nothing earlier to fill from) dropped, and an extra
        ``cp_location_norm`` column equal to
        ``(t - cp_location) / lookback_window_length``.
    """
    return (
        pd.read_csv(file_path, index_col=0, parse_dates=True)
        # .ffill() replaces fillna(method="ffill"), which recent pandas versions deprecate.
        .ffill()
        .dropna()
        .assign(
            # assign() passes the whole frame to the callable, not a single row.
            cp_location_norm=lambda frame: (frame["t"] - frame["cp_location"]) / lookback_window_length
        )
    )

In [77]:
def prepare_cpd_features(folder_path: str, lookback_window_length: int):
    """Load every changepoint CSV in a folder into one stacked DataFrame.

    Args:
        folder_path: Directory containing one ``<ticker>.csv`` file per asset.
        lookback_window_length: Passed through to ``read_changepoint_file``.

    Returns:
        Concatenation of the per-file frames, each tagged with a ``ticker``
        column taken from the file name without its extension.

    Raises:
        ValueError: If the folder contains no CSV files.
    """
    # Skip anything that is not a CSV (checkpoint folders, notes, ...) and sort
    # so the row order does not depend on the filesystem's listing order.
    csv_names = sorted(
        name for name in os.listdir(folder_path) if name.lower().endswith(".csv")
    )
    if not csv_names:
        raise ValueError(f"No changepoint CSV files found in {folder_path!r}")

    return pd.concat(
        [
            read_changepoint_file(
                os.path.join(folder_path, name), lookback_window_length
            ).assign(ticker=os.path.splitext(name)[0])
            for name in csv_names
        ]
    )

In [15]:
def deep_momentum_features(df_asset:pd.DataFrame):
    """Build the momentum feature table for a single asset.

    The frame passed in is modified in place (columns are added to it); the
    returned frame is that same data with every row containing a NaN removed.

    Args:
        df_asset: Frame with a ``close`` column. The non-empty branch reads
            ``dayofweek``, ``day``, ``isocalendar()``, ``month`` and ``year``
            from the index, so the index is expected to be a DatetimeIndex.

    Returns:
        DataFrame with the clipped price series ``srs``, daily returns and
        volatility, the next-row volatility-scaled ``target_returns``,
        volatility-normalised returns over five horizons, three MACD signals
        and calendar columns.
    """
    # Clip the price to within five EWM standard deviations of its EWM mean.
    df_asset["srs"] = df_asset["close"]
    smoothed = df_asset["srs"].ewm(halflife=252)
    centre, spread = smoothed.mean(), smoothed.std()
    df_asset["srs"] = np.minimum(df_asset["srs"], centre + 5 * spread)
    df_asset["srs"] = np.maximum(df_asset["srs"], centre - 5 * spread)

    df_asset["daily_returns"] = calc_returns(df_asset["srs"])
    df_asset["daily_vol"] = calc_daily_vol(df_asset["daily_returns"])

    # The target is the following row's scaled return, hence the shift(-1).
    scaled_returns = calc_vol_scaled_returns(
        df_asset["daily_returns"], df_asset["daily_vol"]
    )
    df_asset["target_returns"] = scaled_returns.shift(-1)

    # Returns over each horizon, normalised by daily vol and sqrt(horizon).
    horizon_columns = (
        ("norm_daily_return", 1),
        ("norm_monthly_return", 21),
        ("norm_quarterly_return", 63),
        ("norm_biannual_return", 126),
        ("norm_annual_return", 252),
    )
    for column, day_offset in horizon_columns:
        horizon_return = calc_returns(df_asset["srs"], day_offset)
        df_asset[column] = horizon_return / df_asset["daily_vol"] / np.sqrt(day_offset)

    for short_window, long_window in [(8, 24), (16, 48), (32, 96)]:
        df_asset[f"macd_{short_window}_{long_window}"] = MACDStrat.calc_signal(
            df_asset["srs"], short_window, long_window
        )

    # date features
    if not len(df_asset):
        # Empty input: still create the calendar columns so the schema matches.
        for column in (
            "day_of_week",
            "day_of_month",
            "week_of_year",
            "month_of_year",
            "year",
            "date",
        ):
            df_asset[column] = []
        return df_asset.dropna()

    stamps = df_asset.index
    df_asset["day_of_week"] = stamps.dayofweek
    df_asset["day_of_month"] = stamps.day
    df_asset["week_of_year"] = stamps.isocalendar().week
    df_asset["month_of_year"] = stamps.month
    df_asset["year"] = stamps.year
    df_asset["date"] = stamps

    return df_asset.dropna()

In [None]:
def include_changepoint_features(features: pd.DataFrame, cpd_folder_name: str, lookback_window_length: int):
    """Merge changepoint location and score columns into a feature table.

    Args:
        features: Feature table with ``date`` and ``ticker`` columns.
        cpd_folder_name: Folder of changepoint CSVs, read via
            ``prepare_cpd_features``.
        lookback_window_length: Lookback window of the changepoint files; it is
            also used as the suffix of the two added column names.

    Returns:
        The merged frame, indexed by its ``date`` column, with the extra
        columns ``cp_rl_<window>`` and ``cp_score_<window>``. The merge uses
        pandas' default join, so rows without a matching changepoint row are
        not kept.
    """
    renamed_columns = {
        "cp_location_norm": f"cp_rl_{lookback_window_length}",
        "cp_score": f"cp_score_{lookback_window_length}"
    }

    cpd_all = prepare_cpd_features(cpd_folder_name, lookback_window_length)
    cpd_selected = cpd_all[["ticker", "cp_location_norm", "cp_score"]]
    # reset_index() turns the changepoint index into a column for the merge key.
    # NOTE(review): this requires that index to be named "date" - confirm against the CSV header.
    cpd_selected = cpd_selected.rename(columns=renamed_columns).reset_index()

    merged = features.merge(cpd_selected, on =["date", "ticker"])
    merged.index = merged["date"]

    return merged

In [None]:
# Workflow: load the WRDS price data into a DataFrame indexed by date with a close column, and pass it to deep_momentum_features().
# Then pass the resulting features to include_changepoint_features(), together with the changepoint folder and the lookback window length, to merge in the changepoint columns.


#Then save features to a csv to import to backtest (This will need to be done for each company)


In [27]:
!pip install wrds

Collecting wrds
  Using cached wrds-3.2.0-py3-none-any.whl (13 kB)
Collecting sqlalchemy<2.1,>=2
  Using cached SQLAlchemy-2.0.36-cp310-cp310-win_amd64.whl (2.1 MB)
Collecting psycopg2-binary<2.10,>=2.9
  Using cached psycopg2_binary-2.9.10-cp310-cp310-win_amd64.whl (1.2 MB)
Collecting packaging<23.3
  Using cached packaging-23.2-py3-none-any.whl (53 kB)
Collecting greenlet!=0.4.17
  Using cached greenlet-3.1.1-cp310-cp310-win_amd64.whl (298 kB)
Installing collected packages: psycopg2-binary, packaging, greenlet, sqlalchemy, wrds
  Attempting uninstall: packaging
    Found existing installation: packaging 24.1
    Uninstalling packaging-24.1:
      Successfully uninstalled packaging-24.1
Successfully installed greenlet-3.1.1 packaging-23.2 psycopg2-binary-2.9.10 sqlalchemy-2.0.36 wrds-3.2.0



[notice] A new release of pip is available: 23.0.1 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [82]:
import wrds

# Open the WRDS database session used by the query below.
# The connection message recommends setting up a .pgpass file for credentials.
conn = wrds.Connection()

WRDS recommends setting up a .pgpass file.
You can create this file yourself at any time with the create_pgpass_file() function.
Loading library list...
Done


In [83]:
# Ticker whose daily prices are pulled from CRSP.
ticker = 'LEN'

# Daily close for the ticker between 2016-01-01 and 2023-12-31, computed as
# prc / cfacpr. The join to crsp.dsenames only filters on the ticker, and the
# permno is hard-coded, so both must be changed together for another company.
# NOTE(review): cfacpr is presumably CRSP's cumulative price adjustment factor - confirm.
query = f"""
SELECT DISTINCT
    d.date,
    n.ticker,
    d.prc / d.cfacpr as close
FROM
    crsp.dsf as d
JOIN 
    crsp.dsenames as n on d.permno = n.permno
WHERE
    n.ticker = '{ticker}' and
    d.permno = 52708
    and date BETWEEN '2016-01-01' and '2023-12-31'
ORDER BY
    date
"""

# Run the query; the result has date, ticker and close columns.
df = conn.raw_sql(query)


In [84]:
# Spot-check a positional slice of the raw query result.
df.iloc[260:280]

Unnamed: 0,date,ticker,close
260,2017-01-13,LEN,43.719866
261,2017-01-17,LEN,43.660865
262,2017-01-18,LEN,43.56253
263,2017-01-19,LEN,43.002019
264,2017-01-20,LEN,43.120021
265,2017-01-23,LEN,43.021686
266,2017-01-24,LEN,45.59807
267,2017-01-25,LEN,46.148748
268,2017-01-26,LEN,45.922577
269,2017-01-27,LEN,45.027726


In [85]:
# Index the raw WRDS frame by date. The guard keeps the cell re-runnable: once
# `date` has become the index, a second run would otherwise raise a KeyError.
if "date" in df.columns:
    df = df.assign(date=pd.to_datetime(df["date"])).set_index("date")

# deep_momentum_features adds columns to the frame it receives, so pass a copy.
# The date survives reset_index(drop=True) because it is also stored as a column.
features = deep_momentum_features(df.copy()).reset_index(drop=True)

In [86]:
# Keep only rows dated 2017-02-02 or later.
# NOTE(review): the cutoff presumably matches the first date covered by the changepoint files - confirm.
features = features.loc[features["date"] >= "2017-02-02"]

In [87]:
# Inspect the filtered feature table (pandas truncates the displayed rows).
features

Unnamed: 0,ticker,close,srs,daily_returns,daily_vol,target_returns,norm_daily_return,norm_monthly_return,norm_quarterly_return,norm_biannual_return,norm_annual_return,macd_8_24,macd_16_48,macd_32_96,day_of_week,day_of_month,week_of_year,month_of_year,year,date
20,LEN,44.064040,44.064040,-0.001782,0.015715,0.004428,-0.113403,0.611470,0.689781,-0.115261,0.441968,1.258781,0.742824,-0.371531,3,2,5,2,2017,2017-02-02
21,LEN,44.388546,44.388546,0.007364,0.015501,-0.004997,0.475092,0.371306,0.851360,-0.084056,0.463352,1.260344,0.791195,-0.337405,4,3,5,2,2017,2017-02-03
22,LEN,44.024706,44.024706,-0.008197,0.015333,-0.002065,-0.534576,0.112137,0.810583,-0.252019,0.544595,1.220532,0.820351,-0.319618,0,6,6,2,2017,2017-02-06
23,LEN,43.877203,43.877203,-0.003350,0.015097,0.007996,-0.221932,0.307685,0.649084,-0.253156,0.670842,1.149435,0.828863,-0.303438,1,7,6,2,2017,2017-02-07
24,LEN,44.437714,44.437714,0.012775,0.015009,0.000000,0.851105,0.547500,0.650463,-0.207861,0.739552,1.151552,0.866163,-0.262326,2,8,6,2,2017,2017-02-08
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1753,LEN,147.910000,147.910000,0.007630,0.020420,-0.001533,0.373656,1.781090,1.870533,0.913576,2.041952,1.587148,1.342239,1.266858,3,21,51,12,2023,2023-12-21
1754,LEN,147.420000,147.420000,-0.003313,0.020142,0.002482,-0.164472,1.699509,1.900208,0.962611,1.963080,1.541359,1.346940,1.282230,4,22,51,12,2023,2023-12-22
1755,LEN,148.200000,148.200000,0.005291,0.019809,0.003541,0.267096,1.775423,2.060166,0.789851,1.996594,1.501455,1.352449,1.298869,1,26,52,12,2023,2023-12-26
1756,LEN,149.300000,149.300000,0.007422,0.019487,-0.000390,0.380899,1.982939,2.176356,0.868723,2.093299,1.468283,1.358717,1.315999,2,27,52,12,2023,2023-12-27


In [88]:
# Merge in the changepoint columns for a lookback window of 21, which adds cp_rl_21 and cp_score_21.
# NOTE(review): the saved output lists every file path, which the helper functions shown in this
# notebook do not print - it likely comes from an earlier version of them; re-run to refresh.
features2 = include_changepoint_features(features, "Data/Changepoints/", 21)

Data/Changepoints/ADM.csv
Data/Changepoints/ALB.csv
Data/Changepoints/ALCO.csv
Data/Changepoints/AMZN.csv
Data/Changepoints/BA.csv
Data/Changepoints/BAC.csv
Data/Changepoints/BG.csv
Data/Changepoints/BKNG.csv
Data/Changepoints/C.csv
Data/Changepoints/CAH.csv
Data/Changepoints/CMCSA.csv
Data/Changepoints/DHI.csv
Data/Changepoints/DIS.csv
Data/Changepoints/DUK.csv
Data/Changepoints/FCX.csv
Data/Changepoints/FDP.csv
Data/Changepoints/GOOGL.csv
Data/Changepoints/GPC.csv
Data/Changepoints/GS.csv
Data/Changepoints/GWW.csv
Data/Changepoints/HSIC.csv
Data/Changepoints/INTC.csv
Data/Changepoints/JPM.csv
Data/Changepoints/KO.csv
Data/Changepoints/LEN.csv
Data/Changepoints/LMNR.csv
Data/Changepoints/MCK.csv
Data/Changepoints/META.csv
Data/Changepoints/NEE.csv
Data/Changepoints/NEM.csv
Data/Changepoints/NVR.csv
Data/Changepoints/PEP.csv
Data/Changepoints/PG.csv
Data/Changepoints/PHM.csv
Data/Changepoints/RGLD.csv
Data/Changepoints/SCCO.csv
Data/Changepoints/T.csv
Data/Changepoints/TOL.csv
Data/Cha

In [89]:
# Inspect the merged table; `date` is both the index and a regular column here.
features2

Unnamed: 0_level_0,ticker,close,srs,daily_returns,daily_vol,target_returns,norm_daily_return,norm_monthly_return,norm_quarterly_return,norm_biannual_return,...,macd_16_48,macd_32_96,day_of_week,day_of_month,week_of_year,month_of_year,year,date,cp_rl_21,cp_score_21
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2017-02-02,LEN,44.064040,44.064040,-0.001782,0.015715,0.004428,-0.113403,0.611470,0.689781,-0.115261,...,0.742824,-0.371531,3,2,5,2,2017,2017-02-02,0.482116,0.961223
2017-02-03,LEN,44.388546,44.388546,0.007364,0.015501,-0.004997,0.475092,0.371306,0.851360,-0.084056,...,0.791195,-0.337405,4,3,5,2,2017,2017-02-03,0.528184,0.952850
2017-02-06,LEN,44.024706,44.024706,-0.008197,0.015333,-0.002065,-0.534576,0.112137,0.810583,-0.252019,...,0.820351,-0.319618,0,6,6,2,2017,2017-02-06,0.579471,0.980055
2017-02-07,LEN,43.877203,43.877203,-0.003350,0.015097,0.007996,-0.221932,0.307685,0.649084,-0.253156,...,0.828863,-0.303438,1,7,6,2,2017,2017-02-07,0.616583,0.994674
2017-02-08,LEN,44.437714,44.437714,0.012775,0.015009,0.000000,0.851105,0.547500,0.650463,-0.207861,...,0.866163,-0.262326,2,8,6,2,2017,2017-02-08,0.271415,0.930105
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-12-21,LEN,147.910000,147.910000,0.007630,0.020420,-0.001533,0.373656,1.781090,1.870533,0.913576,...,1.342239,1.266858,3,21,51,12,2023,2023-12-21,0.308042,0.997405
2023-12-22,LEN,147.420000,147.420000,-0.003313,0.020142,0.002482,-0.164472,1.699509,1.900208,0.962611,...,1.346940,1.282230,4,22,51,12,2023,2023-12-22,0.339726,0.997530
2023-12-26,LEN,148.200000,148.200000,0.005291,0.019809,0.003541,0.267096,1.775423,2.060166,0.789851,...,1.352449,1.298869,1,26,52,12,2023,2023-12-26,0.408991,0.993508
2023-12-27,LEN,149.300000,149.300000,0.007422,0.019487,-0.000390,0.380899,1.982939,2.176356,0.868723,...,1.358717,1.315999,2,27,52,12,2023,2023-12-27,0.464861,0.982308


In [90]:
# Name the output after the ticker queried above, so that repeating the workflow
# for another company does not overwrite the LEN file.
features2.to_csv(f"Data/Finished_Datasets/{ticker}.csv")

In [46]:
# Reload the saved dataset. The file was written with `date` as both index and column,
# so pandas reads the second occurrence back under a de-duplicated name (date.1).
# NOTE(review): this cell and the ones after it have lower execution counts than the save cell - re-run in order.
t = pd.read_csv("Data/Finished_Datasets/LEN.csv")

In [48]:
# Parse the dates, then keep only rows dated 2017-02-02 or later.
t["date"] = pd.to_datetime(t["date"])
filtered_t = t.loc[t["date"] >= "2017-02-02"]

In [50]:
# Use the date column as the index. Re-running this cell on its own fails,
# because `date` is no longer a column after the first run.
filtered_t= filtered_t.set_index('date')

In [52]:
# Overwrite the saved dataset with the date-indexed, filtered frame.
filtered_t.to_csv("Data/Finished_Datasets/LEN.csv")