# Task Description

In [1]:
# The task would be to test a long-only stock-based carry/value strategy.

# Carry strategy in stocks would be to long high dividend yield stocks and short the reverse (in this case no need to do short).
# Whole value strategy you should be familiar with.

# Carry trade in rate hike periods work pretty well (for fx especially). We are interested in whether the same hold for value. 
# The underlying assets would be S&P 500 stocks.

# Please take into account that some companies do stock repurchase instead of cash dividend.
# You could try to find adjusted dividend yield data. Time horizon is up to you, but keep in mind potential survivorship bias.
# It might be interesting to check the strategy performance during different periods. 
# Remember to make the backtest a way that they could be implemented (not longing 100 stocks at the same time). 
# Have a nice night and good luck with your presentation on Monday.

# Setup

## Load Packages

In [2]:
#### Import required Packages ####
import numpy as np
import pandas as pd
import seaborn as sb
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.dates import DateFormatter
import scipy as sp
import statsmodels.api as sm
from sklearn.model_selection import train_test_split
import sklearn as sk
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error # to calculate the MSE
from sklearn.model_selection import TimeSeriesSplit
from sklearn.model_selection import cross_val_score

from statsmodels.graphics.tsaplots import plot_acf # To produce ACF plots
from statsmodels.graphics.tsaplots import plot_pacf # To produce PACF plots
from statsmodels.tsa.seasonal import seasonal_decompose # To decompose Seasons
from statsmodels.tsa.stattools import adfuller, kpss # Tests for Stationarity
from statsmodels.tsa.ar_model import AutoReg # To produce AR models
from statsmodels.stats.anova import anova_lm # To use ANOVA (compare nested models)
from statsmodels.tsa.arima.model import ARIMA # To build ARMA & ARIMA Models
import statsmodels.stats.diagnostic as dg # To get Breusch-Godfrey Test
from statsmodels.stats.stattools import durbin_watson



from datetime import datetime # to transform variables into datetime objects
import math # simple math functions
from math import sqrt # square root function
import statistics # descriptive statistics library
import scipy.stats as stats # descriptive statistics library from scipy
import matplotlib.dates as mdates # date formatting
from matplotlib.collections import PolyCollection, LineCollection # better plot options

  from pandas import (to_datetime, Int64Index, DatetimeIndex, Period,
  from pandas import (to_datetime, Int64Index, DatetimeIndex, Period,


In [3]:
def h(x, y): 
    return x * y                             
vecmult = np.vectorize(h)    # Use pd.DataFrame(vecmult(A,B)) to get df of elementwise multiplication

## Plotstyle

In [4]:
# Seaborn plot style ticks to have nicer looking plots
sb.set_style("ticks")
sb.mpl.rc("figure", figsize=(16,8))
sb.mpl.rc("font", size=14)
plt.rcParams['xtick.direction'] = 'in'
plt.rcParams['ytick.direction'] = 'in'

## LoadData

In [5]:
# Loading Data (takes some time)

div = pd.read_excel("02_Data_clean/SPX_div_clean_Nick.xlsx", parse_dates=["Date"])
ret = pd.read_excel("02_Data_clean/SPX_px_clean_Nick.xlsx", parse_dates=["Date"])

# Check
# print(type(div))
# print(div.dtypes)
# print(type(ret))
# print(ret.dtypes)

KeyboardInterrupt: 

In [None]:
# # set date columns as indices

# ret.set_index(['Date'], inplace = True)
# #ret.head()

# # Resample monthly returns (takes some time)
# mtl_ret = ret.pct_change().resample("M").agg(lambda x: ((1+x).prod()-1)) # pct_change creates ordinary returns, resample Monthly and aggregating with the (1+x) -1 formula to get monthly ordinary returns

# # df = pd.concat([data, data2], axis=1) # concating dfs
# # data.set_index("Date", inplace=True) # set datetimeindex

## Sorting the signal

In [None]:
# in a first step we create a dataframe which we will fill with the weights of the carry strategy each month
### Important ### for carry we should use the (expected) dividend yield minus the riskfree ### Important ###
divv = div
cols = (divv.columns[1:])
rows = divv.iloc[:,0]

weights = pd.DataFrame(index = rows, columns = (cols))
weights = weights.resample("M").mean()

In [None]:
weights

In [None]:
# the approach is as follows: 
# 1. take a moving average of the 12 month previous carry signal, ignoring the most recent month
# 2. for each month calculate the 10 stocks considered for the long strategy
# 3. calculate the PF return
# 4. take into account the transaction costs

div = div.set_index(['Date'])

In [None]:
# resample the data monthly and take the avg of the div yield

div = div.resample('M').mean()
#div

In [None]:
# 1. take a moving average of the 12 month previous carry signal, ignoring the most recent month

# omitted at first for simpler calculations

In [None]:
# 2. for each month calculate the 10 stocks considered for the long strategy (takes some time)

for i in div.index:
    x = div.loc[i,:].nlargest(10)
    for j in div.columns:
        if j in x.index:
            weights.loc[i,j] = 0.1
        else:
            weights.loc[i,j] = 0
weights

## Return calculation

In [None]:
# in a first step we create a dataframe which we will fill with the weighted returns of the carry strategy each month

weighted_ret = weights

In [None]:
for i in weighted_ret.index:
    for j in weighted_ret.columns:
        weighted_ret.loc[i,j] = weighted_ret.loc[i,j]*mtl_ret.loc[i,j]
        
weighted_ret['PF_ret'] = 0
weighted_ret

In [None]:
# now we sum over each row to calculate the PF return and put it in the respective columns

for i in weighted_ret.index:
    weighted_ret.loc[i,'PF_ret'] = weighted_ret.loc[i,:].sum()

weighted_ret

In [None]:
weighted_ret['PF_ret'].agg(lambda x: ((1+x).prod()-1))

In [None]:
sb.ecdfplot(x = weighted_ret['PF_ret'])

# Approach 2 --> running 12-month avg of carry signal --> weekly rebalance

In [None]:
# the approach is as follows: 
# 1. take a moving average of the 12 month previous carry signal, ignoring the most recent month
# 2. for each month calculate the 10 stocks considered for the long strategy
# 3. calculate the PF return
# 4. take into account the transaction costs

In [None]:
# Loading Data (takes some time)

div = pd.read_excel("02_Data_clean/SPX_div_clean_Nick.xlsx", parse_dates=["Date"])
ret = pd.read_excel("02_Data_clean/SPX_px_clean_Nick.xlsx", parse_dates=["Date"])

# safe the div as divv in case we need it later
divv = div 

# reset the index to be a date
div = div.set_index(['Date'])
ret = ret.set_index(['Date'])

# Check
# print(type(div))
# print(div.dtypes)
# print(type(ret))
# print(ret.dtypes)

In [None]:
# create 12-month running avg of carry signal (i.e., div yield) lagged by one month

for i in range(52, len(div.index)-4):
    x = div.iloc[i-52:i,:].mean()
    div.iloc[i+4,:] = x


In [None]:
# in a first step we create a dataframe which we will fill with the weights of the carry strategy each month
### Important ### for carry we should use the (expected) dividend yield minus the riskfree ### Important ###
cols = divv.columns[1:]
rows = divv.iloc[:,0]

weights = pd.DataFrame(index = rows, columns = (cols))

In [None]:
# create weights dataframe

for i in div.index:
    x = div.loc[i,:].nlargest(10)
    for j in div.columns:
        if j in x.index:
            weights.loc[i,j] = 0.1
        else:
            weights.loc[i,j] = 0

weighted_ret = weights
weights

In [None]:
# calculate PF returns (takes ages)

for i in weighted_ret.index[52+4:]:
    for j in weighted_ret.columns:
        weighted_ret.loc[i,j] = weighted_ret.loc[i,j]*ret.loc[i,j]
        
weighted_ret['PF_ret'] = 0
weighted_ret

# first 52 weeks have no return calculated since they are only used for PF weights creation in 2000-12

In [None]:
# now we sum over each row to calculate the PF return and put it in the respective columns

for i in weighted_ret.index[52+4:]:
    weighted_ret.loc[i,'PF_ret'] = weighted_ret.loc[i,:].sum()

weighted_ret

In [None]:
weighted_ret.iloc[52+4:,500].agg(lambda x: ((1+x).prod()-1))

In [None]:
x = [i for i in weighted_ret['PF_ret'] if i > 0.05]
x

In [None]:
y = [i for i in weighted_ret['PF_ret'] if i < -0.05]
y

In [None]:
sb.scatterplot(y = weighted_ret['PF_ret'], x = weighted_ret.index, s = 5)

In [None]:
sb.lineplot(y = weighted_ret['PF_ret'], x = weighted_ret.index)

In [None]:
x = 1
for i in weighted_ret.iloc[52:,500]:
    #print(i)
    x = (1+i)*x
print(x)
PF_return_annual = (x**(52/225))-1
PF_return_annual

# yielding an annual return of 6%

In [None]:
weighted_ret.iloc[52:,500].shape

In [None]:
PF_mean = weighted_ret['PF_ret'].mean()
PF_mean

In [None]:
PF_std = weighted_ret['PF_ret'].std() * (52)**0.5

In [None]:
(PF_return_annual - 0.01)/PF_std

# Approach 3 --> running 12-month avg of carry signal --> monthly rebalanced

In [None]:
# Loading Data (takes some time)

div = pd.read_excel("02_Data_clean/SPX_div_clean_Nick.xlsx", parse_dates=["Date"])
ret = pd.read_excel("02_Data_clean/SPX_px_clean_Nick.xlsx", parse_dates=["Date"])

# safe the div as divv in case we need it later
divv = div 

# reset the index to be a date
div = div.set_index(['Date'])
ret = ret.set_index(['Date'])

# Check
# print(type(div))
# print(div.dtypes)
# print(type(ret))
# print(ret.dtypes)

In [None]:
# Resample monthly dividends (always take the last of the month to be the monthly div yield)
mtl_div = div.resample("M").ffill()
rett = ret.resample("M").agg(lambda x: ((1+x).prod()-1))
#print(mtl_div)
#print(rett)

In [None]:
# create 12-month running avg of carry signal (i.e., div yield) lagged by one month

for i in range(12, len(div.index)-1):
    x = div.iloc[i-12:i,:].mean()
    div.iloc[i+1,:] = x


In [None]:
# in a first step we create a dataframe which we will fill with the weights of the carry strategy each month

cols = divv.columns[1:]
rows = divv.iloc[:,0]

weights = pd.DataFrame(index = rows, columns = (cols))
weights = weights.resample("M").ffill()

In [None]:
# create weights dataframe

for i in mtl_div.index:
    x = mtl_div.loc[i,:].nlargest(10)
    for j in mtl_div.columns:
        if j in x.index:
            weights.loc[i,j] = 0.1
        else:
            weights.loc[i,j] = 0

weights

In [None]:
# # cut before being able to calculate

# des_weights = weights.loc['2001-01-31':'2022-12-02',:]
# ret_2 = rett.loc['2001-01-31':'2022-12-02',:]
# weighted_ret = pd.DataFrame(vecmult(des_weights, ret_2))
# weighted_ret

# # gid bimir was falsches

In [None]:
# calculate PF returns (takes some time)
weighted_ret = weights

for i in weights.index[12+1:]:
    for j in weights.columns:
        weighted_ret.loc[i,j] = weighted_ret.loc[i,j]*rett.loc[i,j]
        
weighted_ret['PF_ret'] = 0
weighted_ret

# first 12 months have no return calculated since they are only used for PF weights creation in 2001

In [None]:
# now we sum over each row to calculate the PF return and put it in the respective columns

for i in weighted_ret.index[12+1:]:
    weighted_ret.loc[i,'PF_ret'] = weighted_ret.loc[i,:].sum()

weighted_ret

In [None]:
sb.lineplot(y = weighted_ret['PF_ret'], x = weighted_ret.index)

In [None]:
x = [i for i in weighted_ret['PF_ret'] if i > 0.05]
x

In [None]:
x = [i for i in weighted_ret['PF_ret'] if i < -0.05]
x

In [None]:
x = 1
for i in weighted_ret.iloc[12+1:,500]:
    #print(i)
    x = (1+i)*x
PF_return_annual = (x**(12/(264)))-1
PF_return_annual

# yielding an annual return of -23%

In [None]:
weighted_ret.iloc[12+1:,500].shape