# Task Description

In [None]:
# The task would be to test a long-only stock-based carry/value strategy.

# Carry strategy in stocks would be to long high dividend yield stocks and short the reverse (in this case no need to do short).
# Whole value strategy you should be familiar with.

# Carry trade in rate hike periods work pretty well (for fx especially). We are interested in whether the same hold for value. 
# The underlying assets would be S&P 500 stocks.

# Please take into account that some companies do stock repurchase instead of cash dividend.
# You could try to find adjusted dividend yield data. Time horizon is up to you, but keep in mind potential survivorship bias.
# It might be interesting to check the strategy performance during different periods. 
# Remember to make the backtest a way that they could be implemented (not longing 100 stocks at the same time). 
# Have a nice night and good luck with your presentation on Monday.

# Setup

## Load Packages

In [1]:
#### Import required Packages ####
import numpy as np
import pandas as pd
import seaborn as sb
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.dates import DateFormatter
import scipy as sp
import statsmodels.api as sm
from sklearn.model_selection import train_test_split
import sklearn as sk
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error # to calculate the MSE
from sklearn.model_selection import TimeSeriesSplit
from sklearn.model_selection import cross_val_score

from statsmodels.graphics.tsaplots import plot_acf # To produce ACF plots
from statsmodels.graphics.tsaplots import plot_pacf # To produce PACF plots
from statsmodels.tsa.seasonal import seasonal_decompose # To decompose Seasons
from statsmodels.tsa.stattools import adfuller, kpss # Tests for Stationarity
from statsmodels.tsa.ar_model import AutoReg # To produce AR models
from statsmodels.stats.anova import anova_lm # To use ANOVA (compare nested models)
from statsmodels.tsa.arima.model import ARIMA # To build ARMA & ARIMA Models
import statsmodels.stats.diagnostic as dg # To get Breusch-Godfrey Test
from statsmodels.stats.stattools import durbin_watson



from datetime import datetime # to transform variables into datetime objects
import math # simple math functions
from math import sqrt # square root function
import statistics # descriptive statistics library
import scipy.stats as stats # descriptive statistics library from scipy
import matplotlib.dates as mdates # date formatting
from matplotlib.collections import PolyCollection, LineCollection # better plot options

  from pandas import (to_datetime, Int64Index, DatetimeIndex, Period,
  from pandas import (to_datetime, Int64Index, DatetimeIndex, Period,


## Plotstyle

In [None]:
# Seaborn plot style ticks to have nicer looking plots
sb.set_style("ticks")
sb.mpl.rc("figure", figsize=(16,8))
sb.mpl.rc("font", size=14)
plt.rcParams['xtick.direction'] = 'in'
plt.rcParams['ytick.direction'] = 'in'

## LoadData

In [2]:
# Loading Data (takes some time)

div = pd.read_excel("02_Data_clean/SPX_div_clean.xlsx", parse_dates=["Date"])
ret = pd.read_excel("02_Data_clean/SPX_px_clean.xlsx", parse_dates=["Date"])

# Check
# print(type(div))
# print(div.dtypes)
# print(type(ret))
# print(ret.dtypes)

In [None]:
# set date columns as indices

ret.set_index(['Date'], inplace = True)
#ret.head()

## Data Cleaning

In [None]:
# Resample monthly returns (takes some time)
mtl_ret = ret.pct_change().resample("M").agg(lambda x: ((1+x).prod()-1)) # pct_change creates ordinary returns, resample Monthly and aggregating with the (1+x) -1 formula to get monthly ordinary returns

# df = pd.concat([data, data2], axis=1) # concating dfs
# data.set_index("Date", inplace=True) # set datetimeindex

## Sorting the signal

In [3]:
# in a first step we create a dataframe which we will fill with the weights of the carry strategy each month
### Important ### for carry we should use the (expected) dividend yield minus the riskfree ### Important ###

cols = (div.columns[1:])
rows = div.iloc[:,0]

weights = pd.DataFrame(index = rows, columns = (cols))

In [4]:
# the approach is as follows: 
# 1. take a moving average of the 12 month previous carry signal, ignoring the most recent month
# 2. for each month calculate the 10 stocks considered for the long strategy
# 3. calculate the PF return
# 4. take into account the transaction costs
divv = div
div = div.set_index(['Date'])

In [5]:
# resample the data monthly and take the avg of the div yield

div = div.resample('M').mean()
#div

In [None]:
# 1. take a moving average of the 12 month previous carry signal, ignoring the most recent month

# omitted at first for simpler calculations

In [6]:
# 2. for each month calculate the 10 stocks considered for the long strategy (takes some time)

for i in div.index:
    x = div.loc[i,:].nlargest(10)
    for j in div.columns:
        if j in x.index:
            weights.loc[i,j] = 0.1
        else:
            weights.loc[i,j] = 0
weights

Unnamed: 0_level_0,AEE UN Equity,ITT UN Equity,UN UN Equity,APD UN Equity,RDPL UN Equity,GOLD UN Equity,VZ UN Equity,CAT UN Equity,CVX UN Equity,KO UN Equity,...,PTC UQ Equity,QCOM UQ Equity,SPLS UQ Equity,XLNX UQ Equity,1519128D UQ Equity,NTAP UQ Equity,CTXS UQ Equity,9990253D UN Equity,0948669D UN Equity,CMCSK UQ Equity
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1999-12-31,0,0.1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2000-01-07,,,,,,,,,,,...,,,,,,,,,,
2000-01-14,,,,,,,,,,,...,,,,,,,,,,
2000-01-21,,,,,,,,,,,...,,,,,,,,,,
2000-01-28,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-07-31,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2022-08-31,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2022-10-31,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2022-11-30,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [None]:
x = div.loc['1999-12-31',:].nlargest(10)
x.index

In [None]:
div.columns

In [None]:
# 2. for each month calculate the 10 stocks considered for the long strategy

for i in range(len(div.iloc[:5,0])):
    x = div.iloc[i,:].nlargest(10)
    print(x)
    for j in div.index:
        if j in x.index:
            print(div.loc[i,j])
        else:
            print(div.loc[i,j])
weights