## Technical Analysis Shenanigans

Exploring the `ta` (technical analysis) library and gathering the data we will use for our MVP.

In [1]:
!pip install ta nbimporter wrds cufflinks chart_studio plotly



You should consider upgrading via the 'c:\users\rjsmi\appdata\local\programs\python\python38\python.exe -m pip install --upgrade pip' command.


To get cufflinks and plotly figures to render in JupyterLab, the following lab extension must be installed:
`!jupyter labextension install jupyterlab-plotly`

In [2]:
import nbimporter
from credentials import WRDS_USERNAME

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from tqdm.notebook import tqdm
import csv
import wrds
import cufflinks as cf

In [3]:
from ta import add_all_ta_features
from ta.utils import dropna

In [4]:
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
  
# Initialise Plotly's notebook mode so figures render inline in the notebook.
# connected=True makes Plotly load plotly.js from a CDN instead of embedding it
# in the notebook (per the Plotly offline docs), so an internet connection is needed.
init_notebook_mode(connected = True)

## Load WRDS data from CSV

In [13]:
# Read the CSV at data/sectors/SP500_7yr_daily.csv, pass it through ta's
# `dropna` helper, and report the row count on either side of that cleanup.
df = pd.read_csv('data/sectors/SP500_7yr_daily.csv')
rows_before = len(df)
print("Number of data points before drop: {}".format(rows_before))

df = dropna(df)
rows_after = len(df)
print("Number of data points after drop: {}".format(rows_after))

Number of data points before drop: 1763
Number of data points after drop: 1762


In [15]:
# assert(df["spindx"].dtype == float)
# Convert price data to float (from str)
# df["SP500"] = df["SP500"].apply(float)

In [16]:
# Configure cufflinks: use the 'pearl' theme and mark charts as not
# world-readable, then switch cufflinks to offline mode so that `.iplot()`
# renders locally (presumably without a Chart Studio account — confirm).
cf.set_config_file(theme='pearl', world_readable=False)
cf.go_offline()

In [18]:
# Interactive (cufflinks) chart of the `close` column of the frame loaded above, drawn in red.
df["close"].iplot(title='S&P 500 7-yr Close', colors=['red'])

In [19]:
# Add ta features filling NaN values
# df = add_all_ta_features(
#     df, close="SP500", fillna=True)
# df = add_all_ta_features(
    # df, open="Open", high="High", low="Low", close="Close", volume="Volume", fillna=True)

### Plot Sectors

Sector data has been collected for each sector using its CUSIP ID (see: data_collection.ipynb). We now aim to plot each sector relative to the S&P 500.

In [20]:
import os
from sector import *

In [24]:
# Build a {ticker: Stock} mapping with one entry per sector CSV in data/sectors.
stocks = {}
tkr_dict = get_ticker_dict(sectors)

# Only files named like "XL<...>_<...>" are treated as S&P 500 sub-sector data.
sector_files = [
    name for name in os.listdir("data/sectors")
    if name.startswith("XL") and "_" in name
]

for fname in sector_files:
    # The ticker is everything before the first underscore in the file name.
    tkr = fname.split("_")[0]
    stock = Stock(tkr, None, tkr_dict[tkr])

    frame = pd.read_csv('data/sectors/' + fname, sep=',')
    # Move the "date" column out of the columns and into a parsed datetime index.
    frame.index = pd.to_datetime(frame.pop("date"), format='%Y-%m-%d')

    stock.data = frame
    stocks[tkr] = stock

In [25]:
# stocks["XLC"].data

In [26]:
stocks["XLC"]

           ticker                   descr        low       high      close  \
date                                                                         
2018-06-19    XLC  Communication Services  49.580002  50.060001  49.959999   
2018-06-20    XLC  Communication Services  50.450001  50.889000  50.580002   
2018-06-21    XLC  Communication Services  50.200001  50.849998  50.270000   
2018-06-22    XLC  Communication Services  50.189999  50.610001  50.490002   
2018-06-25    XLC  Communication Services  49.014999  50.230000  49.450001   
...           ...                     ...        ...        ...        ...   
2021-12-27    XLC  Communication Services  78.339996  79.044998  78.870003   
2021-12-28    XLC  Communication Services  78.714996  79.544998  78.830002   
2021-12-29    XLC  Communication Services  78.192902  79.070000  78.440002   
2021-12-30    XLC  Communication Services  78.610001  79.190002  78.769997   
2021-12-31    XLC  Communication Services  77.654999  78.870003 

In [108]:
import datetime

def sectplot(col, stock_dict, yTitle="Value ($)", interval=None):
    """Plot one column of every stock in ``stock_dict`` on a shared chart.

    Parameters
    ----------
    col : str
        Name of the column to read from each ``stock.data`` frame.
    stock_dict : dict
        Mapping whose values expose ``.data`` (a DataFrame with a datetime
        index) and ``.desc`` (used as the series label). Only the stocks in
        this mapping are plotted.
    yTitle : str
        Y-axis title passed through to ``iplot``.
    interval : str or None
        ``None`` plots the raw column. ``"monthly"`` sums ``col`` within each
        calendar month. Any other truthy value (e.g. ``"yearly"``) sums
        ``col`` within consecutive 12-month periods anchored on the first day
        of the month in which the stock's data starts.

    Notes
    -----
    * Period boundaries are generated with calendar offsets, so no month is
      skipped and the periods extend through the last available data point.
    * Series are combined on the union of their indexes; a stock with a
      shorter history shows NaN outside its own date range.
    * NOTE(review): values are *summed* per period. For a return column such
      as ``retx`` this is an arithmetic approximation, not a compounded
      return — confirm this is intended.
    """

    def _period_starts(first_ts, last_ts, months_per_period):
        # Boundaries start on the 1st of the month containing `first_ts` and
        # advance by whole calendar months until one boundary lies past
        # `last_ts`, so every observation falls inside exactly one period.
        step = pd.DateOffset(months=months_per_period)
        cursor = pd.Timestamp(first_ts).normalize().replace(day=1)
        final = pd.Timestamp(last_ts)
        starts = [cursor]
        while cursor <= final:
            cursor = cursor + step
            starts.append(cursor)
        return starts

    def _aggregate(series, months_per_period):
        # Sum `series` over half-open periods [start, next_start); the result
        # is indexed by each period's start date.
        if series.empty:
            return series
        starts = _period_starts(series.index.min(), series.index.max(), months_per_period)
        totals = [
            series[(series.index >= lo) & (series.index < hi)].sum()
            for lo, hi in zip(starts, starts[1:])
        ]
        return pd.Series(totals, index=starts[:-1], name=series.name)

    columns = {}
    # Iterate over the mapping that was passed in, not a notebook-level global.
    for stock in stock_dict.values():
        series = stock.data[col]
        if interval:
            series = _aggregate(series, 1 if interval == "monthly" else 12)
        columns["{}".format(stock.desc)] = series

    # Building the frame from a dict of Series aligns on the union of all
    # indexes instead of truncating every series to the first one's index.
    df = pd.DataFrame(columns)

    df.iplot(title='S&P 500 Sectors ({})'.format(col),
             yTitle=yTitle,
             xTitle="Date"
             )

In [61]:
# Replace every sector's frame with the result of ta's `add_all_ta_features`,
# using the lower-case price/volume column names; no open column is supplied.
# The captured output below shows this call emits numpy "invalid value" warnings.
ta_columns = dict(open=None, high="high", low="low", close="close", volume="vol")

for sector_stock in stocks.values():
    sector_stock.data = add_all_ta_features(
        sector_stock.data,
        fillna=True,
        **ta_columns)


invalid value encountered in double_scalars


invalid value encountered in double_scalars


invalid value encountered in double_scalars


invalid value encountered in double_scalars


invalid value encountered in double_scalars


invalid value encountered in double_scalars


invalid value encountered in double_scalars


invalid value encountered in double_scalars


invalid value encountered in double_scalars


invalid value encountered in double_scalars


invalid value encountered in double_scalars


invalid value encountered in double_scalars


invalid value encountered in double_scalars


invalid value encountered in double_scalars


invalid value encountered in double_scalars


invalid value encountered in double_scalars


invalid value encountered in log


invalid value encountered in double_scalars


invalid value encountered in double_scalars


invalid value encountered in double_scalars


invalid value encountered in double_scalars


invalid value encountered in double_scalars


In [105]:
# Plot the raw `close` column of every loaded sector on one chart (no interval aggregation).
sectplot("close", stocks)

### So what are we predicting?

Well, we want to see the return of each of these sectors relative to the S&P 500. We wish to look at the change in return over an interval, so let's visualize the % return (`retx`) over time, computed separately for each interval rather than accumulated across intervals.

In [109]:
# Plot each sector's `retx` column, summed per monthly interval by `sectplot`.
sectplot("retx", stocks, interval="monthly", yTitle="Monthly Return (%)")

In [99]:
# cufflinks 'spread' chart of the XLC sector's `close` column, using green/red as the colours.
stocks["XLC"].data['close'].iplot(kind='spread',colors=['green','red'])

From 2018-06-19 00:00:00 to 2021-12-31 00:00:00
