In [19]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
import seaborn as sns
import yfinance as yf
from datetime import datetime, timedelta
import statsmodels.api as sm

# meta parameters: 

FREQ = "1d" # valid intervals: 1m,2m,5m,15m,30m,60m,90m,1h,1d,5d,1wk,1mo,3mo 

In [20]:
def download_data(ticker, start, end, freq = "1d") -> pd.DataFrame:
    if type(ticker) == str:
        data = yf.download(ticker, start = start, end = end, interval = freq, progress = False)
        data.columns = [(f"{ticker}_"+col).lower() for col in data.columns]
    else: 
        if len(ticker) > 1:
            data = [yf.download(t, start = start, end = end, interval = freq, progress = False).rename(
                columns = {x : f"{t}_{x}" for x in yf.download(t, start = start, end = end, interval = freq, progress = False).columns}
            ) for t in ticker]
            data = pd.concat(data, axis = 1)
            data.columns = [col.lower() for col in data.columns]
    return data

test = download_data(["FB", "AAPL", "AMZN", "NFLX", "GOOG"], "2017-1-1", "2018-1-1")
test.to_csv("test.csv", encoding = "utf-8")

In [44]:
df = pd.DataFrame({"a":[1,2,3], "b":[2,4,6]})
def add(a,b):
    return a+b
df.rolling(2).apply(lambda x : print(x))

0    1.0
1    2.0
dtype: float64


DataError: No numeric types to aggregate

In [33]:
# WRITEME: write class for util_finance

class util_finance:
    def __init__(self, ticker, start, end): 
        self.ticker = ticker
        self.start = start 
        self.end = end
        self.data = download_data(ticker, start, end)
        self.o = self.data.loc[:,[col for col in self.data.columns if "_open" in col][0]]
        self.h = self.data.loc[:,[col for col in self.data.columns if "_high" in col][0]]
        self.c = self.data.loc[:,[col for col in self.data.columns if "_close" in col][0]]
        self.l = self.data.loc[:,[col for col in self.data.columns if "_low" in col][0]]
        self.adjc = self.data.loc[:,[col for col in self.data.columns if "adj close" in col][0]]
    
    # statistical functions:
    
    def logprice(self, close = "adjc"):
        return np.log(self.adjc)
        
    def ret(self, close = "adjc"):
        if close == "adjc":
            return self.adjc.pct_change()
        elif close == "c":
            return self.c.pct_change()
    
    def hh(self,lookback = 10):
        return self.h.rolling(lookback).max()
    
    def ll(self, lookback = 10):
        return self.l.rolling(lookback).min()
    
    def hl_ratio(self, lookback = 10):
        return self.hh(lookback)/self.ll(lookback)

    def geom_rolling_range(self, lookback = 10, roll_window = 10):
        hlr = self.hl_ratio(lookback) 
        return hlr / hlr.rolling(roll_window).mean()
    
    def macd(self, periods = (12,26,9)):
        adjc = self.adjc()
        k = adjc.ewm(span=periods[0], adjust=False, min_periods=periods[0]).mean()
        d = adjc.ewm(span=periods[1], adjust=False, min_periods=periods[1]).mean()
        macd = k - d
        macd_s = macd.ewm(span=periods[2], adjust=False, min_periods=periods[2]).mean()
        return macd - macd_s
    
    def vol_ohlc(self, lookback = 10):
        o = self.o
        h = self.h
        l = self.l
        c = self.c
        k = 0.34 / (1.34 + (lookback+1)/(lookback-1))
        cc = np.log(c/c.shift(1))
        ho = np.log(h/o)
        lo = np.log(l/o)
        co = np.log(c/o)
        oc = np.log(o/c.shift(1))
        oc_sq = oc**2
        cc_sq = cc**2
        rs = ho*(ho-co)+lo*(lo-co)
        close_vol = cc_sq.rolling(lookback).sum() * (1.0 / (lookback - 1.0))
        open_vol = oc_sq.rolling(lookback).sum() * (1.0 / (lookback - 1.0))
        window_rs = rs.rolling(lookback).sum() * (1.0 / (lookback - 1.0))
        result = (open_vol + k * close_vol + (1-k) * window_rs).apply(np.sqrt) * np.sqrt(252)
        result[:lookback-1] = np.nan
        return result
        
    def mdd(self, lookback = 20):
        adjc = self.adjc
        roll_max = adjc.rolling(lookback).max()
        dd = adjc/roll_max - 1
        mdd = dd.rolling(lookback).min()
        return mdd
        
    def OLS(self, market = "SPY"): # global beta
        mkt = download_data(market, self.start, self.end)
        mkt_adjc = mkt.loc[:,f"{market.lower()}_adj close"]
        mkt_ret = mkt_adjc.pct_change()
        y = mkt_ret
        X = self.ret()
        X = sm.add_constant(X)
        model = sm.OLS(y.dropna(how = "any"), 
                       X.dropna(how = "any"))
        results = model.fit()
        return results
    
    def beta(self, market = "SPY"):
        return self.OLS(market = market).params[1]
    
    def alpha(self, market = "SPY"):
        return self.OLS(market = market).params[0]
    
    def tbeta(self, market = "SPY"):
        return self.OLS(market = market).tvalues[1]
    
    def talpha(self, market = "SPY"):
        return self.OLS(market = market).tvalues[0]
        
    def OLS_rolling(self, market = "SPY", lookback = 60):
        mkt = download_data(market, self.start, self.end)
        mkt_adjc = mkt.loc[:,f"{market.lower()}_adj close"]
        mkt_ret = mkt_adjc.pct_change()
        def run(y, X):
            X = sm.add_constant(X)
            model = sm.OLS(y.dropna(how = "any"), 
                           X.dropna(how = "any"))
            results = model.fit()
            return results
        
        df = pd.concat([mkt_ret, self.ret()], axis = 1)
        print(df)
        df.rolling(lookback).apply(lambda x : print("xshape", x.shape))
        
        
    
    # plotting functions:
    
    
    
    
        

# def hh(df: pd.DataFrame, lookback: int = 14):
#     h = [col for col in df.columns if "_high" in col][0]
#     h = df.loc[:,h]
#     return h.rolling(lookback).max()

# def ll(df: pd.DataFrame, lookback: int = 14):
#     l = [col for col in df.columns if "_low" in col][0]
#     l = df.loc[:,l]
#     return l.rolling(lookback).min()

# test: 
# aapl = test.loc[:, [col for col in test.columns if "aapl_" in col]]
# hh(aapl).plot()

uf = util_finance("IBM", "2018-01-01", "2021-01-01")
uf.OLS_rolling()

            spy_adj close  ibm_adj close
Date                                    
2018-01-02            NaN            NaN
2018-01-03       0.006325       0.027488
2018-01-04       0.004215       0.020254
2018-01-05       0.006664       0.004885
2018-01-08       0.001829       0.006031
...                   ...            ...
2020-12-24       0.003890       0.006376
2020-12-28       0.008591       0.001043
2020-12-29      -0.001908      -0.008172
2020-12-30       0.001427       0.004362
2020-12-31       0.005081       0.012385

[756 rows x 2 columns]
xshape (60,)


DataError: No numeric types to aggregate

In [None]:
n = 3
plt.figure(figsize = [20, 5])
uf = util_finance("SPY", "2018-01-01", "2018-04-01")
obj = uf.ret()
for i in range(n):
    if i == 0:
        ((obj )/obj.std()).shift(-1).plot(label = "0 shifted", linestyle = "--")
        ((obj - obj.mean())/obj.std()).plot(label = "0 unshifted", color = "purple", linestyle = "--")
    else:
        ((obj - obj.mean())/obj.std()).plot(label = i)
    
    plt.legend()
    plt.plot(obj.index, [0]*len(obj.index), color = "black", linestyle = "-")
    obj = obj.diff()