In [106]:
import numpy as np
import pandas as pd


import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
import plotly.io as pio
pio.renderers.default = "svg"
import random

import statsmodels.api as sm
import scipy.stats as stats

import yfinance as yf

In [107]:
import packages.data_analysis as da

In [68]:
## get the data
es_data = yf.download("AAPL", start='2018-01-01', end='2024-11-10', interval="1d") 
es_data.columns = es_data.columns.get_level_values(0)
es_data.index = pd.to_datetime((es_data.index).tz_localize(None))
print(es_data.shape)

[*********************100%***********************]  1 of 1 completed

(1726, 6)





In [69]:
es_data.head()

Price,Adj Close,Close,High,Low,Open,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2018-01-02,40.524349,43.064999,43.075001,42.314999,42.540001,102223600
2018-01-03,40.517292,43.057499,43.637501,42.990002,43.1325,118071600
2018-01-04,40.705486,43.2575,43.3675,43.02,43.134998,89738400
2018-01-05,41.168938,43.75,43.842499,43.262501,43.360001,94640000
2018-01-08,41.016014,43.587502,43.9025,43.482498,43.587502,82271200


In [100]:
# Calculate the daily volatility
def getDailyVol(close, span0=100):
    df0 = close.index.searchsorted(close.index-pd.Timedelta(days=1))
    df0 = df0[df0>0]
    df0 = pd.Series(close.index[df0-1], index=close.index[close.shape[0]-df0.shape[0]:])
    df0=close.loc[df0.index]/close.loc[df0.values].values-1
    #df0 = df0.ewm(span=span0).std()
    return df0

In [101]:
getDailyVol(es_data['Close'])

Date
2018-01-04    0.004470
2018-01-05    0.016083
2018-01-08   -0.003714
2018-01-09   -0.003829
2018-01-10   -0.000344
                ...   
2024-11-04   -0.004038
2024-11-05    0.002422
2024-11-06    0.003198
2024-11-07    0.018035
2024-11-08    0.019037
Name: Close, Length: 1724, dtype: float64

In [103]:
es_data['Close']/es_data['Close'].shift(-1) - 1

Date
2018-01-02    0.000174
2018-01-03   -0.004623
2018-01-04   -0.011257
2018-01-05    0.003728
2018-01-08    0.000115
                ...   
2024-11-04   -0.006444
2024-11-05    0.003278
2024-11-06   -0.020925
2024-11-07    0.002291
2024-11-08         NaN
Name: Close, Length: 1726, dtype: float64

In [104]:
close = es_data['Close']
# df0 is the places that the previous day should stay in the old index array
df0 = close.index.searchsorted(close.index-pd.Timedelta(days=1))
# take positive positions
df0 = df0[df0>0]

df0 = pd.Series(close.index[df0-1], index=close.index[close.shape[0]-df0.shape[0]:])
print(close.loc[df0.index])
print(close.loc[df0.values])
df0=close.loc[df0.index]/close.loc[df0.values]-1
df0

Date
2018-01-04     43.257500
2018-01-05     43.750000
2018-01-08     43.587502
2018-01-09     43.582500
2018-01-10     43.572498
                 ...    
2024-11-04    222.009995
2024-11-05    223.449997
2024-11-06    222.720001
2024-11-07    227.479996
2024-11-08    226.960007
Name: Close, Length: 1724, dtype: float64
Date
2018-01-02     43.064999
2018-01-03     43.057499
2018-01-05     43.750000
2018-01-05     43.750000
2018-01-08     43.587502
                 ...    
2024-11-01    222.910004
2024-11-01    222.910004
2024-11-04    222.009995
2024-11-05    223.449997
2024-11-06    222.720001
Name: Close, Length: 1724, dtype: float64


Date
2018-01-02    NaN
2018-01-03    NaN
2018-01-04    NaN
2018-01-05    0.0
2018-01-05    0.0
             ... 
2024-11-04    0.0
2024-11-05    0.0
2024-11-06    0.0
2024-11-07    NaN
2024-11-08    NaN
Name: Close, Length: 2088, dtype: float64

In [105]:
def applyPtSlOnT1(close, events, ptSl, molecule):
    # apply stop loss/profit taking, it if takes place before t1 (end of event)
    events_ = events.loc[molecule]
    out = events_[['t']].copy(deep=True)
    if ptSl[0] > 0:
        pt = ptSl[0]*events_['trgt']
    else:
        pt = pd.Series(index=events.index)
    if ptSl[1] > 0:
        sl = ptSl[1]*events_['trgt']
    else:
        sl = pd.Series(index=events.index)

    for loc, t1 in events_['t1'].fillna(close.index[-1]).iteritems():
        df0 = close[loc, t1] # path prices
        df0 = (df0/close[loc] - 1)*events_.at[loc, 'side'] # path return
        out.loc[loc, 'sl'] = df0[df0<sl[loc]].index.min()
        out.loc[loc, 'pt'] = df0[df0>pt[loc]].index.min()
    return out

In [None]:
def getEvents(close, tEvents, ptSl, trgt, minRet, numThreads, t1=False, side=None):
    # 1) get target
    trgt = trgt.loc[tEvents]
    trgt = trgt[trgt>minRet]
    # 2) get t1 (max holding period)
    if t1 is False:
        t1 = pd.Series(np.nan, index=tEvents)
    # 3) form events object, apply stop loss on t1
    if side is None:
        side_, ptSl_ = pd.Series(1, index=trgt.index), [ptSl[0], ptSl[0]]
    else:
        side_, ptSl_ = side.loc[trgt.index], ptSl[:2]
    events = pd.concat({'t1': t1, 'trgt': trgt, 'side': side_}, axis=1).dropna(subset=['trgt'])
    df0 = mpPandasObj(func=applyPtSlOnT1, pdObj=('molecule', events.index),
                      numThreads=numThreads, close=inst['Close'], events=events, ptSl=ptSl_)
    events['t1'] = df0.dropna(how='all').min(axis=1)
    if side is None:
        events = events.drop('side', axis=1)
    return events