In [1]:
import numpy as np
import pandas as pd

# Pin the legacy global NumPy RNG so the synthetic series below is reproducible.
np.random.seed(901212)

# 500 rows: a 1-based counter plus uniform noise rescaled from [0, 1) to [0.35, 0.85).
df = pd.DataFrame({'indices': range(1, 501)})
df['values'] = .35 + .5 * np.random.rand(500)

In [2]:
# Inspect the synthetic frame built in the previous cell.
df

Unnamed: 0,indices,values
0,1,0.491233
1,2,0.538596
2,3,0.516740
3,4,0.381134
4,5,0.670157
...,...,...
495,496,0.745895
496,497,0.661303
497,498,0.544403
498,499,0.534056


In [3]:
# tag rows based on the threshold
df['tag'] = df['values'] > .5

# first row of a run: a True preceded by a False.
# shift(..., fill_value=False) keeps the column boolean. shift().fillna(False)
# goes through object dtype and only works thanks to an implicit downcast in
# fillna that pandas has deprecated; without it `~` would act on Python objects.
fst = df.index[df['tag'] & ~df['tag'].shift(1, fill_value=False)]

# last row of a run: a True followed by a False (the frame edge counts as False)
lst = df.index[df['tag'] & ~df['tag'].shift(-1, fill_value=False)]

# keep only runs whose last label exceeds the first by more than 4,
# i.e. at least 6 consecutive rows on this frame's default 0..n-1 index
pr = [(i, j) for i, j in zip(fst, lst) if j > i + 4]

In [4]:
# Unpack the first qualifying run into its first and last index labels and show its rows.
# .loc slices by label and includes both endpoints.
i, j = pr[0]
df.loc[i:j]

Unnamed: 0,indices,values,tag
15,16,0.639992,True
16,17,0.593427,True
17,18,0.810888,True
18,19,0.596243,True
19,20,0.812684,True
20,21,0.617945,True


In [5]:
# Widen the window by one row on each side to show the rows bounding the run.
df.loc[i-1:j+1]

Unnamed: 0,indices,values,tag
14,15,0.418698,False
15,16,0.639992,True
16,17,0.593427,True
17,18,0.810888,True
18,19,0.596243,True
19,20,0.812684,True
20,21,0.617945,True
21,22,0.494174,False


In [6]:
# All (first label, last label) pairs that passed the length filter.
pr

[(15, 20),
 (24, 33),
 (37, 42),
 (47, 57),
 (70, 76),
 (119, 125),
 (147, 152),
 (211, 219),
 (259, 266),
 (275, 280),
 (289, 298),
 (303, 312),
 (360, 380),
 (382, 394),
 (412, 417)]

In [2]:
import yfinance as yf
from tqdm import tqdm
# Ticker handle for AAPL; price history is requested from it in a later cell.
apple = yf.Ticker("aapl")

In [11]:
# Request daily bars from the ticker handle for a fixed date window,
# then preview the first rows.
aapldf = apple.history(
    start="2007-01-01",
    end="2021-11-11",
    interval="1d",
)
aapldf.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2007-01-03,2.642215,2.651095,2.507793,2.565971,1238319600,0.0,0.0
2007-01-04,2.573626,2.631804,2.566583,2.622924,847260400,0.0,0.0
2007-01-05,2.626293,2.639459,2.584344,2.604246,834741600,0.0,0.0
2007-01-08,2.632111,2.649565,2.611289,2.617107,797106800,0.0,0.0
2007-01-09,2.647115,2.847065,2.607309,2.834511,3349298400,0.0,0.0


In [12]:
# Drop the two corporate-action columns; none of the visible cells below read them.
# Reassigning (rather than inplace=True) together with errors='ignore' keeps this
# cell re-runnable: executing it a second time is a no-op instead of a KeyError.
aapldf = aapldf.drop(columns=['Dividends', 'Stock Splits'], errors='ignore')

In [13]:
# Re-check the first rows after the column drop.
aapldf.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2007-01-03,2.642215,2.651095,2.507793,2.565971,1238319600
2007-01-04,2.573626,2.631804,2.566583,2.622924,847260400
2007-01-05,2.626293,2.639459,2.584344,2.604246,834741600
2007-01-08,2.632111,2.649565,2.611289,2.617107,797106800
2007-01-09,2.647115,2.847065,2.607309,2.834511,3349298400


In [14]:
# Row-over-row fractional change of the closing price; the first row has no
# predecessor, so its value is NaN.
aapldf['change'] = aapldf['Close'].pct_change()

In [15]:
# Peek at the last rows, now including the 'change' column.
aapldf.tail()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,change
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-11-04,151.359097,152.207849,150.420465,150.740005,60394600,-0.003499
2021-11-05,151.889999,152.199997,150.059998,151.279999,65414600,0.003582
2021-11-08,151.410004,151.570007,150.160004,150.440002,55020900,-0.005553
2021-11-09,150.199997,151.429993,150.059998,150.809998,56787900,0.002459
2021-11-10,150.020004,150.130005,147.850006,147.919998,65187100,-0.019163


In [16]:
# tag rows based on the threshold: an "up" day has a strictly positive change
aapldf['tag'] = aapldf['change'] > 0

# change of the following row, aligned to the current row (NaN on the last row)
aapldf['nextdaychg'] = aapldf['change'].shift(-1)

# first row of a streak: a True preceded by a False.
# shift(..., fill_value=False) keeps the column boolean. shift().fillna(False)
# goes through object dtype and only works thanks to an implicit downcast in
# fillna that pandas has deprecated; without it `~` would act on Python objects.
fst = aapldf.index[aapldf['tag'] & ~aapldf['tag'].shift(1, fill_value=False)]

# last row of a streak: a True followed by a False (the frame edge counts as False)
lst = aapldf.index[aapldf['tag'] & ~aapldf['tag'].shift(-1, fill_value=False)]


def _busdays_between(first, last):
    """Return the Mon-Fri day count from `first` up to, but not including, `last`.

    Both arguments are timestamps; they are formatted as ISO dates for
    np.busday_count. No holiday list is passed, so exchange holidays are
    counted as business days.
    """
    return np.busday_count(first.strftime("%Y-%m-%d"), last.strftime("%Y-%m-%d"))


# Compute each streak's day count once, then keep the streaks spanning more than
# 3 business days as (first day, last day, day count, next-day change).
# NOTE: every `lst` row is, by definition, followed by a row that is not an up
# day, so the nextdaychg collected here is never positive (or is NaN when the
# streak ends on the final row).
_streak_spans = [(i, j, _busdays_between(i, j)) for i, j in zip(fst, lst)]
pr = [(i, j, n_days, aapldf.loc[j].nextdaychg) for i, j, n_days in _streak_spans if n_days > 3]

In [17]:
# Number of streaks that passed the business-day filter.
len(pr)

85

In [18]:
# Mean change on the day after each qualifying streak (element 3 of every tuple).
# NOTE: each streak end is selected because the following day was NOT an up day,
# so these values are non-positive by construction; the mean cannot come out positive.
# nanmean is used because a streak that ends on the final row has no following
# day: its nextdaychg is NaN and would otherwise turn the whole mean into NaN.
np.nanmean([x[3] for x in pr])

-0.011304622087374136

In [19]:
# List every streak: first day, last day, and the Mon-Fri day count between them.
for i, j in zip(fst, lst):
    start_iso = i.strftime("%Y-%m-%d")
    end_iso = j.strftime("%Y-%m-%d")
    print(i, j, np.busday_count(start_iso, end_iso))
    

2007-01-04 00:00:00 2007-01-04 00:00:00 0
2007-01-08 00:00:00 2007-01-10 00:00:00 2
2007-01-16 00:00:00 2007-01-16 00:00:00 0
2007-01-24 00:00:00 2007-01-24 00:00:00 0
2007-01-29 00:00:00 2007-01-29 00:00:00 0
2007-01-31 00:00:00 2007-01-31 00:00:00 0
2007-02-02 00:00:00 2007-02-02 00:00:00 0
2007-02-06 00:00:00 2007-02-08 00:00:00 2
2007-02-12 00:00:00 2007-02-12 00:00:00 0
2007-02-14 00:00:00 2007-02-14 00:00:00 0
2007-02-20 00:00:00 2007-02-22 00:00:00 2
2007-02-28 00:00:00 2007-03-01 00:00:00 1
2007-03-05 00:00:00 2007-03-06 00:00:00 1
2007-03-08 00:00:00 2007-03-08 00:00:00 0
2007-03-12 00:00:00 2007-03-12 00:00:00 0
2007-03-14 00:00:00 2007-03-14 00:00:00 0
2007-03-16 00:00:00 2007-03-22 00:00:00 4
2007-03-26 00:00:00 2007-03-26 00:00:00 0
2007-03-29 00:00:00 2007-03-29 00:00:00 0
2007-04-02 00:00:00 2007-04-03 00:00:00 1
2007-04-05 00:00:00 2007-04-05 00:00:00 0
2007-04-10 00:00:00 2007-04-10 00:00:00 0
2007-04-16 00:00:00 2007-04-16 00:00:00 0
2007-04-18 00:00:00 2007-04-18 00:

In [35]:
# NOTE(review): the saved output for this cell lists 3-tuples, while the cell that
# builds `pr` from aapldf produces 4-tuples (it appends nextdaychg). The output looks
# stale; re-run the notebook top to bottom to refresh it.
pr

[(Timestamp('2020-04-08 00:00:00'), Timestamp('2020-04-14 00:00:00'), 4),
 (Timestamp('2020-05-04 00:00:00'), Timestamp('2020-05-11 00:00:00'), 5),
 (Timestamp('2020-07-29 00:00:00'), Timestamp('2020-08-06 00:00:00'), 6),
 (Timestamp('2020-08-18 00:00:00'), Timestamp('2020-08-24 00:00:00'), 4)]

In [20]:
def _span_days(first, last):
    """Return np.busday_count between the ISO dates of two timestamps."""
    return np.busday_count(first.strftime("%Y-%m-%d"), last.strftime("%Y-%m-%d"))


# Same 4-tuple layout as `pr` (first day, last day, day count, next-day change),
# but with a >= 0 threshold, which also admits single-day streaks (count 0).
fullpr = [
    (i, j, days, aapldf.loc[j].nextdaychg)
    for i, j, days in ((i, j, _span_days(i, j)) for i, j in zip(fst, lst))
    if days >= 0
]

In [21]:
# Average day count (element 2 of each tuple) across all streaks in fullpr.
np.mean([x[2] for x in fullpr])

1.1478352692713834

In [22]:
# Largest day count (element 2 of each tuple) among the streaks in fullpr.
np.max([x[2] for x in fullpr])

9

In [23]:
# Position in fullpr of the streak with the largest day count;
# argmax returns the first position when several tie.
index_max = np.argmax([x[2] for x in fullpr])

In [25]:
# The (first day, last day, day count, next-day change) tuple of that streak.
fullpr[index_max]

(Timestamp('2010-10-05 00:00:00'),
 Timestamp('2010-10-18 00:00:00'),
 9,
 -0.02676104798205481)

In [10]:
from operator import itemgetter
# Tuple in pr with the largest element 2 (the day count); max keeps the first on ties.
res = max(pr, key=itemgetter(2))

In [39]:
# Day count of the tuple selected above.
res[2]

6

In [11]:
from pandas.tseries.holiday import USFederalHolidayCalendar
from pandas.tseries.offsets import CustomBusinessDay
# Endpoints of the date window used to try out the custom business-day frequency.
day1, day2 = '2010-01-01', '2010-01-15'

# Business-day offset whose holiday list comes from the US federal holiday calendar.
us_bd = CustomBusinessDay(calendar=USFederalHolidayCalendar())

In [42]:
# Number of dates generated between day1 and day2 at the us_bd frequency.
print(len(pd.date_range(start=day1,end=day2, freq=us_bd)))

10


In [20]:
# List every streak with its day count and the change on the day after it ended.
for i, j in zip(fst, lst):
    span = np.busday_count(i.strftime("%Y-%m-%d"), j.strftime("%Y-%m-%d"))
    print(i, j, span, aapldf.loc[j].nextdaychg)

2020-01-06 00:00:00 2020-01-06 00:00:00 0 -0.004703000665092549
2020-01-08 00:00:00 2020-01-13 00:00:00 3 -0.013503269952068742
2020-01-16 00:00:00 2020-01-17 00:00:00 1 -0.006776902114863148
2020-01-22 00:00:00 2020-01-23 00:00:00 1 -0.002882029959404564
2020-01-28 00:00:00 2020-01-29 00:00:00 1 -0.0014490551251076234
2020-02-04 00:00:00 2020-02-06 00:00:00 2 -0.013592710214904047
2020-02-10 00:00:00 2020-02-10 00:00:00 0 -0.006033352613494691
2020-02-12 00:00:00 2020-02-12 00:00:00 0 -0.007121093631789588
2020-02-14 00:00:00 2020-02-14 00:00:00 0 -0.018310575066055934
2020-02-19 00:00:00 2020-02-19 00:00:00 0 -0.01025905968123808
2020-02-26 00:00:00 2020-02-26 00:00:00 0 -0.06536814612001485
2020-03-02 00:00:00 2020-03-02 00:00:00 0 -0.031759145544189904
2020-03-04 00:00:00 2020-03-04 00:00:00 0 -0.03243702633876211
2020-03-10 00:00:00 2020-03-10 00:00:00 0 -0.03473056555561849
2020-03-13 00:00:00 2020-03-13 00:00:00 0 -0.1286469204248718
2020-03-17 00:00:00 2020-03-17 00:00:00 0 -0.

In [15]:
# NOTE(review): the saved output for this cell ends on 2020-10-20 and has no nextdaychg
# column, which does not match the history window requested earlier (end="2021-11-11")
# or the columns added since. It appears to come from an earlier kernel state;
# re-run the notebook top to bottom to refresh it.
aapldf.tail(10)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,change,tag
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2020-10-07,114.620003,115.550003,114.129997,115.080002,96849000,0.016967,True
2020-10-08,116.25,116.400002,114.589996,114.970001,83477200,-0.000956,False
2020-10-09,115.279999,117.0,114.919998,116.970001,100506900,0.017396,True
2020-10-12,120.059998,125.18,119.279999,124.400002,240226800,0.063521,True
2020-10-13,125.269997,125.389999,119.650002,121.099998,262330500,-0.026527,False
2020-10-14,121.0,123.029999,119.620003,121.190002,151062300,0.000743,True
2020-10-15,118.720001,121.199997,118.150002,120.709999,112559200,-0.003961,False
2020-10-16,121.279999,121.550003,118.809998,119.019997,115393800,-0.014001,False
2020-10-19,119.959999,120.419998,115.660004,115.980003,120639300,-0.025542,False
2020-10-20,116.199997,118.980003,115.629997,117.510002,124423700,0.013192,True
