In [1]:
import datetime as dt
import pandas as pd
import pandas_datareader as pdr
import matplotlib.pyplot as plt
import xlwings as xl

In [2]:
tDeltas = {'1Y': dt.timedelta(days=365),
           '2Y': dt.timedelta(days=365*2),
           '3Y': dt.timedelta(days=365*3),
           '4Y': dt.timedelta(days=365*4),
           '5Y': dt.timedelta(weeks=260.714),
           '10Y': dt.timedelta(weeks=521.429)}

# Setting Dates_________#
now = dt.date.today()
start = now - tDeltas['1Y']

security_name = 'es=f'.upper()

In [3]:
price_data = pdr.DataReader(security_name, data_source='yahoo', start=start, end=now)
price_data.drop(columns=['Volume', 'Adj Close'], inplace=True)
price_data.sort_index(inplace=True)
price_data

Unnamed: 0_level_0,High,Low,Open,Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-03-23,2386.00,2174.00,2220.25,2220.500000
2020-03-24,2447.75,2230.50,2233.25,2438.000000
2020-03-25,2560.75,2386.00,2442.75,2467.000000
2020-03-26,2625.75,2402.25,2471.00,2608.000000
2020-03-27,2634.50,2505.25,2627.75,2524.000000
...,...,...,...,...
2021-03-16,3980.50,3952.75,3967.00,3962.500000
2021-03-17,3983.75,3935.25,3967.50,3974.000000
2021-03-18,3988.75,3911.00,3972.00,3916.500000
2021-03-19,3934.25,3908.00,3923.75,3912.620117


In [4]:
# creating open 2open & high 2 low returns column
price_data['O2O %'] = (price_data['Open'].pct_change() * 100).round(3)
price_data['H2L %'] = (((price_data['High'] - price_data['Low'])/price_data['Low']) * 100).round(3)
price_data.sort_index(ascending=False, inplace=True)
price_data

Unnamed: 0_level_0,High,Low,Open,Close,O2O %,H2L %
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-03-22,3928.25,3885.00,3893.50,3923.000000,-0.771,1.113
2021-03-19,3934.25,3908.00,3923.75,3912.620117,-1.215,0.672
2021-03-18,3988.75,3911.00,3972.00,3916.500000,0.113,1.988
2021-03-17,3983.75,3935.25,3967.50,3974.000000,0.013,1.232
2021-03-16,3980.50,3952.75,3967.00,3962.500000,0.539,0.702
...,...,...,...,...,...,...
2020-03-27,2634.50,2505.25,2627.75,2524.000000,6.344,5.159
2020-03-26,2625.75,2402.25,2471.00,2608.000000,1.156,9.304
2020-03-25,2560.75,2386.00,2442.75,2467.000000,9.381,7.324
2020-03-24,2447.75,2230.50,2233.25,2438.000000,0.586,9.740


In [5]:
# Lowest & highest open 2 open changes
min_ = price_data['O2O %'].min()
max_ = price_data['O2O %'].max()

In [6]:
largest_5th = price_data['O2O %'].nlargest(5).iloc[-1]
smallest_5th = price_data['O2O %'].nsmallest(5).iloc[-1]
largest_5th

3.683

In [7]:
smallest_5th

-3.077

In [8]:
# Divisor
k = int(abs((smallest_5th - largest_5th)/0.3))
k

22

In [9]:
#________________Distribution Functions_________________#
def frequency(array, dataframe=None, col_name=None):
    """Returns the frequency of values within
    a start & end range, in a given DataFrames column"""

    start = array[0]
    end = array[1]
    
    k = dataframe[dataframe[col_name].between(start, end)]
    return k[col_name].count()

In [10]:
# creating bin series
bin_ = [min_ - 0.3, smallest_5th, ]
for i in range(k + 1):
    smallest_5th += 0.3
    bin_.append(smallest_5th)
bin_.append(max_ + 0.3)
bin_Series = pd.Series(bin_).round(3)
bin_Series

0    -6.722
1    -3.077
2    -2.777
3    -2.477
4    -2.177
5    -1.877
6    -1.577
7    -1.277
8    -0.977
9    -0.677
10   -0.377
11   -0.077
12    0.223
13    0.523
14    0.823
15    1.123
16    1.423
17    1.723
18    2.023
19    2.323
20    2.623
21    2.923
22    3.223
23    3.523
24    3.823
25    9.681
dtype: float64

In [11]:
frequency_series = bin_Series.rolling(2).apply(frequency, raw=True, kwargs={'dataframe': price_data, 'col_name': 'O2O %'})
frequency_series

0      NaN
1      5.0
2      4.0
3      6.0
4      2.0
5      4.0
6      2.0
7      6.0
8     15.0
9     11.0
10    16.0
11    25.0
12    31.0
13    21.0
14    28.0
15    18.0
16    20.0
17     7.0
18    10.0
19     3.0
20     5.0
21     2.0
22     3.0
23     1.0
24     1.0
25     4.0
dtype: float64

In [12]:
frequency_table = pd.concat([bin_Series, frequency_series], axis=1)         # TODO merge Frequency Table
frequency_table.columns = ['bin', 'Frequency']
frequency_table

Unnamed: 0,bin,Frequency
0,-6.722,
1,-3.077,5.0
2,-2.777,4.0
3,-2.477,6.0
4,-2.177,2.0
5,-1.877,4.0
6,-1.577,2.0
7,-1.277,6.0
8,-0.977,15.0
9,-0.677,11.0


In [13]:
# Probability & Cu-probability
prob_Series = (frequency_table['Frequency']/len(price_data['O2O %']) * 100).round(2)
cumprob = ((frequency_table['Frequency']/len(price_data['O2O %']) * 100).round(2)).cumsum()

prob_table = pd.concat([prob_Series, cumprob], axis=1)               # Probability table
prob_table.columns = ['Probability %', 'Cum Probability %']

probability_distribution = pd.concat([frequency_table, prob_table], axis=1)
probability_distribution

Unnamed: 0,bin,Frequency,Probability %,Cum Probability %
0,-6.722,,,
1,-3.077,5.0,2.0,2.0
2,-2.777,4.0,1.6,3.6
3,-2.477,6.0,2.4,6.0
4,-2.177,2.0,0.8,6.8
5,-1.877,4.0,1.6,8.4
6,-1.577,2.0,0.8,9.2
7,-1.277,6.0,2.4,11.6
8,-0.977,15.0,6.0,17.6
9,-0.677,11.0,4.4,22.0


In [14]:
# lowest & highest low to high returns
h2lmin = 0
h2lmax = price_data['H2L %'].max()
h2lmax

9.752

In [15]:
# 5th largest & smallest h2l returns
h2l_5th_largest = price_data['H2L %'].nlargest(5).iloc[-1]
h2l_5th_largest

7.324

In [17]:
# Divisor
k2 = int(abs(h2l_5th_largest/0.3))
k2

24

In [18]:
# creating bin series
bin2 = [0, ]
for i in range(k2 + 1):
    h2lmin += 0.3
    bin2.append(h2lmin)
bin2.append(h2lmax)

h2l_bin_Series = pd.Series(bin2).round(3)
h2l_bin_Series

0     0.000
1     0.300
2     0.600
3     0.900
4     1.200
5     1.500
6     1.800
7     2.100
8     2.400
9     2.700
10    3.000
11    3.300
12    3.600
13    3.900
14    4.200
15    4.500
16    4.800
17    5.100
18    5.400
19    5.700
20    6.000
21    6.300
22    6.600
23    6.900
24    7.200
25    7.500
26    9.752
dtype: float64

In [24]:
h2l_frequency_series = h2l_bin_Series.rolling(2).apply(frequency, raw=True, kwargs={'dataframe': price_data, 'col_name': 'H2L %'})
h2l_frequency_table = pd.concat([h2l_bin_Series, h2l_frequency_series], axis=1)         # TODO merge Frequency Table
h2l_frequency_table.columns = ['bin', 'Frequency']
h2l_frequency_table.fillna(value=0, inplace=True)

h2l_frequency_table

Unnamed: 0,bin,Frequency
0,0.0,0.0
1,0.3,0.0
2,0.6,3.0
3,0.9,21.0
4,1.2,35.0
5,1.5,41.0
6,1.8,31.0
7,2.1,19.0
8,2.4,24.0
9,2.7,16.0


In [25]:
# Probability & Cu-probability
h2l_prob_Series = (h2l_frequency_table['Frequency']/len(price_data['H2L %']) * 100).round(2)
h2l_cumprob = ((h2l_frequency_table['Frequency']/len(price_data['H2L %']) * 100).round(2)).cumsum()

h2l_prob_table = pd.concat([h2l_prob_Series, h2l_cumprob], axis=1)               # Probability table
h2l_prob_table.columns = ['Probability %', 'Cum Probability %']

h2l_probability_distribution = pd.concat([h2l_frequency_table, h2l_prob_table], axis=1)      # TODO merge probability distribution

h2l_probability_distribution

Unnamed: 0,bin,Frequency,Probability %,Cum Probability %
0,0.0,0.0,0.0,0.0
1,0.3,0.0,0.0,0.0
2,0.6,3.0,1.2,1.2
3,0.9,21.0,8.4,9.6
4,1.2,35.0,14.0,23.6
5,1.5,41.0,16.4,40.0
6,1.8,31.0,12.4,52.4
7,2.1,19.0,7.6,60.0
8,2.4,24.0,9.6,69.6
9,2.7,16.0,6.4,76.0
