In [1]:
import yfinance as yf
import pandas as pd
import numpy as np

from itertools import product

In [2]:
ticker = "JPM"
data = yf.download(tickers = ticker, start="2006-01-01", end="2025-01-01")

[*********************100%***********************]  1 of 1 completed


In [5]:
data

Price,Adj Close,Close,High,Low,Open,Volume,Daily return,State
Ticker,JPM,JPM,JPM,JPM,JPM,JPM,Unnamed: 7_level_1,Unnamed: 8_level_1
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
2006-01-03,24.241716,40.189999,40.360001,39.299999,39.830002,12838600,,down
2006-01-04,24.101803,39.619999,40.139999,39.419998,39.779999,13491500,-0.005772,down
2006-01-05,24.174801,39.740002,39.810001,39.500000,39.610001,8109400,0.003029,up
2006-01-06,24.345129,40.020000,40.240002,39.549999,39.919998,7966900,0.007046,up
2006-01-09,24.740543,40.669998,40.720001,39.880001,39.880001,16575200,0.016242,up
...,...,...,...,...,...,...,...,...
2024-12-24,241.064987,242.309998,242.490005,239.070007,239.429993,3729100,0.016444,up
2024-12-26,241.890717,243.139999,243.259995,240.789993,241.429993,4451800,0.003425,up
2024-12-27,239.930847,241.169998,243.389999,240.039993,242.720001,5730200,-0.008102,down
2024-12-30,238.090363,239.320007,240.839996,237.110001,238.770004,5723800,-0.007671,down


In [4]:
data["Daily return"] = data["Adj Close"].pct_change() #periods=n
data["State"] = np.where(data["Daily return"] >= 0, "up", "down")

We mainly use the conditional probabilities formula
$$ P(A|B) = \frac{P(A \cap B)}{P(B)}.$$
We define sets $A_{n, \varepsilon}$ which will denote an increase (or decrease) of $\varepsilon$ percent over $n$ days. We will iterate over $\varepsilon, n$ and the possible combinations of up and downs before.

In [6]:
up_counts = len(data[data["State"] == "up"])
down_counts = len(data[data["State"] == "down"])

# P(up|up) = P(up & up)/P(up)
up_to_up = len(data[(data["State"] == "up") & (data["State"].shift(-1) == "up")])/up_counts #/len(data.query('State=="up"'))
up_to_down = len(data[(data["State"] == "up") & (data["State"].shift(-1) == "down")])/len(data[data["State"] == "up"])
down_to_up = len(data[(data["State"] == "down") & (data["State"].shift(-1) == "up")])/len(data[data["State"] == "down"])
down_to_down = len(data[(data["State"] == "down") & (data["State"].shift(-1) == "down")])/down_counts

In [7]:
transition_matrix = pd.DataFrame({
    "up": [up_to_up, down_to_up],
    "down": [up_to_down, down_to_down]
}, index=["up", "down"])

In [8]:
print(transition_matrix)

            up      down
up    0.495543  0.504052
down  0.538262  0.461738


In [10]:
condition1 = data[
    (data["State"] == "up") 
    & (data["State"].shift(1) == "down")
    & (data["State"].shift(2) == "down")
    & (data["State"].shift(3) == "down")
    & (data["State"].shift(4) == "down")
    & (data["State"].shift(5) == "down")
    ]

condition2 = data[
    (data["State"] == "down")
    & (data["State"].shift(1) == "down")
    & (data["State"].shift(2) == "down")
    & (data["State"].shift(3) == "down")
    & (data["State"].shift(4) == "down")
    ]

In [11]:
print(f"P(up | 5 down) = {len(condition1)/ len(condition2)}")

P(up | 5 down) = 0.6262626262626263


In [12]:
test = data
test["test"] = data["State"].shift(1)
test["test2"] = data["State"].shift(2)
test

Price,Adj Close,Close,High,Low,Open,Volume,Daily return,State,test,test2
Ticker,JPM,JPM,JPM,JPM,JPM,JPM,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
2006-01-03,24.241716,40.189999,40.360001,39.299999,39.830002,12838600,,down,,
2006-01-04,24.101803,39.619999,40.139999,39.419998,39.779999,13491500,-0.005772,down,down,
2006-01-05,24.174801,39.740002,39.810001,39.500000,39.610001,8109400,0.003029,up,down,down
2006-01-06,24.345129,40.020000,40.240002,39.549999,39.919998,7966900,0.007046,up,up,down
2006-01-09,24.740543,40.669998,40.720001,39.880001,39.880001,16575200,0.016242,up,up,up
...,...,...,...,...,...,...,...,...,...,...
2024-12-24,241.064987,242.309998,242.490005,239.070007,239.429993,3729100,0.016444,up,up,up
2024-12-26,241.890717,243.139999,243.259995,240.789993,241.429993,4451800,0.003425,up,up,up
2024-12-27,239.930847,241.169998,243.389999,240.039993,242.720001,5730200,-0.008102,down,up,up
2024-12-30,238.090363,239.320007,240.839996,237.110001,238.770004,5723800,-0.007671,down,down,up


In [13]:
sequence = ["down", "down", "down", "down", "down", "up"]

# Build the condition using a loop
condition = True
for i, state in enumerate(sequence):
    print(i, state)
    condition &= (data["State"].shift(-i) == state)

# Filter the DataFrame
filtered_data = data[condition]

0 down
1 down
2 down
3 down
4 down
5 up


In [14]:
states = ["up", "down"]
transition_probabilities = {}

for length in range(2, 11): 
    for state_comb in product(states, repeat=length):
        condition1 = True
        condition2 = True
        for i, state in enumerate(state_comb):
            # In our case it does not matter if we look forward (i.e. -i) or backwards (i.e. i) because in any case we iterate over all combinations.
            condition1 &= (data["State"].shift(i) == state)
            # The first state is the one we "predict"
            if i != 0:
                condition2 &= (data["State"].shift(i) == state)

        transition_probabilities[state_comb] = (len(data[condition1])/len(data[condition2]), len(data[condition2]))

In [19]:
for state_comb, prob in transition_probabilities.items():
    if prob[0] > 0.7 and prob[1] > 10:
        print(f"Probability of {state_comb[0]} after {state_comb[1:]} is {prob[0]}, pattern appeared {prob[1]} times")

Probability of up after ('down', 'up', 'up', 'up', 'down', 'down', 'down') is 0.7096774193548387, pattern appeared 31 times
Probability of up after ('down', 'down', 'down', 'down', 'down', 'down', 'down') is 0.7692307692307693, pattern appeared 13 times
Probability of down after ('up', 'down', 'down', 'up', 'up', 'down', 'up') is 0.7941176470588235, pattern appeared 34 times
Probability of down after ('down', 'down', 'up', 'down', 'down', 'down', 'up') is 0.7083333333333334, pattern appeared 24 times
Probability of up after ('up', 'up', 'up', 'up', 'down', 'down', 'down', 'up') is 0.7333333333333333, pattern appeared 15 times
Probability of up after ('up', 'up', 'up', 'down', 'down', 'down', 'down', 'down') is 0.7333333333333333, pattern appeared 15 times
Probability of up after ('up', 'up', 'down', 'up', 'down', 'down', 'down', 'up') is 0.7692307692307693, pattern appeared 13 times
Probability of up after ('up', 'up', 'down', 'down', 'up', 'up', 'up', 'down') is 0.7894736842105263, pa

For the whole strategy, we want to do the following: we want to know all transition probabilities in a sequence (we already calculated this) and then find a good entry, and the exit is defined if we go only in the oposite state with high probability. For example we enter 
up after ("up", "down", "down") with probability 0.6 and then we know that down down follows with probability 0.7 (the more exit steps, the lower the probability)

Enter the strategy if the probability of the next state (state n+1) is high. If the state after (n+2) is the opposite one (for one or multiple times) with high probability then enter the trade at n and sell at n+1. If the the state after is the same state then enter the trade and look when the next opposite sign appears and with which probability. If the probability is too low, don't enter the trade, otherwise enter the trade. Work with conditional probabilites!