In [1]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import binom

In [2]:
import numpy as np
import yaml
import pandas as pd
from scipy.stats import binom

def compute_probabilities(n, K):
    """Theoretical barrier probabilities for a symmetric +/-1 random walk.

    For each horizon ``ticks`` in ``1..n`` the walk position is
    ``S = 2*X - ticks`` where ``X ~ Binomial(ticks, 0.5)`` counts the up
    steps. The four probabilities of ``S`` relative to ``K`` are derived
    from the binomial CDF of ``X``.

    Args:
        n: Largest number of ticks to evaluate; horizons 1..n are computed.
        K: Barrier level relative to the starting position. The integer
            thresholds below assume K is an integer.

    Returns:
        A tuple of four lists, each of length ``n`` and rounded to 4
        decimals, in the order
        ``(P(S > K), P(S >= K), P(S < K), P(S <= K))``.
        Element ``i`` of every list corresponds to ``ticks = i + 1``.
    """
    greater_than_probs = []
    greater_or_equal_probs = []
    less_than_probs = []
    less_or_equal_probs = []

    for ticks in range(1, n + 1):
        # Smallest number of up steps that gives S > K and S >= K respectively.
        min_ups_for_greater = np.ceil((K + ticks + 1) / 2.0)
        min_ups_for_equal = np.ceil((K + ticks) / 2.0)

        # P(X <= m - 1) is the mass strictly below each threshold. binom.cdf
        # already returns 0 below the support and 1 at or above ``ticks``, so
        # unreachable thresholds need no special casing.
        mass_below_greater = binom.cdf(min_ups_for_greater - 1, ticks, 0.5)
        mass_below_equal = binom.cdf(min_ups_for_equal - 1, ticks, 0.5)

        prob_greater = 1 - mass_below_greater
        prob_equal = 1 - mass_below_equal

        # Exact complements rather than a symmetry shortcut: P(S < K) equals
        # P(S > K) only when K == 0, so the shortcut is wrong for other K.
        prob_less = mass_below_equal              # P(S < K)  = 1 - P(S >= K)
        prob_less_or_equal = mass_below_greater   # P(S <= K) = 1 - P(S > K)

        greater_than_probs.append(round(prob_greater, 4))
        greater_or_equal_probs.append(round(prob_equal, 4))
        less_than_probs.append(round(prob_less, 4))
        less_or_equal_probs.append(round(prob_less_or_equal, 4))

    return greater_than_probs, greater_or_equal_probs, less_than_probs, less_or_equal_probs

def read_probabilities_from_yaml(file_path):
    """Parse the YAML document at ``file_path`` and return the loaded object.

    The file is opened in text read mode and parsed with ``yaml.safe_load``.
    """
    with open(file_path, 'r') as handle:
        return yaml.safe_load(handle)

def validate_probabilities(file_path, n, K):
    """Build a side-by-side table of YAML and computed probabilities.

    Loads the reference values via ``read_probabilities_from_yaml`` and the
    theoretical values via ``compute_probabilities(n, K)``, then emits one
    row per tick count in ``1..n``.

    Args:
        file_path: Path of the YAML file. It is indexed as
            ``data[ticks][allow_equals_*][probability_*]``.
        n: Number of tick horizons to compare.
        K: Barrier level forwarded to ``compute_probabilities``.

    Returns:
        A ``pandas.DataFrame`` with a ``Ticks`` column followed by a
        YAML/Computed column pair for each of the four probabilities.
    """
    reference = read_probabilities_from_yaml(file_path)
    computed_columns = compute_probabilities(n, K)

    rows = []
    # zip(*...) walks the four computed lists in lock-step, one tuple per tick.
    for ticks, (calc_gt, calc_ge, calc_lt, calc_le) in enumerate(zip(*computed_columns), start=1):
        entry = reference[ticks]
        ref_gt = entry['allow_equals_false']['probability_up']
        ref_ge = entry['allow_equals_true']['probability_up']
        ref_lt = entry['allow_equals_false']['probability_down']
        ref_le = entry['allow_equals_true']['probability_down']

        rows.append({
            'Ticks': ticks,
            'YAML P(S_t > K)': ref_gt,
            'Computed P(S_t > K)': calc_gt,
            'YAML P(S_t >= K)': ref_ge,
            'Computed P(S_t >= K)': calc_ge,
            'YAML P(S_t < K)': ref_lt,
            'Computed P(S_t < K)': calc_lt,
            'YAML P(S_t <= K)': ref_le,
            'Computed P(S_t <= K)': calc_le,
        })

    return pd.DataFrame(rows)

# YAML file with the reference probabilities; read by validate_probabilities.
file_path = 'steprng_rise_fall_theo_probability.yaml'  
# Largest tick count to validate; horizons 1..num_ticks are compared.
num_ticks = 2000  
K = 0  #rise/fall, K = 0 (atm)
# Build the YAML-vs-computed comparison table and render it in the notebook.
results_df = validate_probabilities(file_path, num_ticks, K)
display(results_df)

Unnamed: 0,Ticks,YAML P(S_t > K),Computed P(S_t > K),YAML P(S_t >= K),Computed P(S_t >= K),YAML P(S_t < K),Computed P(S_t < K),YAML P(S_t <= K),Computed P(S_t <= K)
0,1,0.5000,0.5000,0.5000,0.5000,0.5000,0.5000,0.5000,0.5000
1,2,0.2500,0.2500,0.7500,0.7500,0.2500,0.2500,0.7500,0.7500
2,3,0.5000,0.5000,0.5000,0.5000,0.5000,0.5000,0.5000,0.5000
3,4,0.3125,0.3125,0.6875,0.6875,0.3125,0.3125,0.6875,0.6875
4,5,0.5000,0.5000,0.5000,0.5000,0.5000,0.5000,0.5000,0.5000
...,...,...,...,...,...,...,...,...,...
1995,1996,0.4911,0.4911,0.5089,0.5089,0.4911,0.4911,0.5089,0.5089
1996,1997,0.5000,0.5000,0.5000,0.5000,0.5000,0.5000,0.5000,0.5000
1997,1998,0.4911,0.4911,0.5089,0.5089,0.4911,0.4911,0.5089,0.5089
1998,1999,0.5000,0.5000,0.5000,0.5000,0.5000,0.5000,0.5000,0.5000


In [3]:

def check_pass_fail(df, atol=1e-4):
    """Flag each row as 'Pass' or 'Fail' by comparing YAML and computed values.

    A row passes only when all four YAML/Computed column pairs agree under
    ``numpy.isclose`` with the given absolute tolerance (numpy's default
    relative tolerance still applies on top).

    Args:
        df: DataFrame holding the four ``'YAML P(...)'`` /
            ``'Computed P(...)'`` column pairs.
        atol: Absolute tolerance for the comparison. The default of 1e-4 is
            one unit in the last place of the 4-decimal probabilities. The
            previous hard-coded 0.1 accepted e.g. 0.45 against 0.50; pass
            ``atol=0.1`` explicitly to reproduce that behaviour.

    Returns:
        The same ``df`` object, with an ``'Overall Pass/Fail'`` column added
        (or overwritten) in place.
    """
    columns_to_check = [
        ('YAML P(S_t > K)', 'Computed P(S_t > K)'),
        ('YAML P(S_t >= K)', 'Computed P(S_t >= K)'),
        ('YAML P(S_t < K)', 'Computed P(S_t < K)'),
        ('YAML P(S_t <= K)', 'Computed P(S_t <= K)')
    ]

    # Column-wise comparison instead of a Python loop over rows.
    all_close = np.ones(len(df), dtype=bool)
    for col_yaml, col_computed in columns_to_check:
        all_close &= np.isclose(
            df[col_yaml].to_numpy(dtype=float),
            df[col_computed].to_numpy(dtype=float),
            atol=atol,
        )

    df['Overall Pass/Fail'] = ['Pass' if ok else 'Fail' for ok in all_close]
    return df

theo_prob_check = check_pass_fail(results_df)
# Keep only the passing rows so the displayed table and the printed count
# describe the same subset (the count previously covered every row).
pass_cases = theo_prob_check[theo_prob_check['Overall Pass/Fail'] == 'Pass']
display(pass_cases)
print('Pass Cases:', len(pass_cases))

Unnamed: 0,Ticks,YAML P(S_t > K),Computed P(S_t > K),YAML P(S_t >= K),Computed P(S_t >= K),YAML P(S_t < K),Computed P(S_t < K),YAML P(S_t <= K),Computed P(S_t <= K),Overall Pass/Fail
0,1,0.5000,0.5000,0.5000,0.5000,0.5000,0.5000,0.5000,0.5000,Pass
1,2,0.2500,0.2500,0.7500,0.7500,0.2500,0.2500,0.7500,0.7500,Pass
2,3,0.5000,0.5000,0.5000,0.5000,0.5000,0.5000,0.5000,0.5000,Pass
3,4,0.3125,0.3125,0.6875,0.6875,0.3125,0.3125,0.6875,0.6875,Pass
4,5,0.5000,0.5000,0.5000,0.5000,0.5000,0.5000,0.5000,0.5000,Pass
...,...,...,...,...,...,...,...,...,...,...
1995,1996,0.4911,0.4911,0.5089,0.5089,0.4911,0.4911,0.5089,0.5089,Pass
1996,1997,0.5000,0.5000,0.5000,0.5000,0.5000,0.5000,0.5000,0.5000,Pass
1997,1998,0.4911,0.4911,0.5089,0.5089,0.4911,0.4911,0.5089,0.5089,Pass
1998,1999,0.5000,0.5000,0.5000,0.5000,0.5000,0.5000,0.5000,0.5000,Pass


Pass Cases: 2000


# Probability Calculations for a Step Index Modeled as a Symmetric Random Walk

In a symmetric random walk with steps of +1 (up) and -1 (down), let:
- $X$ be the number of upward steps.
- $n$ be the total number of steps.

Each step results in a position change:
- +1 for each up step.
- -1 for each down step.

The net position after $n$ steps, given $X$ upward moves, can be calculated by:
1. Summing contributions of all up steps: $X$ steps each adding +1 contributes +$X$.
2. Summing contributions of all down steps: $(n-X)$ steps each adding -1 contributes $-(n-X) = X - n$.

Adding these contributions:
$$ S_t = X + (X - n) = 2X - n $$

This equation shows that the step index $S_t$ is determined by twice the number of up steps minus the total number of steps. It directly quantifies the displacement relative to the origin, accounting for the imbalance between upward and downward steps.


**Probability Space**: Since each step has only two outcomes (up or down) with equal probability, $X$ can be modeled using a binomial distribution $X \sim \text{Binomial}(n, p)$ with $p = 0.5$.

## Calculating Probabilities:
1. **$P(S_t > K)$**:
   - **Condition**: For $S_t > K$, the inequality translates to $2X - n > K$, which simplifies to $X > \frac{K + n}{2}$. Because $X$ and $K$ are integers, this is equivalent to $X \geq \left\lceil \frac{K + n + 1}{2} \right\rceil$.
   - **Binomial CDF**: The cumulative distribution function for a binomial random variable gives $P(X \leq x)$. Hence, $P(X > \frac{K + n}{2})$ is calculated as:
     $$
     P\left(X > \frac{K + n}{2}\right) = 1 - \text{CDF}_{\text{Binomial}}\left(n, 0.5, \left\lceil \frac{K + n + 1}{2} \right\rceil - 1\right)
     $$
   - Adding 1 before halving and rounding up turns the strict inequality into the smallest integer number of up steps that puts $S_t$ strictly above $K$. For example, with $n = 4$ and $K = 0$ the tie $S_t = 0$ occurs at $X = 2$, so at least $\lceil 5/2 \rceil = 3$ up steps are needed.

2. **$P(S_t \geq K)$**:
   - **Condition**: For $S_t \geq K$, the condition $2X - n \geq K$ simplifies to $X \geq \frac{K + n}{2}$; since $X$ is an integer, this means $X \geq \left\lceil \frac{K + n}{2} \right\rceil$.
   - **Binomial CDF**: The probability $P(X \geq \frac{K + n}{2})$ is:
     $$
     P\left(X \geq \frac{K + n}{2}\right) = 1 - \text{CDF}_{\text{Binomial}}\left(n, 0.5, \left\lceil \frac{K + n}{2} \right\rceil - 1\right)
     $$
