# Imports

In [None]:
import numpy as np
from scipy.stats import norm, arcsine

import matplotlib.pyplot as plt

# Function definitions

In [None]:
# Preview cell: histogram bin edges -N, -N + 2, ..., N for a small N.
N = 5
bins = np.arange(start=-N, stop=N + 2, step=2)  # stop is exclusive, so the last edge is N
bins

In [None]:
def getSn(N: int, p: float = 0.5) -> int:
    """Draw one sample of S_N, the sum of N independent +1/-1 steps.

    Args:
        N: Number of steps to sum.
        p: Probability that a single step equals +1; a step equals -1 with
            probability 1 - p.

    Returns:
        The sum of the N drawn steps (a NumPy integer scalar).
    """
    steps = np.random.choice([1, -1], p=[p, 1 - p], size=N)
    return np.sum(steps)


# Element-wise wrapper: every entry of the input array is used as N for one
# independent call to getSn.
getSnVect = np.vectorize(getSn)



def getPn(N: int, p: float = 0.5) -> float:
    """Draw one sample of P_N, the fraction of steps spent on the positive side.

    A walk S_0 = 0, S_1, ..., S_N is built from N independent +1/-1 steps.
    Step i (1 <= i <= N) counts as "positive" when S_i > 0 or S_{i-1} > 0.

    Args:
        N: Number of steps of the walk.
        p: Probability that a single step equals +1; a step equals -1 with
            probability 1 - p.

    Returns:
        The number of positive steps divided by N.
    """
    steps = np.random.choice([1, -1], p=[p, 1 - p], size=N)
    # Prepend S_0 = 0 so that Sn[i] is the position after i steps.
    Sn = np.cumsum(np.concatenate(([0], steps)))
    # Vectorized form of "Sn[i] > 0 or Sn[i - 1] > 0" for i = 1..N; this avoids
    # a Python-level loop over every step of the walk.
    on_positive_side = (Sn[1:] > 0) | (Sn[:-1] > 0)
    Ln = np.sum(on_positive_side)
    return Ln / N


# Element-wise wrapper: every entry of the input array is used as N for one
# independent call to getPn.
getPnVect = np.vectorize(getPn)



def plotDistribution(N: int, data: np.ndarray, approx: str, filepath: str = None):
    """Plot a histogram of simulated samples with a reference curve on top.

    Side effect: sets the global matplotlib ``font.size`` rcParam to 15.

    Args:
        N: Value shown in the plot title; for ``approx="normal"`` it also
            defines the histogram bin edges -N, -N + 2, ..., N.
        data: Samples to draw the histogram from.
        approx: ``"normal"`` draws a cumulative histogram together with the
            CDF of a normal distribution fitted to ``data``; ``"arcsin"``
            draws a density histogram (20 bins) together with the arcsine PDF.
        filepath: If given (truthy), the figure is also saved to this path.

    Returns:
        None. For any other value of ``approx`` an error message is printed
        and nothing is drawn.
    """
    # Validate first so that an invalid call does not leave an empty figure open.
    if approx not in ("normal", "arcsin"):
        print("Error: Invalid value of 'approx' - must be 'normal' or 'arcsin'")
        return

    fig = plt.figure(figsize=(10, 7))

    plt.rcParams.update({'font.size': 15})

    if approx == "normal":
        plt.title(f"$S_N$ random variable distribution ($N = {N}$)")
        plt.xlabel("$s$ (sum value)", fontsize=12)
        # Raw string: "\l" is not a valid escape sequence in a normal literal.
        plt.ylabel(r"$P(S_N \leq s)$", fontsize=12)

        # NOTE(review): histogram bins are half-open except the last one, so
        # the final bin [N - 2, N] collects both the values N - 2 and N.
        bins = np.arange(-N, N + 2, step=2)
        plt.hist(data, bins=bins, density=True, cumulative=True, label="CDF")
        plt.xticks(bins, fontsize=10)

        # Fit a normal distribution to the data and overlay its CDF
        mean, std = norm.fit(data)
        xmin, xmax = plt.xlim()
        x = np.linspace(xmin, xmax, 250)
        cdf = norm.cdf(x, mean, std)
        plt.plot(x, cdf, color="red", linewidth=2, label="Normal CDF (approximation)")

    else:  # approx == "arcsin"
        plt.title(f"$P_N$ random variable probability density ($N = {N}$)")
        plt.xlabel("$p$ (\"time fraction\")", fontsize=12)
        plt.ylabel("$P(P_N = p)$", fontsize=12)

        # Only the bin edges are needed afterwards (for the x ticks).
        _, bins, _ = plt.hist(data, bins=20, density=True, cumulative=False, label="PDF")
        plt.xticks(bins, fontsize=10)

        x = np.linspace(0, 1, 250)
        pdf = arcsine.pdf(x)
        plt.plot(x, pdf, color="red", linewidth=2, label="Arcsin PDF (approximation)")

    plt.legend(prop={"size": 12})
    fig.tight_layout()

    if filepath:
        plt.savefig(filepath)

    plt.show()

# Exercise 1

In [None]:
# Generating a random bit sequence, stored as a text file of '0'/'1' characters.
# All 2 * 10 ** 6 bits are drawn with a single vectorized call rather than one
# np.random.choice call per bit, which is far slower.
with open("py_bytes.txt", 'w') as file:
    file.write(''.join(map(str, np.random.choice([0, 1], size=2 * 10 ** 6).tolist())))

# Exercise 2

### Tasks: a, b

In [None]:
# Cumulative histograms of S_n for n = 5, 10, ..., 30, each from k samples.
k = 10000
N = np.arange(5, 35, step=5)
for n in N:
    # Add filepath=f"./images/ex2/s{n}_cdf.png" to the call to also save the figure.
    plotDistribution(n, getSnVect(np.full(k, n)), approx="normal")

### Task c

In [None]:
# Same plot as in tasks a and b, for a single larger walk length.
k = 10000
n = 100
# Add filepath=f"./images/ex2/s{n}_cdf.png" to the call to also save the figure.
plotDistribution(n, getSnVect(np.full(k, n)), approx="normal")

# Exercise 3

In [None]:
# Density histograms of P_n for n = 100, 1000, 10000, each from k samples.
k = 5000
N = np.power(10, np.arange(2, 5))
for n in N:
    # Add filepath=f"./images/ex3/p{n}_pdf.png" to the call to also save the figure.
    plotDistribution(n, getPnVect(np.full(k, n)), approx="arcsin")