In [57]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [262]:
def generate_two_uniform(a : int = -1, b : int = 0, c : int = 1, p : float | list = 0.4, n : int = 1000):
    """
    Samples from a distribution
    of two joined unforms rvs. n times. 
    Where U1 = Uniform(a,b), U2 = Uniform(b,c),
    B = Bernouilli(p), and the resulting r.v. is 
    X = Indicator(B)*U1 + Indicator(not B)*U2
    """
    assert a <= b, "a must be less than or equal to b"
    assert b <= c, "b must be less than or equal to c"
    
    if isinstance(p, list):
        assert len(p) == n, "list of p's must be same length as n"
    else:
        p = np.array([p] * n)

    l = []
    
    for i in range(0, n):
        r = np.random.rand(1)
        if r < p[i][0]: # uniform 1
            x , y = a , b
        else: # uniform 2
            x , y = b, c
        r = np.abs(y-x) * np.random.rand(1) + x
        l.append(r)
    
    #ev = (p*(a - c) + c + b)/2 #expected value
    return np.array(l)


In [334]:
# Simulate `l` fake price series as cumulative products of small random returns.
l = 10  # number of simulated companies (one column each)
num_time_steps =  20000
# a[t, i] holds company i's multiplicative return at step t (filled below).
a = np.zeros((num_time_steps, l))
for i in range(l):
    # Probability of drawing from the lower uniform, one value per step,
    # jittered in [0.498, 0.500).
    p = 0.4980 + np.random.rand(num_time_steps,1) * 0.002
    # Hand the probabilities over as a list (the sequence type declared in
    # generate_two_uniform's signature), one single-element row per step, so
    # that step t uses its own probability. ravel() turns the returned column
    # into the 1-D vector that a[:, i] expects instead of relying on
    # assignment broadcasting.
    a[:, i] = 1 + generate_two_uniform(
        p = p.tolist(), n = num_time_steps
    ).ravel() * (num_time_steps * 10**(-7))  # scale factor = 0.002 here
# Compound the per-step returns down each column.
a = a.cumprod(axis=0)
# Random starting price in [0, 300) per company, broadcast across all rows.
starting_px = np.random.rand(l,) * 300
data_fake = a * starting_px
# Column name -> simulated price path, ready to be turned into a DataFrame.
d = {}
for i in range(l):
    comp_i = "Company"+str(i)
    d[comp_i] = data_fake[:, i]

In [335]:
# Assemble the simulated price paths into a single frame, one column per company.
df = pd.DataFrame(data=d)
# Bare trailing expression so the notebook renders the frame.
df

Unnamed: 0,Company0,Company1,Company2,Company3,Company4,Company5,Company6,Company7,Company8,Company9
0,56.954525,125.506133,153.988110,210.697117,101.962338,222.828325,221.928084,133.163255,175.447419,219.105769
1,57.052645,125.284834,154.294985,210.591065,101.993456,222.919320,221.938462,133.378610,175.367345,219.108153
2,56.960332,125.324730,154.210847,210.953502,102.044945,222.978996,222.300457,133.604439,175.182806,218.921494
3,56.958895,125.394249,154.342979,210.700152,102.050824,222.733092,222.507782,133.658147,175.490584,218.946310
4,57.054242,125.580806,154.560685,210.453936,102.051907,223.067287,222.539617,133.576453,175.665811,218.977475
...,...,...,...,...,...,...,...,...,...,...
19995,45.646212,133.652312,205.291874,205.106090,89.906596,222.037439,237.475358,136.531523,166.016087,211.795322
19996,45.655448,133.649884,205.397918,204.877860,89.969496,221.806367,237.128959,136.491552,165.877556,212.172315
19997,45.662770,133.639270,205.190415,205.248105,90.072412,221.446449,236.953614,136.613664,165.911060,212.414383
19998,45.618816,133.534923,205.297838,205.573531,90.020733,221.678023,237.087452,136.587283,165.651316,212.360134


In [None]:
def coarse_df(df : pd.DataFrame, coarse_size : int = 5):
    """
    Coarses the rows of a df, replace with a 
    multiindex of OHLC
    """
    