In [None]:
import numpy as np
from datetime import datetime, timedelta
import pandas as pd
import matplotlib.pyplot as plt
from enum import Enum
import random 


# ---- Session layout ---------------------------------------------------------
# One bar per minute, starting at 09:30.
num_times = 390
start_dt = datetime(2000, 1, 1, 9, 30)  # arbitrary date; only the clock time is kept
time = [
    (start_dt + timedelta(minutes=minute_offset)).strftime("%H:%M")
    for minute_offset in range(num_times)
]
start_price = 100
ema_span = 11
to_plot = True

# Each bar's open is the previous bar's close, so num_times bars need
# num_times closes plus the first open (+1). ema_span extra points are
# generated in front as EMA warm-up.
num_prices = num_times + ema_span + 1
# Fractional change over the entire day (0.017 == 1.7%).
# NOTE(review): the original comment mentioned a 15% increase, which does not
# match this value - confirm the intended figure.
overall_day_change = 0.017

# ---- Baseline volume profile --------------------------------------------------
# An even, high power of the normalised distance from mid-session keeps volume
# near min_volume for most of the day and lets it rise steeply at the edges.
t = np.arange(num_times)
mid_index = num_times // 2
min_volume = 1000
max_volume = 10000  # peak volume at the open
power = 50
volume_profile = min_volume + (max_volume - min_volume) * ((t - mid_index) / mid_index)**power

def create_file_and_print(prices, volume, file_name = None, is_plot = True):
    """
    Build an OHLCV table from a price path, optionally plot it and write it to CSV.

    Consecutive prices are treated as one bar: bar i opens at prices[i] and
    closes at prices[i + 1], so len(prices) - 1 bars are produced. High/Low are
    the larger/smaller of Open and Close (no intrabar extremes are simulated).

    Args:
        prices: sequence of len(bars) + 1 prices.
        volume: one volume value per bar.
        file_name: destination path for the CSV; nothing is written when None.
        is_plot: when True, show a price plot and a volume bar chart.

    The "Time" column is taken from the module-level `time` list, so the number
    of bars must equal len(time) (pandas raises ValueError otherwise).
    """
    round_decimals = 4
    # Local names deliberately avoid shadowing the builtin `open`.
    open_px = np.round(prices[:-1], decimals=round_decimals)
    close_px = np.round(prices[1:], decimals=round_decimals)
    # Both inputs are already rounded, so max/min need no further rounding.
    high_px = np.maximum(open_px, close_px)
    low_px = np.minimum(open_px, close_px)

    df = pd.DataFrame(data = {
        "Time": time,
        "Close": close_px,
        "High": high_px,
        "Low": low_px,
        "Open": open_px,
        "Volume": volume,
    })
    df["Time"] = df["Time"].astype("string")

    # Visualize
    if is_plot:
        # Index by the actual number of bars rather than a global constant.
        bar_index = range(len(close_px))
        fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 8))
        ax1.plot(bar_index, close_px, label='Price')
        ax1.set_title('Price vs Time')
        ax2.bar(bar_index, volume, color='gray', alpha=0.8)
        ax2.set_title('U-Shaped Volume Profile')
        plt.tight_layout()
        plt.show()

    if file_name is not None:
        df.to_csv(file_name, header=True, index=False)

def generate_volume_with_ema(prices, baseline_volume, ema_span, k=0.4, C=0.1, sigma_v=0.4, threshold=0.0019, spike_mult=4):
    """
    Generate a synthetic intraday volume series aligned to price behavior relative to an EMA.

    Takes a full price history (including warm-up) and a baseline intraday volume
    profile of length T. Computes an EMA over the entire price array, then for
    each of the T intervals:
    - measures the relative deviation of price from the EMA,
    - normalizes it by the RMS deviation and applies an exponential modulation
      to the baseline volume,
    - applies a spike multiplier when the deviation exceeds a threshold,
    - multiplies by log-normal noise and adds a component that grows as the
      deviation shrinks.

    Args:
        prices: price history; the last T + 1 points are the output window and
            everything before them is EMA warm-up.
        baseline_volume: baseline volume per interval (length T).
        ema_span: EMA span; alpha = 2 / (ema_span + 1).
        k: strength of the exponential modulation.
        C: scale of the additive component, as a fraction of the mean baseline.
        sigma_v: standard deviation of the log-normal noise exponent.
        threshold: relative deviation above which a spike is applied.
        spike_mult: multiplier applied on a spike.

    Returns:
        volumes: int array of length T, each entry at least 1.
        prices_out: the last T + 1 prices, used for OHLC alignment.

    Raises:
        ValueError: if fewer than ema_span warm-up points precede the window.

    Noise is drawn from the global np.random state, one draw per interval.
    """
    prices = np.asarray(prices, dtype=float)
    baseline = np.asarray(baseline_volume, dtype=float)
    T = len(baseline)
    warm_up = len(prices) - T - 1
    if warm_up < ema_span:
        raise ValueError("Need at least ema_span warm-up points before baseline period")

    # EMA reference series over the full history (seeded with the first price).
    ema = np.empty(len(prices), dtype=float)
    alpha = 2.0 / (ema_span + 1)
    ema[0] = prices[0]
    for i in range(1, len(prices)):
        ema[i] = alpha * prices[i] + (1 - alpha) * ema[i-1]

    # Relative deviations and their global RMS scale.
    prices_out = prices[warm_up: warm_up + T + 1]
    # NOTE(review): the price at index j is compared with the EMA at index j + 1
    # (the EMA after the bar's close) - confirm this one-step offset is intended.
    rel_dev = np.abs(prices_out[:-1] / ema[warm_up + 1: warm_up + T + 1] - 1)  # array length T
    sd = np.sqrt(np.mean(rel_dev**2))

    # A perfectly flat window has sd == 0, which used to give 0/0 = NaN and
    # crash in int(round(nan)). Every point then sits exactly on its EMA, so
    # use a normalized deviation of 0. NaN inputs still propagate and fail.
    if sd == 0:
        norm_dev = np.zeros_like(rel_dev)
    else:
        norm_dev = rel_dev / sd

    C_add = baseline.mean() * C * np.sqrt(1 / np.pi)

    volumes = np.zeros(T, dtype=int)
    for i in range(T):
        # Modulation equals 1 when the deviation matches the RMS scale.
        M = np.exp(k * (norm_dev[i] - 1))
        # Spike when the raw deviation exceeds the threshold.
        spike = spike_mult if rel_dev[i] > threshold else 1.0
        # Log-normal noise component.
        eps_v = sigma_v * np.random.standard_normal()
        vol = baseline[i] * M * spike * np.exp(eps_v)
        # Additive component: largest at zero deviation, gone beyond one RMS.
        vol += C_add * max(0, 1 - norm_dev[i])
        # Ensure at least 1
        volumes[i] = max(1, int(round(vol)))

    return volumes, prices_out

"""
Common function for breakout from consolidation, OU and jump diffusion
"""

def simulate_ou_process(start_price: float,duration: int,theta: float | np.ndarray = 0.0005,sigma: float | np.ndarray = 0.0002,mu: None | float | np.ndarray = None):
    """
    Simulate an Ornstein-Uhlenbeck (mean-reverting) price process.

    Args:
        start_price (float): Initial price at time t = 0.
        duration (int): Number of time steps to simulate.
        theta (float or np.ndarray): Mean-reversion speed. If array, must be length `duration`.
        sigma (float or np.ndarray): Volatility (noise scale). If array, must be length `duration`.
        mu (None, float, or np.ndarray): Drift or long-run mean target.
            - None: no drift; mean reversion to constant start_price.
            - float: linear drift; mean_series = start_price * (1 + (i * mu) / duration).
            - np.ndarray: custom mean_series; must have length `duration`.

    Returns:
        np.ndarray: Simulated price series of length `duration`.
    """
    # Validate inputs
    if duration <= 0:
        raise ValueError("`duration` must be a positive integer")

    # Initialize price array
    prices = np.zeros(duration, dtype=float)
    prices[0] = start_price

    # Construct target mean series
    if mu is None:
        mean_series = np.full(duration, start_price, dtype=float)
    elif isinstance(mu, (int, float)):
        if mu == 0:
            mean_series = np.full(duration, start_price, dtype=float)
        else:
            # Linear drift towards a changing mean
            mean_series = start_price * (1 + np.arange(duration) * (mu / duration))
    elif isinstance(mu, np.ndarray):
        if mu.shape[0] != duration:
            raise ValueError("`mu` array length must equal `duration`")
        mean_series = mu.astype(float)
    else:
        raise ValueError("`mu` must be None, a float, or a numpy array of length `duration`")

    # Construct theta series
    if isinstance(theta, (int, float)):
        theta_series = np.full(duration, float(theta), dtype=float)
    elif isinstance(theta, np.ndarray):
        if theta.shape[0] != duration:
            raise ValueError("`theta` array length must equal `duration`")
        theta_series = theta.astype(float)
    else:
        raise ValueError("`theta` must be a float or a numpy array of length `duration`")

    # Construct sigma series
    if isinstance(sigma, (int, float)):
        sigma_series = np.full(duration, float(sigma), dtype=float)
    elif isinstance(sigma, np.ndarray):
        if sigma.shape[0] != duration:
            raise ValueError("`sigma` array length must equal `duration`")
        sigma_series = sigma.astype(float)
    else:
        raise ValueError("`sigma` must be a float or a numpy array of length `duration`")

    # Simulate OU process
    for t in range(1, duration):
        drift = theta_series[t] * (mean_series[t] - prices[t - 1])
        noise = sigma_series[t] * np.random.standard_normal()
        prices[t] = prices[t - 1] + drift + noise

    return prices

"""
A baseline hold profile
"""
# Flat price path: one close per bar plus the first open, derived from
# num_times instead of a hard-coded 391 so it follows the session length.
prices = np.full(num_times + 1, start_price)
create_file_and_print(prices, volume_profile.round(), "../Cirricula/Phase 2/hold", is_plot= False)

In [None]:
"""
Simple OU processes
"""
np.random.seed(100)

# theta bounds
theta_min = 0.02
theta_max = 0.09
# sigma bounds
sigma_min = 0.11
sigma_max = 0.21
# mu bounds
mu_min = -0.03
mu_max = 0.03

num_profiles = 100
for file_ind in range(num_profiles):
    # Draw order (sigma, theta, mu) is kept fixed so seeded output stays stable.
    sigma = np.random.uniform(sigma_min, sigma_max)
    theta = np.random.uniform(theta_min, theta_max)
    mu = np.random.uniform(mu_min, mu_max)

    prices = simulate_ou_process(start_price, num_prices, theta, sigma, mu)
    volumes,prices = generate_volume_with_ema(prices, volume_profile, ema_span=ema_span)
    file_name = "../Cirricula/Phase 2/ou_"

    # Label the drift by sign and magnitude. The thresholds are checked from
    # the most positive to the most negative so every label is reachable
    # (previously "dec_fast_mu_" was shadowed by the "dec_slow_mu_" test).
    if mu >= 0.02:
        file_name += "inc_fast_mu_"
    elif mu >= 0.005:
        file_name += "inc_slow_mu_"
    elif mu > -0.005:
        file_name += "stationary_mu_"
    elif mu > -0.02:
        file_name += "dec_slow_mu_"
    else:
        file_name += "dec_fast_mu_"

    file_name += f"{file_ind}"
    create_file_and_print(prices, volumes, file_name, is_plot=False)

    # Spot-check the average absolute log moves on every tenth profile.
    if file_ind%10==0:
        print(f"avg dlog price: {1000*np.mean(np.abs(np.diff(np.log(prices))))}")
        print(f"avg dlog volume: {np.mean(np.abs(np.log(volumes/volume_profile)))}")


In [None]:
np.random.seed(100)
# The jump direction below is drawn with the stdlib `random` module, so it has
# to be seeded as well for the generated profiles to be reproducible.
random.seed(100)
"""
Jump diffusion processes
"""
# the mu and sigma numbers are chosen so that there are 1-3 jumps in each profile
mu = 0.78
sigma = 0.25
num_trends = 100
# Pool of candidate segment lengths, as a normally distributed fraction of num_prices.
trend_lengths = (np.random.normal(mu, sigma, num_trends)*num_prices).astype(int)
trend_lengths.sort()
# theta bounds
theta_min = 0.01
theta_max = 0.09
# sigma bounds
sigma_min = 0.1
sigma_max = 0.2
# mu bounds
mu_min = -0.005
mu_max = 0.005

min_jump_pcnt = 0.01
max_jump_pcnt = 0.03

num_profiles = 100
for file_ind in range(num_profiles):
    # Pick segment lengths until the whole price path is covered.
    trend_lens = []
    while sum(trend_lens) < num_prices:
        candidate_len = np.random.choice(trend_lengths)
        # The normal draw can produce a non-positive length, which
        # simulate_ou_process rejects; skip it and draw again.
        if candidate_len <= 0:
            continue
        trend_lens.append(candidate_len)
    # The last pick overshoots; trim it so the lengths sum to num_prices.
    trend_lens[-1] = num_prices - sum(trend_lens[:-1])

    prices = np.array([])
    for trend_len in trend_lens:
        sigma = np.random.uniform(sigma_min, sigma_max)
        theta = np.random.uniform(theta_min, theta_max)
        mu = np.random.uniform(mu_min, mu_max)
        curr_prices = simulate_ou_process(start_price, trend_len, theta, sigma, mu)

        # add a jump between segments
        if prices.size != 0: # ie has a previous segment
            jump_percent = np.random.uniform(min_jump_pcnt, max_jump_pcnt)
            jump = start_price * jump_percent * random.choice([-1, 1]) # a positive or negative jump
            # Shift the new segment so it starts one jump away from the last price.
            offset = (prices[-1] + jump) - curr_prices[0]
            curr_prices += offset
        prices = np.concatenate([prices, curr_prices], axis=0)

    volumes,prices = generate_volume_with_ema(prices, volume_profile, ema_span=ema_span)

    # NOTE(review): the number in the name is the segment count; the number of
    # jumps is one less. Confirm which one downstream consumers expect.
    file_name = f"../Cirricula/Phase 2/jd_{len(trend_lens)}_jump_{file_ind}"
    create_file_and_print(prices, volumes, file_name, is_plot=False)
    if file_ind%10==0:
        print(f"avg dlog price: {1000*np.mean(np.abs(np.diff(np.log(prices))))}")
        print(f"avg dlog volume: {np.mean(np.abs(np.log(volumes/volume_profile)))}")



In [None]:
np.random.seed(999)
""" 
breakout from consolidation
"""
# Pool of candidate lengths for the stationary phase: a normally distributed
# fraction of num_prices centred on one half (values picked by eye in desmos so
# the lengths stay positive and close to the centre).
mu = 0.5
sigma = 0.10
num_trends = 20
trend_lines = np.sort((np.random.normal(mu, sigma, num_trends) * num_prices).astype(int))

# Stationary phase: theta ramps up over the phase (larger theta = faster
# return to the mean) and the drift of the mean is small.
theta_stat_start = 0.001
theta_stat_end = 0.04
mean_stat_min = 0
mean_stat_max = 0.005

# Trend phase: constant theta and a markedly larger drift of the mean.
theta_trend_min = 0.01
theta_trend_max = 0.09
mean_trend_min = 0.01
mean_trend_max = 0.05

# Noise bounds shared by both phases.
sigma_min = 0.1
sigma_max = 0.2

num_profiles = 100
for file_ind in range(num_profiles):
    # --- stationary phase ---
    stat_dur = np.random.choice(trend_lines)
    stat_theta = np.linspace(theta_stat_start, theta_stat_end, stat_dur)
    stat_sigma = np.random.uniform(sigma_min, sigma_max)
    stat_mu = np.random.uniform(mean_stat_min, mean_stat_max) * np.random.choice([-1, 1])
    stat_prices = simulate_ou_process(start_price, stat_dur, stat_theta, stat_sigma, stat_mu)

    # --- trend phase fills the rest of the path ---
    trend_dur = num_prices - stat_dur
    trend_theta = np.random.uniform(theta_trend_min, theta_trend_max)
    trend_sigma = np.random.uniform(sigma_min, sigma_max)
    trend_mu = np.random.uniform(mean_trend_min, mean_trend_max) * np.random.choice([-1, 1])
    trend_prices = simulate_ou_process(start_price, trend_dur, trend_theta, trend_sigma, trend_mu)

    # Shift the trend phase so it starts where the stationary phase ended.
    offset = stat_prices[-1] - trend_prices[0]
    trend_prices = trend_prices + offset
    prices = np.concatenate([stat_prices, trend_prices], axis=0)

    volumes, prices = generate_volume_with_ema(prices, volume_profile, ema_span=ema_span)

    # File name encodes drift size, drift sign and trend length.
    size_tag = "small_" if abs(trend_mu) < (mean_trend_max + mean_trend_min)/2 else "large_"
    sign_tag = "pos_mean_" if trend_mu > 0 else "neg_mean_"
    length_tag = "short_trend" if trend_dur < num_prices/2 else "long_trend"
    file_name = "".join(
        ["../Cirricula/Phase 2/bfc_for_", size_tag, sign_tag, length_tag, f"_{file_ind}"]
    )

    create_file_and_print(prices, volumes, file_name, is_plot=False)

    if file_ind % 10 == 0:
        print(f"avg dlog price: {1000*np.mean(np.abs(np.diff(np.log(prices))))}")
        print(f"avg dlog volume: {np.mean(np.abs(np.log(volumes/volume_profile)))}")

In [None]:
"""
Momentum with Volume Acceleration & Taper
"""
np.random.seed(100)
def generate_sigmoid_mean_series(total_pct_inc, horizontal_stretch_pct=0.05, center_index=None):
    """
    Build an S-shaped target-mean series from a logistic curve.

    The series has one value per simulated price (module-level `num_prices`)
    and moves from about `start_price` towards
    `start_price * (1 + total_pct_inc)`.

    Parameters:
    - total_pct_inc: float, total fractional change across the curve
      (e.g. 0.01 for +1%; negative values give a falling curve).
    - horizontal_stretch_pct: float, steepness; larger values make the
      transition sharper.
    - center_index: int or None, index of the curve's midpoint; None means
      the middle of the series (num_prices // 2).

    Returns:
    - np.ndarray of shape (num_prices,).
    """
    midpoint = num_prices // 2 if center_index is None else center_index

    # Signed distance from the midpoint, scaled by the steepness.
    scaled_offset = (np.arange(num_prices) - midpoint) * horizontal_stretch_pct

    # Logistic curve: 0 far left of the midpoint, 0.5 at it, 1 far right.
    logistic = 1 / (1 + np.exp(-scaled_offset))

    return start_price * (1 + total_pct_inc * logistic)

# Fixed OU parameters; only the shape of the sigmoid target varies per profile.
theta = 0.10
sigma = 0.1
min_pcent_inc = 0.009
max_pcent_inc = 0.031
min_h_stretch = 0.03
max_h_stretch = 0.12
# The sigmoid midpoint is kept inside the middle 60% of the path.
min_center = int(0.2*num_prices)
max_center = int(0.8*num_prices)

num_profiles = 100
for file_ind in range(num_profiles):
    # Draw order: steepness, magnitude, sign, midpoint.
    h = np.random.uniform(min_h_stretch, max_h_stretch)
    total_pcent_inc = np.random.uniform(min_pcent_inc, max_pcent_inc)
    total_pcent_inc *= np.random.choice([-1, 1])
    center = np.random.randint(min_center, max_center)

    mu = generate_sigmoid_mean_series(total_pcent_inc, h, center)
    prices = simulate_ou_process(start_price, duration=num_prices, theta=theta, sigma=sigma, mu=mu)
    volumes, prices = generate_volume_with_ema(prices, volume_profile, ema_span=ema_span)

    # Name encodes direction and whether the move is above the mid-range size.
    direction_tag = "pos" if total_pcent_inc > 0 else "neg"
    size_tag = "large" if abs(total_pcent_inc) > (max_pcent_inc + min_pcent_inc)/2 else "small"
    file_name = "../Cirricula/Phase 2/acc_then_taper_" + f"{direction_tag}_{size_tag}" + f"_{file_ind}"

    create_file_and_print(prices, volumes, file_name, is_plot=False)
    # Overlay every generated path on the current figure.
    plt.plot(prices)
    if file_ind % 10 == 0:
        print(f"avg dlog price: {1000*np.mean(np.abs(np.diff(np.log(prices))))}")
        print(f"avg dlog volume: {np.mean(np.abs(np.log(volumes/volume_profile)))}")

In [None]:
"""
Regime-Switching Trend and Volatility
"""
np.random.seed(100)
random.seed(100)
# mu and sigma are chosen using a visual inspection from desmos, so that each profile contains 3-5 different segments
num_trend_samples = 100
mu = 0.28
sigma = 0.10
# Pool of candidate segment lengths, as a normally distributed fraction of num_prices.
trend_lengths = (np.random.normal(mu, sigma, num_trend_samples)*num_prices).astype(int)
trend_lengths.sort()

# Drift of the target mean per trend type, and the noise level paired with each size.
means_dict = {"pos_small":0.01, "pos_mid":0.02, "pos_large":0.03, "no_trend":0.00, "neg_small":-0.01, "neg_mid":-0.02, "neg_large":-0.03}
means_dict_keys = list(means_dict.keys())
sigma_dict = {"small":0.1, "mid":0.12, "large":0.15}
sigma_dict_keys = list(sigma_dict.keys())
theta = 0.1

num_profiles = 100
for file_ind in range(num_profiles):
    # Pick segment lengths until the whole price path is covered.
    trend_lens = []
    while sum(trend_lens) < num_prices:
        candidate_len = random.choice(trend_lengths)
        # The normal draw can produce a non-positive length, which
        # simulate_ou_process rejects; skip it and draw again.
        if candidate_len <= 0:
            continue
        trend_lens.append(candidate_len)
    # the last element overshot num_prices; replace it with the remaining length
    trend_lens[-1] = num_prices - sum(trend_lens[:-1])
    file_name = "../Cirricula/Phase 2/rst_"
    prices = np.array([])

    for trend_len in trend_lens:
        # trend type
        trend_type = random.choice(means_dict_keys)
        # assign trend mean
        trend_mean = means_dict[trend_type]
        # assign trend sigma: "no_trend" uses the small sigma, the others the
        # sigma matching their size suffix
        if trend_type == "no_trend":
            trend_sigma = sigma_dict["small"]
        else:
            trend_sigma = sigma_dict[trend_type.split("_")[-1]]
        # get curr prices
        curr_prices = simulate_ou_process(start_price,duration=trend_len,theta=theta,sigma=trend_sigma,mu=trend_mean)

        # add a small jump between segments
        if prices.size != 0: # ie has a previous segment
            # Jump size is 10% of the segment's drift: at most 0.3% of
            # start_price, and zero for "no_trend".
            jump_percent = trend_mean*0.1
            jump = start_price * jump_percent * random.choice([-1, 1])
            offset = (prices[-1] + jump) - curr_prices[0]
            curr_prices += offset

        file_name+= f"{trend_type}_"
        prices = np.concatenate([prices, curr_prices], axis=0)

    volumes,prices = generate_volume_with_ema(prices, volume_profile, ema_span=ema_span)
    # file_name already ends with "_" after the last trend type, so the index
    # can be appended directly.
    file_name = f"{file_name}{file_ind}"
    create_file_and_print(prices, volumes, file_name, is_plot=False)
    if file_ind%10==0:
        print(f"avg dlog price: {1000*np.mean(np.abs(np.diff(np.log(prices))))}")
        print(f"avg dlog volume: {np.mean(np.abs(np.log(volumes/volume_profile)))}")


In [None]:
"""
Volatility Clustering Segments
"""

class Regime(Enum):
    """Two-state volatility regime; `regime_change` returns the other state."""
    low = -1
    high = 1

    def regime_change(self):
        """Return the opposite regime (low -> high, high -> low)."""
        if self is Regime.low:
            return Regime.high
        if self is Regime.high:
            return Regime.low
        raise ValueError("current regime is not a proper regime.")

def simulate_volatility_clustering(start_regime:Regime, low_vol:float, high_vol:float, switch_prob:float, h_reset:bool, h_alpha:float):
    """
    Simulate a price path with regime-switching, self-exciting volatility.

    At every step the regime flips with probability `switch_prob`. The step's
    log-return is Gaussian with standard deviation base_vol * sqrt(h_t), where
    base_vol is `low_vol` or `high_vol` depending on the regime and h_t is a
    persistence factor updated as an EMA of the squared normalized return.

    Args:
        start_regime: regime in force before the first step.
        low_vol: base volatility in the low regime.
        high_vol: base volatility in the high regime.
        switch_prob: per-step probability of changing regime, in [0, 1].
        h_reset: when True, h_t is reset to 1 on every regime change.
        h_alpha: weight of the previous h_t in its EMA update.

    Returns:
        np.ndarray of `num_prices` prices starting at `start_price` (both are
        module-level values).

    Randomness comes from the global np.random state.
    """
    curr_regime = start_regime

    prices = [start_price]
    h_t = 1
    for t in range(1,num_prices):
        # Regime switch decision. The draw must be uniform on [0, 1) for
        # switch_prob to act as a probability; a standard normal draw falls
        # below any small threshold about half the time.
        if np.random.uniform(0.0, 1.0) < switch_prob:
            curr_regime = curr_regime.regime_change()
            if h_reset: h_t = 1

        # Base volatility from regime
        base_vol = low_vol if curr_regime == Regime.low else high_vol

        # Apply persistence factor: actual volatility = base_vol * sqrt(h_t)
        vol_t = base_vol * np.sqrt(h_t)
        r = vol_t * np.random.standard_normal()
        new_price = prices[-1] * np.exp(r)
        prices.append(new_price)

        # Update persistence factor h_t using EMA of squared normalized return
        # normalized by base_vol: (r / base_vol)^2
        h_t = h_alpha * h_t + (1 - h_alpha) * (r/base_vol)**2
    return np.array(prices)

seeds = [1000 + i for i in range(10)]

# One row per profile, generated in this order for every seed:
# (file tag, start regime, low_vol, high_vol, switch_prob, h_reset, h_alpha)
vc_profiles = [
    # 1. Basic profile: moderate low/high volatility, decent switch rate,
    #    persistence reset at each regime change.
    ("basic", Regime.low, 0.001, 0.002, 0.04, True, 0.9),
    # 2. Long quiet, sharp spikes: rare switches into a much noisier regime,
    #    high persistence within a regime, reset on switch.
    ("long_quiet_sharp_spike", Regime.low, 0.001, 0.005, 0.02, True, 0.95),
    # 3. Moderate clustering: balanced quiet/active durations, moderate persistence.
    ("moderate_clustering", Regime.low, 0.001, 0.002, 0.05, True, 0.9),
    # 4. Frequent switching, low persistence: many short bursts of noise that
    #    decay quickly; no reset on switch.
    ("frequent_switching", Regime.low, 0.002, 0.005, 0.1, False, 0.7),
    # 5. High persistence across switches: very slow decay and shocks carried
    #    over into the next regime.
    ("high_persistence_switching", Regime.low, 0.001, 0.004, 0.03, False, 0.98),
    # 6. Quick reversion after shock: fast decay of the persistence factor.
    ("quick_reversion_after_shock", Regime.low, 0.001, 0.003, 0.05, True, 0.6),
    # 7. Asymmetric volatility levels: very calm baseline, large noise when active.
    ("asymmetric_volatility", Regime.low, 0.0002, 0.004, 0.04, True, 0.9),
    # 8. Basic profile again, but starting in the high regime.
    ("high_basic", Regime.high, 0.001, 0.002, 0.04, True, 0.9),
]

for seed in seeds:
    np.random.seed(seed)
    for profile_tag, start_regime, low_vol, high_vol, switch_prob, h_reset, h_alpha in vc_profiles:
        prices = simulate_volatility_clustering(start_regime, low_vol, high_vol, switch_prob, h_reset, h_alpha)
        volumes, prices = generate_volume_with_ema(prices, volume_profile, ema_span=ema_span)
        file_name = f"../Cirricula/Phase 2/vc_{profile_tag}_{seed}"
        create_file_and_print(prices, volumes, file_name, is_plot=False)

    # Stats are reported for the last profile generated with this seed.
    print(f"avg dlog price: {1000*np.mean(np.abs(np.diff(np.log(prices))))}")
    print(f"avg dlog volume: {np.mean(np.abs(np.log(volumes/volume_profile)))}")


In [None]:
"""
Create the date_file.txt
"""

from pathlib import Path

root = Path(r"../Cirricula/Phase 2")
raw_files: set[Path] = set()

for file in root.rglob("*"):
    # rglob also yields directories; only data files belong in the list.
    # The index file itself and the "hold" profile are written separately.
    if file.is_dir() or file.name in ("date_file.txt", "hold"):
        continue
    raw_files.add(file)

# (re)create date_file.txt and write to it. Path.open is used instead of the
# builtin so the cell still works if the name `open` has been rebound in the
# notebook session.
with root.joinpath("date_file.txt").open("w") as date_file:
    # write hold twice
    date_file.write("hold\n")
    date_file.write("hold\n")

    # for each file in raw_files write hold and then the file name
    # NOTE(review): set iteration order is arbitrary, so the order of entries
    # can differ between runs; only the bare file name is written, so files in
    # sub-directories lose their relative path - confirm both are acceptable.
    for file in raw_files:
        date_file.write("hold\n")
        date_file.write(file.name+"\n")

In [27]:
""" 
Check dlog values for actual data volume and prices
"""

from pathlib import Path
import pandas as pd
import numpy as np


# Data source toggle: the last uncommented assignment wins, so only
# "../Cirricula/Phase 1" is scanned as written.
root = Path(r"../US_Market_Data")
# root = Path(r"../Cirricula/Phase 2")
root = Path(r"../Cirricula/Phase 1")
count=0  # number of files processed
average_price_movements = []  # one mean |log(close/open)| per file
reward_scalar = 1500  # multiplier applied to the per-file means before summarising


for file in root.rglob("*"):
    # Skip directories and the index file; everything else is read as CSV.
    if file.is_dir() or file.name == "date_file.txt":continue
    # NOTE(review): index_col=0 makes the first CSV column the index -
    # presumably the "Time" column; confirm against the data files.
    df = pd.read_csv(file, index_col=0, header=0)
    
    # NOTE: `open` shadows the builtin for the rest of the session; these
    # arrays stay bound to the last file read once the loop ends.
    open = df["Open"].to_numpy()
    close = df["Close"].to_numpy()
    high = df["High"].to_numpy()
    low = df["Low"].to_numpy()
    volume = df["Volume"].to_numpy()
    # Average absolute open-to-close log return of this file.
    average_price_movements.append(  np.mean(np.abs( np.log(close/open) )) )
    # if count%10==0:
    #     print(f"avg dlog price: {1000*np.mean(np.abs( np.log(close/open) ))}")
    #     print(f"avg dlog volume: {np.mean(np.abs(np.log(volume/volume_profile)))}")
    count+=1    
    
# Scale the per-file means, then report their mean and standard deviation.
average_price_movements = np.array(average_price_movements) * reward_scalar

mean = np.mean(average_price_movements)
std = np.std(average_price_movements)
print(f"{root.name} Mean: {mean:.5f}, Std: {std:.5f}")

Phase 1 Mean: 0.94291, Std: 0.72652


In [None]:
# Per-bar open-to-close log returns (8 decimals) and their running sum,
# computed from whatever `open`/`close` arrays are currently in scope.
data = np.round(np.log(close / open), 8)
cumul_data = data.cumsum()

print(cumul_data)
plt.plot(cumul_data)
plt.show()
# Raw closing prices, for comparison with the cumulative returns.
plt.plot(close)