# Generate Mock Market Data

Generate irregular timestamps and realistic price movements for testing.

In [1]:
import numpy as np
import pandas as pd
import os

## Generate Irregular Timestamps

In [2]:
# Simulation parameters
total_duration = 36000    # simulated span in seconds (10 hours)
base_frequency = 2.0      # mean tick rate (ticks per second) outside bursts
burst_probability = 0.02  # chance that a single loop iteration opens a burst
burst_intensity = 20      # tick-rate multiplier while a burst is active
burst_duration = 3.0      # nominal burst length in seconds

np.random.seed(42)

# Mean inter-arrival gap (seconds) for each regime; the exponential draws
# below use these as their scale.
quiet_gap = 1.0 / base_frequency
burst_gap = 1.0 / (base_frequency * burst_intensity)

# Walk a clock forward, recording one timestamp per simulated tick.
tick_times = []
current_time = 0.0

while current_time < total_duration:
    # One uniform draw per iteration decides whether a burst starts here.
    if np.random.random() >= burst_probability:
        # Quiet regime: a single tick at the base rate.
        current_time += np.random.exponential(quiet_gap)
        tick_times.append(current_time)
        continue

    # Burst regime: emit high-rate ticks until the burst window (or the
    # whole simulation) is over. The last tick may land past the limit.
    burst_stop = min(current_time + burst_duration, total_duration)
    while current_time < burst_stop:
        current_time += np.random.exponential(burst_gap)
        tick_times.append(current_time)

# Discard any tick that overshot the end of the simulated span.
timestamps = np.array(tick_times)
timestamps = timestamps[timestamps <= total_duration]

print(f"Generated {len(timestamps)} ticks over {total_duration/60:.1f} minutes")
print(f"Average frequency: {len(timestamps)/total_duration:.2f} ticks/second")

Generated 223754 ticks over 600.0 minutes
Average frequency: 6.22 ticks/second


## Generate Realistic Prices

In [3]:
# Generate mock prices: a Gaussian random walk plus a small sinusoidal drift,
# with one price per entry in `timestamps`.
initial_price = 100.0
step_std = 0.05          # std-dev of each per-tick random step (5 cents)
drift_amplitude = 0.01   # peak size of the per-tick sinusoidal drift (1 cent)
# NOTE(review): the original comments called the scaled time "hours" and the
# cycle "daily", but `timestamps` are in seconds, so dividing by 36 does not
# give hours. With these values the drift repeats every 36 * 24 = 864 seconds.
# The constants are kept as-is so the generated data does not change; confirm
# whether 3600 was intended before altering them.
drift_time_divisor = 36
drift_period = 24

tick_count = len(timestamps)
if tick_count == 0:
    raise ValueError("timestamps is empty; generate timestamps before prices")

# One random step and one drift value for every tick after the first. A single
# sized draw replaces the original per-tick Python loop.
random_steps = np.random.normal(0, step_std, size=tick_count - 1)
scaled_time = timestamps[1:] / drift_time_divisor
cyclic_drift = drift_amplitude * np.sin(2 * np.pi * scaled_time / drift_period)

# The running sum of the per-tick changes gives each price's offset from the
# start; this matches the element-by-element loop up to float rounding.
prices = np.empty(tick_count)
prices[0] = initial_price
prices[1:] = initial_price + np.cumsum(random_steps + cyclic_drift)

print(f"Price range: ${prices.min():.2f} to ${prices.max():.2f}")
print(f"Final price: ${prices[-1]:.2f} (change: {(prices[-1]/initial_price-1)*100:+.2f}%)")

Price range: $64.80 to $133.44
Final price: $119.50 (change: +19.50%)


## Save to Parquet

In [4]:
# Build the output table: timestamps truncated to whole milliseconds, paired
# with prices. Ticks that fall in the same millisecond are collapsed, keeping
# the first occurrence.
df = pd.DataFrame({
    'timestamp': (timestamps * 1000).astype(np.int64),  # seconds -> milliseconds
    'price': prices
}).drop_duplicates(subset='timestamp').reset_index(drop=True)

# Save to root project data directory, creating it if it does not exist yet.
output_file = '../../data/mock.parquet'
os.makedirs(os.path.dirname(output_file), exist_ok=True)
df.to_parquet(output_file, index=False)

print(f"Saved {len(df)} records to {output_file}")
print(f"File size: {os.path.getsize(output_file) / 1024:.1f} KB")
print("\nDataFrame info:")
# info() writes its report to stdout itself and returns None, so it must not
# be wrapped in print() (that would emit a stray "None" line).
df.info()
print("\nFirst 5 rows:")
print(df.head())

Saved 220534 records to ../../data/mock.parquet
File size: 3195.7 KB

DataFrame info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 220534 entries, 0 to 220533
Data columns (total 2 columns):
 #   Column     Non-Null Count   Dtype  
---  ------     --------------   -----  
 0   timestamp  220534 non-null  int64  
 1   price      220534 non-null  float64
dtypes: float64(1), int64(1)
memory usage: 3.4 MB
None

First 5 rows:
   timestamp       price
0       1505  100.000000
1       1961  100.036676
2       2046  100.104224
3       3051  100.166131
4       3667  100.206304
