In [3]:
#!/usr/bin/env python
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import datetime
import json
import os
from scipy.stats import t as student_t

class SyntheticMarketGenerator:
    """
    A stripped-down generator for synthetic OHLCV data with:
      - Randomized start date offsets for each stock
      - Bull/bear/correction/crash regimes
      - Unique daily drift & volatility per stock
      - Saving CSV & figure for each ticker
      - No explicit correlation to a 'market factor' or 'sector factor'
    """

    def __init__(self,
                 global_seed=None,
                 trading_days_per_year=252,
                 years=10,
                 default_bull_drift=0.12,
                 default_bear_drift=-0.10,
                 default_upward_bias=0.08,
                 default_bull_vol=0.15,
                 default_bear_vol=0.25):
        """
        global_seed: If not None, seeds NumPy RNG once. Avoid re-creating this in a loop
                     if you want distinct stocks in the same run.
        trading_days_per_year, years: define the timeline for each stock
        default_bull_drift, default_bear_drift, default_upward_bias:
            central values for drift in bull, bear, & an overall upward bias. 
            We'll randomize around these for each stock (unless changed).
        default_bull_vol, default_bear_vol: same logic for vol.
        """
        if global_seed is not None:
            np.random.seed(global_seed)

        self.trading_days_per_year = trading_days_per_year
        self.years = years
        self.total_days = trading_days_per_year * years

        # Base parameters around which we'll randomize
        self.default_bull_drift = default_bull_drift
        self.default_bear_drift = default_bear_drift
        self.default_upward_bias = default_upward_bias
        self.default_bull_vol = default_bull_vol
        self.default_bear_vol = default_bear_vol

        # Additional generator parameters
        self.params = {
            'flash_crash_prob': 0.0002,  # Probability each day
            'flash_crash_magnitude': (-0.15, -0.05),  # in log-returns
            'earnings_jump_prob': 0.01,
            'earnings_jump_magnitude': (-0.08, 0.12),
            'degrees_of_freedom': 8,     # Student-t distribution
            'vol_of_vol': 0.05,          # daily vol-of-vol
            'vol_mean_reversion': 0.80,  # how strongly daily vol reverts
            'base_vol': 0.10,            # floor for annual vol
        }

        # Regime transitions. Let's not stick in bull too strongly:
        self.regime_transitions = {
            'bull_to_bull': 0.95,
            'bull_to_bear': 0.01,
            'bull_to_correction': 0.04,
            'bear_to_bear': 0.90,
            'bear_to_bull': 0.10,
            'correction_length': (5, 15),
            'correction_depth': (-0.10, -0.03),
        }

        # Regime labels
        self.BULL = "bull"
        self.BEAR = "bear"
        self.CORRECTION = "correction"
        self.CRASH = "crash"
        self.RECOVERY = "recovery"

    def generate_stock_data(self, ticker="STK", initial_price=None, randomize_params=True):
        """
        Generate daily OHLCV data for one stock, returning a DataFrame.

        ticker: Label for the DataFrame
        initial_price: If None, pick a random start [50..150]
        randomize_params: If True, randomize bull_drift, bear_drift, upward_bias, 
                          bull_vol, bear_vol around the default values.
        """
        # Possibly randomize drift/vol for this stock
        if randomize_params:
            bull_drift = np.random.normal(self.default_bull_drift, 0.03)
            bear_drift = np.random.normal(self.default_bear_drift, 0.02)
            upward_bias = np.random.normal(self.default_upward_bias, 0.02)
            bull_vol = np.random.normal(self.default_bull_vol, 0.03)
            bear_vol = np.random.normal(self.default_bear_vol, 0.03)
        else:
            bull_drift = self.default_bull_drift
            bear_drift = self.default_bear_drift
            upward_bias = self.default_upward_bias
            bull_vol = self.default_bull_vol
            bear_vol = self.default_bear_vol

        # Ensure vol isn't negative:
        bull_vol = max(bull_vol, 0.02)
        bear_vol = max(bear_vol, 0.05)

        # Build the date range, with random offset
        dates = self._generate_dates_with_offset()

        # Allocate arrays
        N = len(dates)
        close_prices = np.zeros(N)
        open_prices  = np.zeros(N)
        high_prices  = np.zeros(N)
        low_prices   = np.zeros(N)
        volumes      = np.zeros(N)
        regimes      = np.array([self.BULL]*N, dtype=object)
        daily_vols   = np.zeros(N)
        log_returns  = np.zeros(N)

        # Random initial price if not specified
        if initial_price is None:
            initial_price = np.random.uniform(50, 150)
        close_prices[0] = initial_price

        # Start in bull regime
        current_regime = self.BULL
        # Start daily vol in bull daily terms
        daily_vol = bull_vol / np.sqrt(self.trading_days_per_year)

        # For correction tracking
        correction_target = None
        correction_end    = None

        for i in range(1, N):
            # Possibly update regime
            current_regime, correction_target, correction_end = self._update_regime(
                current_regime, i, regimes, correction_target, correction_end
            )
            regimes[i] = current_regime

            # Compute annual drift/vol for this regime
            drift_annual, vol_annual = self._get_regime_drift_vol(
                current_regime, bull_drift, bear_drift,
                bull_vol, bear_vol, i,
                correction_target, correction_end
            )
            # Convert to daily
            daily_drift = np.log(1 + drift_annual) / self.trading_days_per_year
            desired_vol = vol_annual / np.sqrt(self.trading_days_per_year)

            # Add upward bias
            daily_drift += upward_bias / self.trading_days_per_year

            # Evolve volatility (mean reversion + random shock)
            daily_vol = (
                self.params['vol_mean_reversion']*desired_vol +
                (1 - self.params['vol_mean_reversion'])*daily_vol +
                np.random.normal(0, self.params['vol_of_vol']/self.trading_days_per_year)
            )
            # Floor at base vol
            min_daily_vol = self.params['base_vol']/np.sqrt(self.trading_days_per_year)
            daily_vol = max(daily_vol, min_daily_vol)
            daily_vols[i] = daily_vol

            # Student-t shock
            shock = student_t.rvs(df=self.params['degrees_of_freedom'])
            shock /= np.sqrt(self.params['degrees_of_freedom']/(self.params['degrees_of_freedom']-2))

            daily_log_return = daily_drift + daily_vol*shock

            # Possibly apply special events (flash crash, etc.)
            if current_regime not in [self.CRASH, self.CORRECTION]:
                daily_log_return = self._special_events(daily_log_return)

            log_returns[i] = daily_log_return
            # Update close
            close_prices[i] = close_prices[i-1]*np.exp(daily_log_return)

            # Generate OHLC
            o,h,l = self._make_ohlc(close_prices[i-1], close_prices[i], daily_vol, current_regime)
            open_prices[i], high_prices[i], low_prices[i] = o,h,l

            # Volume
            volumes[i] = self._make_volume(daily_log_return, daily_vol, current_regime)

        # Build DataFrame
        df = pd.DataFrame({
            'Date': dates,
            'Open': open_prices,
            'High': high_prices,
            'Low': low_prices,
            'Close': close_prices,
            'Volume': volumes.astype(int),
            'Regime': regimes,
            'Volatility': daily_vols,
            'LogReturn': log_returns
        })
        df.set_index('Date', inplace=True)

        # Attach some metadata
        df.attrs['ticker'] = ticker
        df.attrs['bull_drift'] = bull_drift
        df.attrs['bear_drift'] = bear_drift
        df.attrs['upward_bias'] = upward_bias
        df.attrs['bull_vol'] = bull_vol
        df.attrs['bear_vol'] = bear_vol

        return df

    def _generate_dates_with_offset(self):
        """Generate a list of trading dates, skipping weekends, plus a random offset up to 60 days."""
        offset = np.random.randint(0, 61)
        start_date = datetime.datetime(2010,1,1) + datetime.timedelta(days=offset)

        dates = []
        current = start_date
        while len(dates) < self.total_days:
            if current.weekday() < 5:  # Mon-Fri
                dates.append(current)
            current += datetime.timedelta(days=1)
        return dates

    def _update_regime(self, current_regime, i, regimes, corr_target, corr_end):
        """Simple regime switching logic with daily probability draws."""
        r = np.random.random()

        if current_regime == self.BULL:
            if r < self.regime_transitions['bull_to_bear']:
                current_regime = self.BEAR
            elif r < (self.regime_transitions['bull_to_bear']
                      + self.regime_transitions['bull_to_correction']):
                current_regime = self.CORRECTION
                dur = np.random.randint(*self.regime_transitions['correction_length'])
                corr_end = i+dur
                corr_target = np.random.uniform(*self.regime_transitions['correction_depth'])
        elif current_regime == self.BEAR:
            if r < self.regime_transitions['bear_to_bull']:
                current_regime = self.RECOVERY
                bear_days = np.sum(regimes[:i]==self.BEAR)
                corr_end = i + int(bear_days*0.5)
        elif current_regime == self.CORRECTION:
            if corr_end is not None and i>=corr_end:
                current_regime = self.BULL
                corr_target = None
                corr_end = None
        elif current_regime == self.RECOVERY:
            if corr_end is not None and i>=corr_end:
                current_regime = self.BULL
                corr_end = None
        elif current_regime == self.CRASH:
            # revert to bull after crash
            current_regime = self.BULL

        return current_regime, corr_target, corr_end

    def _get_regime_drift_vol(self, regime, bull_drift, bear_drift,
                              bull_vol, bear_vol,
                              day_i, corr_target, corr_end):
        """Pick annual drift & vol based on regime."""
        if regime == self.BULL:
            drift = bull_drift
            vol   = bull_vol
        elif regime == self.BEAR:
            drift = bear_drift
            vol   = bear_vol
        elif regime == self.CORRECTION:
            if corr_target is None:
                corr_target = np.random.uniform(*self.regime_transitions['correction_depth'])
            drift = corr_target
            vol   = (bull_vol + bear_vol)*0.5
        elif regime == self.RECOVERY:
            drift = bull_drift*1.5
            vol   = bull_vol + 0.3*(bear_vol - bull_vol)
        elif regime == self.CRASH:
            drift = np.random.uniform(*self.params['flash_crash_magnitude'])
            vol   = bear_vol*2
        else:
            drift = bull_drift
            vol   = bull_vol

        return drift, vol

    def _special_events(self, daily_log_return):
        """Check for flash crash or earnings jump events."""
        # Flash crash
        if np.random.random() < self.params['flash_crash_prob']:
            return np.random.uniform(*self.params['flash_crash_magnitude'])
        # Earnings jump
        if np.random.random() < self.params['earnings_jump_prob']:
            if np.random.random() < 0.55:
                jump = np.random.uniform(0, self.params['earnings_jump_magnitude'][1])
            else:
                jump = np.random.uniform(self.params['earnings_jump_magnitude'][0], 0)
            return daily_log_return + jump
        return daily_log_return

    def _make_ohlc(self, prev_close, curr_close, daily_vol, regime):
        """Construct open/high/low from yesterday's close & today's close + random wiggles."""
        if regime in [self.BEAR, self.CRASH]:
            daily_range = 0.03
        else:
            daily_range = 0.02

        bull_daily_vol = self.default_bull_vol / np.sqrt(self.trading_days_per_year)
        factor = daily_vol/bull_daily_vol if bull_daily_vol>0 else 1
        daily_range *= factor

        open_frac = np.clip(np.random.normal(0.5, 0.2), 0, 1)
        open_price = prev_close + (curr_close - prev_close)*open_frac

        if curr_close>prev_close:
            up_wick = np.random.uniform(0, daily_range*0.7)
            down_wick = np.random.uniform(0, daily_range*0.3)
        else:
            up_wick = np.random.uniform(0, daily_range*0.3)
            down_wick = np.random.uniform(0, daily_range*0.7)

        high_price = max(open_price, curr_close) + up_wick
        low_price  = min(open_price, curr_close) - down_wick
        if high_price<low_price:
            high_price = low_price*1.001

        return open_price, high_price, low_price

    def _make_volume(self, daily_log_return, daily_vol, regime):
        """Pick a daily volume starting ~1M shares, scaled by volatility, daily move, etc."""
        base_volume = 1_000_000
        bull_daily_vol = self.default_bull_vol / np.sqrt(self.trading_days_per_year)
        if bull_daily_vol == 0:
            bull_daily_vol = 1e-9

        vol_factor  = 1 + 1.5*(daily_vol/bull_daily_vol - 1)
        move_factor = 1 + 0.8*(abs(daily_log_return)/(daily_vol if daily_vol>0 else 1e-9))
        random_factor = np.random.lognormal(0, 0.6)

        volume = base_volume * vol_factor * move_factor * random_factor

        if regime == self.CRASH:
            volume *= 5
        elif regime == self.BEAR:
            volume *= 1.3

        return volume

    def plot_stock(self, df, ticker, save_path):
        """
        Draw the close price with a translucent background band per regime
        and write the figure to `save_path` as a PNG.

        df: frame with a 'Close' and a 'Regime' column (as produced by
            `generate_stock_data`); its index is used as the x axis.
        ticker: used in the plot title.
        save_path: destination file for the figure.
        """
        # Background colour for each regime label; legend order follows this dict
        shading = {
            self.BULL: 'lightgreen',
            self.BEAR: 'lightcoral',
            self.CORRECTION: 'lightyellow',
            self.CRASH: 'red',
            self.RECOVERY: 'lightblue'
        }

        fig, ax = plt.subplots(figsize=(10,6))
        ax.plot(df.index, df['Close'], 'k-', lw=1.5, label='Close')

        # One fill per regime that actually occurs; the band spans from zero
        # to 10% above the highest close.
        for reg, color in shading.items():
            in_regime = (df['Regime']==reg)
            if not in_regime.any():
                continue
            band_top = df['Close'].max()*1.1
            ax.fill_between(df.index, 0, band_top,
                            where=in_regime, color=color, alpha=0.2,
                            label=f"{reg}")

        ax.set_title(f"{ticker} Synthetic Price", fontsize=14)
        ax.set_ylabel("Price")
        ax.grid(True, alpha=0.3)
        ax.legend(loc='best')

        fig.savefig(save_path, dpi=150, bbox_inches='tight')
        # Release the figure so generating many tickers does not accumulate figures
        plt.close(fig)

    def generate_portfolio(self, tickers=None, output_dir="synthetic_portfolio"):
        """
        Generate one synthetic stock per ticker and write its artefacts.

        tickers: iterable of ticker labels; defaults to STK01..STK10.
        output_dir: directory (created if missing) receiving, for each
                    ticker T, `T.csv`, `T.png` and `T_metadata.json`.

        Returns a dict mapping ticker -> DataFrame.
        """
        if tickers is None:
            tickers = [f"STK{i+1:02d}" for i in range(10)]

        os.makedirs(output_dir, exist_ok=True)

        # Per-stock parameters copied from df.attrs into the JSON sidecar
        meta_keys = ('bull_drift', 'bear_drift', 'upward_bias', 'bull_vol', 'bear_vol')
        all_stocks = {}

        for t in tickers:
            df = self.generate_stock_data(ticker=t)
            all_stocks[t] = df

            # Data as CSV
            df.to_csv(os.path.join(output_dir, f"{t}.csv"))

            # Price chart as PNG
            self.plot_stock(df, t, os.path.join(output_dir, f"{t}.png"))

            # Randomized parameters as JSON
            meta = {'ticker': t}
            meta.update((key, df.attrs[key]) for key in meta_keys)
            with open(os.path.join(output_dir, f"{t}_metadata.json"), 'w') as f:
                json.dump(meta, f, indent=2)

        print(f"Generated {len(tickers)} stocks into '{output_dir}' (CSV + PNG + JSON).")
        return all_stocks

def main():
    """Generate a seeded five-ticker demo portfolio into 'my_synthetic_portfolio'."""
    # Fixed seed so repeated runs produce the same files
    generator_config = {
        'global_seed': 42,
        'default_bull_drift': 0.12,
        'default_bear_drift': -0.10,
        'default_upward_bias': 0.08,
        'default_bull_vol': 0.15,
        'default_bear_vol': 0.25,
    }
    demo_tickers = ["APPLE", "GOOG", "TSLA", "XOM", "JNJ"]

    market = SyntheticMarketGenerator(**generator_config)
    market.generate_portfolio(tickers=demo_tickers, output_dir="my_synthetic_portfolio")

if __name__ == "__main__":
    main()


Generated 5 stocks into 'my_synthetic_portfolio' (CSV + PNG + JSON).
