In [1]:
#!/usr/bin/env python
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import datetime
import json
import os
from scipy.stats import t as student_t

class SyntheticMarketGenerator:
    """
    A generator for synthetic OHLCV stock data, using:
      - Random regime switching (bull/bear/correction/crash/recovery)
      - Random initial date offsets
      - Random drift/vol around default bull/bear means
      - Plotting and saving each stock to CSV, PNG, and metadata JSON
      - No explicit market/sector correlation
    """

    def __init__(self,
                 global_seed=None,
                 trading_days_per_year=252,
                 years=10,
                 default_bull_drift=0.12,
                 default_bear_drift=-0.10,
                 default_upward_bias=0.08,
                 default_bull_vol=0.15,
                 default_bear_vol=0.25):
        """
        global_seed: if not None, seeds NumPy RNG once. 
          Avoid re-initializing this in a loop if you want unique data for each stock.
        trading_days_per_year, years:
          define the timeline length for each stock (~252 * years days).
        default_bull_drift, default_bear_drift, default_upward_bias:
          center values for annual drift in bull/bear + upward bias. We'll randomize around these.
        default_bull_vol, default_bear_vol:
          same logic for volatility in bull vs. bear.
        """
        if global_seed is not None:
            np.random.seed(global_seed)

        self.trading_days_per_year = trading_days_per_year
        self.years = years
        self.total_days = trading_days_per_year * years

        # Store the "center" drift/vol values
        self.default_bull_drift = default_bull_drift
        self.default_bear_drift = default_bear_drift
        self.default_upward_bias = default_upward_bias
        self.default_bull_vol = default_bull_vol
        self.default_bear_vol = default_bear_vol

        # Additional generator parameters
        self.params = {
            'flash_crash_prob': 0.0002,
            'flash_crash_magnitude': (-0.15, -0.05), 
            'earnings_jump_prob': 0.01,
            'earnings_jump_magnitude': (-0.08, 0.12),
            'degrees_of_freedom': 8,      # Student-t distribution
            'vol_of_vol': 0.05,           # daily vol-of-vol
            'vol_mean_reversion': 0.80,   # how strongly daily vol reverts
            'base_vol': 0.10,             # floor for annual vol
        }

        # Regime transitions (lower bull->bull => more switching)
        self.regime_transitions = {
            'bull_to_bull': 0.95,
            'bull_to_bear': 0.01,
            'bull_to_correction': 0.04,
            'bear_to_bear': 0.90,
            'bear_to_bull': 0.10,
            'correction_length': (5, 15),
            'correction_depth': (-0.10, -0.03),
        }

        # Regime labels
        self.BULL = "bull"
        self.BEAR = "bear"
        self.CORRECTION = "correction"
        self.CRASH = "crash"
        self.RECOVERY = "recovery"

    def generate_stock_data(self, ticker="STK", initial_price=None, randomize_params=True):
        """
        Generate daily OHLCV data for a single stock and return a DataFrame.

        ticker: label for the DataFrame
        initial_price: if None, pick a random start [50..150]
        randomize_params: if True, randomize bull/bear drift/vol around the default values
        """
        if randomize_params:
            # Randomly tweak these parameters for each stock
            bull_drift = np.random.normal(self.default_bull_drift, 0.03)
            bear_drift = np.random.normal(self.default_bear_drift, 0.02)
            upward_bias = np.random.normal(self.default_upward_bias, 0.02)
            bull_vol = np.random.normal(self.default_bull_vol, 0.03)
            bear_vol = np.random.normal(self.default_bear_vol, 0.03)
        else:
            bull_drift = self.default_bull_drift
            bear_drift = self.default_bear_drift
            upward_bias = self.default_upward_bias
            bull_vol = self.default_bull_vol
            bear_vol = self.default_bear_vol

        # Ensure vol isn't negative:
        bull_vol = max(bull_vol, 0.02)
        bear_vol = max(bear_vol, 0.05)

        # Create date range with random offset
        dates = self._generate_dates_with_offset()

        # Allocate arrays
        N = len(dates)
        close_prices = np.zeros(N)
        open_prices  = np.zeros(N)
        high_prices  = np.zeros(N)
        low_prices   = np.zeros(N)
        volumes      = np.zeros(N)
        regimes      = np.array([self.BULL]*N, dtype=object)
        daily_vols   = np.zeros(N)
        log_returns  = np.zeros(N)

        # Pick initial price
        if initial_price is None:
            initial_price = np.random.uniform(50, 150)
        close_prices[0] = initial_price

        # Start in bull regime
        current_regime = self.BULL
        daily_vol = bull_vol / np.sqrt(self.trading_days_per_year)

        correction_target = None
        correction_end = None

        for i in range(1, N):
            # Possibly update regime
            current_regime, correction_target, correction_end = self._update_regime(
                current_regime, i, regimes, correction_target, correction_end
            )
            regimes[i] = current_regime

            # Determine drift/vol
            drift_annual, vol_annual = self._get_regime_drift_vol(
                current_regime, bull_drift, bear_drift, bull_vol, bear_vol,
                i, correction_target, correction_end
            )

            daily_drift = np.log(1 + drift_annual) / self.trading_days_per_year
            desired_vol = vol_annual / np.sqrt(self.trading_days_per_year)

            # Add upward bias
            daily_drift += upward_bias / self.trading_days_per_year

            # Mean revert volatility
            daily_vol = (
                self.params['vol_mean_reversion']*desired_vol +
                (1 - self.params['vol_mean_reversion'])*daily_vol +
                np.random.normal(0, self.params['vol_of_vol']/self.trading_days_per_year)
            )
            # Floor it
            min_daily_vol = self.params['base_vol']/np.sqrt(self.trading_days_per_year)
            daily_vol = max(daily_vol, min_daily_vol)
            daily_vols[i] = daily_vol

            # Student-t shock
            shock = student_t.rvs(df=self.params['degrees_of_freedom'])
            shock /= np.sqrt(self.params['degrees_of_freedom']/(self.params['degrees_of_freedom'] - 2))

            daily_log_return = daily_drift + daily_vol*shock

            # Special events if not in CRASH/CORRECTION
            if current_regime not in [self.CRASH, self.CORRECTION]:
                daily_log_return = self._special_events(daily_log_return)

            log_returns[i] = daily_log_return
            close_prices[i] = close_prices[i-1]*np.exp(daily_log_return)

            # Generate OHLC
            o, h, l = self._make_ohlc(close_prices[i-1], close_prices[i], daily_vol, current_regime)
            open_prices[i], high_prices[i], low_prices[i] = o, h, l

            # Volume
            volumes[i] = self._make_volume(daily_log_return, daily_vol, current_regime)

        df = pd.DataFrame({
            'Date': dates,
            'Open': open_prices,
            'High': high_prices,
            'Low': low_prices,
            'Close': close_prices,
            'Volume': volumes.astype(int),
            'Regime': regimes,
            'Volatility': daily_vols,
            'LogReturn': log_returns
        })
        df.set_index('Date', inplace=True)

        # Attach metadata
        df.attrs['ticker'] = ticker
        df.attrs['bull_drift'] = bull_drift
        df.attrs['bear_drift'] = bear_drift
        df.attrs['upward_bias'] = upward_bias
        df.attrs['bull_vol'] = bull_vol
        df.attrs['bear_vol'] = bear_vol

        return df

    def _generate_dates_with_offset(self):
        """Generate a list of trading dates with a random offset up to 60 days."""
        offset = np.random.randint(0, 61)
        start_date = datetime.datetime(2010,1,1) + datetime.timedelta(days=offset)

        dates = []
        current = start_date
        while len(dates) < self.total_days:
            if current.weekday() < 5:  # Monday to Friday
                dates.append(current)
            current += datetime.timedelta(days=1)
        return dates

    def _update_regime(self, current_regime, i, regimes, corr_target, corr_end):
        r = np.random.random()
        if current_regime == self.BULL:
            if r < self.regime_transitions['bull_to_bear']:
                current_regime = self.BEAR
            elif r < (self.regime_transitions['bull_to_bear'] +
                      self.regime_transitions['bull_to_correction']):
                current_regime = self.CORRECTION
                dur = np.random.randint(*self.regime_transitions['correction_length'])
                corr_end = i + dur
                corr_target = np.random.uniform(*self.regime_transitions['correction_depth'])
        elif current_regime == self.BEAR:
            if r < self.regime_transitions['bear_to_bull']:
                current_regime = self.RECOVERY
                bear_days = np.sum(regimes[:i] == self.BEAR)
                corr_end = i + int(bear_days*0.5)
        elif current_regime == self.CORRECTION:
            if corr_end is not None and i >= corr_end:
                current_regime = self.BULL
                corr_target = None
                corr_end = None
        elif current_regime == self.RECOVERY:
            if corr_end is not None and i >= corr_end:
                current_regime = self.BULL
                corr_end = None
        elif current_regime == self.CRASH:
            current_regime = self.BULL
        return current_regime, corr_target, corr_end

    def _get_regime_drift_vol(self, regime, bull_drift, bear_drift,
                              bull_vol, bear_vol,
                              day_i, corr_target, corr_end):
        """Return annual drift & vol depending on the current regime."""
        if regime == self.BULL:
            drift = bull_drift
            vol   = bull_vol
        elif regime == self.BEAR:
            drift = bear_drift
            vol   = bear_vol
        elif regime == self.CORRECTION:
            if corr_target is None:
                corr_target = np.random.uniform(*self.regime_transitions['correction_depth'])
            drift = corr_target
            vol   = 0.5*(bull_vol + bear_vol)
        elif regime == self.RECOVERY:
            drift = bull_drift*1.5
            vol   = bull_vol + 0.3*(bear_vol - bull_vol)
        elif regime == self.CRASH:
            drift = np.random.uniform(*self.params['flash_crash_magnitude'])
            vol   = bear_vol*2
        else:
            drift = bull_drift
            vol   = bull_vol
        return drift, vol

    def _special_events(self, daily_log_return):
        """Flash crash or earnings jump with given probabilities."""
        if np.random.random() < self.params['flash_crash_prob']:
            return np.random.uniform(*self.params['flash_crash_magnitude'])
        if np.random.random() < self.params['earnings_jump_prob']:
            if np.random.random() < 0.55:
                jump = np.random.uniform(0, self.params['earnings_jump_magnitude'][1])
            else:
                jump = np.random.uniform(self.params['earnings_jump_magnitude'][0], 0)
            return daily_log_return + jump
        return daily_log_return

    def _make_ohlc(self, prev_close, curr_close, daily_vol, regime):
        """Construct open/high/low from close-to-close movement + random intraday range."""
        if regime in [self.BEAR, self.CRASH]:
            daily_range = 0.03
        else:
            daily_range = 0.02

        bull_daily_vol = self.default_bull_vol / np.sqrt(self.trading_days_per_year)
        factor = daily_vol / bull_daily_vol if bull_daily_vol > 0 else 1
        daily_range *= factor

        open_frac = np.clip(np.random.normal(0.5, 0.2), 0, 1)
        open_price = prev_close + (curr_close - prev_close)*open_frac

        if curr_close > prev_close:
            up_wick = np.random.uniform(0, daily_range*0.7)
            down_wick = np.random.uniform(0, daily_range*0.3)
        else:
            up_wick = np.random.uniform(0, daily_range*0.3)
            down_wick = np.random.uniform(0, daily_range*0.7)

        high_price = max(open_price, curr_close) + up_wick
        low_price  = min(open_price, curr_close) - down_wick

        if high_price < low_price:
            high_price = low_price * 1.001

        return open_price, high_price, low_price

    def _make_volume(self, daily_log_return, daily_vol, regime):
        """Pick daily volume starting ~1M, scaled by daily vol & big moves, etc."""
        base_volume = 1_000_000
        bull_daily_vol = self.default_bull_vol / np.sqrt(self.trading_days_per_year)
        if bull_daily_vol <= 0:
            bull_daily_vol = 1e-9

        vol_factor  = 1 + 1.5*(daily_vol / bull_daily_vol - 1)
        move_factor = 1
        if daily_vol > 0:
            move_factor = 1 + 0.8*(abs(daily_log_return)/daily_vol)
        random_factor = np.random.lognormal(0, 0.6)

        volume = base_volume * vol_factor * move_factor * random_factor

        if regime == self.CRASH:
            volume *= 5
        elif regime == self.BEAR:
            volume *= 1.3

        return volume

    def plot_stock(self, df, ticker, save_path):
        """
        Draw the 'Close' series as a line with a translucent background band
        for each regime present in df['Regime'], then write the figure to
        'save_path' and close it.
        """
        regime_colors = {
            self.BULL: 'lightgreen',
            self.BEAR: 'lightcoral',
            self.CORRECTION: 'lightyellow',
            self.CRASH: 'red',
            self.RECOVERY: 'lightblue'
        }

        fig, ax = plt.subplots(figsize=(10,6))
        closes = df['Close']
        ax.plot(df.index, closes, 'k-', lw=1.5, label='Close')

        # Bands run from 0 up to 10% above the highest close
        band_top = closes.max() * 1.1
        for regime_val, color in regime_colors.items():
            in_regime = (df['Regime'] == regime_val)
            if not in_regime.any():
                continue  # regime never occurred: no band, no legend entry
            ax.fill_between(
                df.index, 0, band_top,
                where=in_regime, color=color, alpha=0.2,
                label=regime_val
            )

        ax.set_title(f"{ticker} Synthetic Price", fontsize=14)
        ax.set_ylabel("Price")
        ax.grid(True, alpha=0.3)
        ax.legend(loc='best')

        fig.savefig(save_path, dpi=150, bbox_inches='tight')
        plt.close(fig)

    def generate_random_ticker(self, length=4):
        """
        Return a random string of 'length' uppercase letters, e.g. 'ABCD'.

        Letters are drawn independently with replacement; nothing here
        guarantees that successive tickers are distinct.
        """
        alphabet = list("ABCDEFGHIJKLMNOPQRSTUVWXYZ")
        picks = np.random.choice(alphabet, size=length, replace=True)
        return "".join(picks)

    def generate_portfolio(self, num_stocks=5, output_dir="synthetic_portfolio"):
        """
        Generate data for 'num_stocks' random tickers and save CSV + PNG +
        metadata JSON for each into 'output_dir' (created if missing).

        Returns a dict mapping ticker -> DataFrame with exactly 'num_stocks'
        entries. Ticker names are redrawn on collision; previously a repeated
        name silently overwrote the earlier stock's files and dict entry, so
        fewer stocks than requested could be produced.

        Raises ValueError if 'num_stocks' exceeds the number of distinct
        3- and 4-letter tickers.
        """
        max_unique = 26**3 + 26**4
        if num_stocks > max_unique:
            raise ValueError(
                f"num_stocks={num_stocks} exceeds the {max_unique} distinct 3-4 letter tickers"
            )

        os.makedirs(output_dir, exist_ok=True)
        all_stocks = {}

        for _ in range(num_stocks):
            # Make a random 3- or 4-letter ticker name, redrawing until unused
            while True:
                name_len = np.random.choice([3, 4])
                ticker_name = self.generate_random_ticker(length=name_len)
                if ticker_name not in all_stocks:
                    break

            df = self.generate_stock_data(ticker=ticker_name)
            all_stocks[ticker_name] = df

            # Save CSV
            csv_path = os.path.join(output_dir, f"{ticker_name}.csv")
            df.to_csv(csv_path)

            # Save figure
            fig_path = os.path.join(output_dir, f"{ticker_name}.png")
            self.plot_stock(df, ticker_name, fig_path)

            # Save metadata as JSON
            meta = {
                'ticker': ticker_name,
                'bull_drift': df.attrs['bull_drift'],
                'bear_drift': df.attrs['bear_drift'],
                'upward_bias': df.attrs['upward_bias'],
                'bull_vol': df.attrs['bull_vol'],
                'bear_vol': df.attrs['bear_vol']
            }
            meta_path = os.path.join(output_dir, f"{ticker_name}_metadata.json")
            with open(meta_path, 'w') as f:
                json.dump(meta, f, indent=2)

        print(f"Generated {num_stocks} random stocks into '{output_dir}'.")
        return all_stocks

def main():
    """Example run: seed the RNG once, then write a 50-stock portfolio to disk."""
    generator = SyntheticMarketGenerator(global_seed=42)
    generator.generate_portfolio(
        num_stocks=50,
        output_dir="random_ticker_portfolio",
    )


if __name__ == "__main__":
    main()


Generated 50 random stocks into 'random_ticker_portfolio'.


In [3]:
#!/usr/bin/env python
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import datetime
import json
import os
from scipy.stats import t as student_t

class ComplexSyntheticMarketGenerator:
    """
    Synthetic market data generator with:
      - A global bull/bear regime that flips with certain probabilities
      - GARCH(1,1)-style volatility for each stock
      - Random 'news events' that can hit the entire market or a single sector
      - A slow 'macro cycle' that modifies drift over multi-year periods
      - Random stock-specific growth rates
      - Saving each stock as CSV, PNG chart, plus JSON metadata
    """

    def __init__(self,
                 global_seed=None,
                 trading_days_per_year=252,
                 years=10,
                 # Global regime settings
                 prob_bull_to_bear=0.02,
                 prob_bear_to_bull=0.05,
                 global_bull_drift=0.10,   # annual drift in bull
                 global_bear_drift=-0.05, # annual drift in bear
                 # GARCH params
                 garch_w=1e-5,  
                 garch_a=0.05,
                 garch_b=0.90,
                 # slow macro cycle
                 macro_cycle_length=5,    # half-cycle in years
                 macro_cycle_amplitude=0.05  # amplitude of cyclical drift
                 ):
        """
        global_seed: if set, seeds NumPy RNG once. 
                     Use a single instance for all stocks to get unique draws.

        trading_days_per_year, years: define total timeline (e.g., 2520 days for 10 years).

        prob_bull_to_bear, prob_bear_to_bull: daily chance that the global market flips regime.

        global_bull_drift, global_bear_drift: base annual drift used for the global regime.
          Each stock will add its own stock-specific drift on top of these.

        GARCH(1,1) parameters: w, a, b in the formula
          sigma_t^2 = w + a * e_{t-1}^2 + b * sigma_{t-1}^2
          (We'll call them garch_w, garch_a, garch_b.)

        macro_cycle_length: half-cycle in years for a slow 'economic cycle' wave
          For example, if macro_cycle_length=5, then a full wave is ~10 years,
          so your total simulation might see exactly one up-then-down cycle.
        
        macro_cycle_amplitude: how much drift is added/subtracted at the top/bottom of the cycle.
          E.g., if amplitude=0.05, then at the peak of the wave you add +0.05 annual drift,
          at the trough you add -0.05, and so on.
        """
        if global_seed is not None:
            np.random.seed(global_seed)

        # Timeline
        self.trading_days_per_year = trading_days_per_year
        self.years = years
        self.total_days = trading_days_per_year * years

        # Global regime probabilities
        self.prob_bull_to_bear = prob_bull_to_bear
        self.prob_bear_to_bull = prob_bear_to_bull

        # Global bull/bear drift
        self.global_bull_drift = global_bull_drift
        self.global_bear_drift = global_bear_drift

        # GARCH parameters
        self.garch_w = garch_w
        self.garch_a = garch_a
        self.garch_b = garch_b

        # Macro cycle
        self.macro_cycle_length = macro_cycle_length
        self.macro_cycle_amplitude = macro_cycle_amplitude

        # For random 'news events'
        self.params_news = {
            'market_event_prob': 0.001,   # daily chance of entire market event
            'sector_event_prob': 0.002,   # daily chance of a sector-only event
            'market_event_magnitude': (-0.03, 0.05),  # possible negative or positive
            'sector_event_magnitude': (-0.05, 0.08),
        }

        # We'll define a handful of sample sectors:
        self.sectors = ["TECH", "ENERGY", "FINANCE", "RETAIL", "HEALTH", "UTILITY", "INDUSTRIAL"]

    def generate_portfolio(self, num_stocks=5, output_dir="complex_synthetic_portfolio"):
        """
        Generate data for 'num_stocks' random stocks, each assigned a random
        sector and its own drift, and save CSV + PNG + JSON metadata for each
        into 'output_dir' (created if missing).

        All stocks share:
          - one trading-date timeline
          - one 'global regime' time series
          - one macro-cycle drift offset series
          - one list of daily market/sector news events

        Returns a dict mapping ticker -> DataFrame with exactly 'num_stocks'
        entries. Ticker names are redrawn on collision; previously a repeated
        name silently overwrote the earlier stock's files and dict entry.

        Raises ValueError if 'num_stocks' exceeds the number of distinct
        3- and 4-letter tickers.
        """
        max_unique = 26**3 + 26**4
        if num_stocks > max_unique:
            raise ValueError(
                f"num_stocks={num_stocks} exceeds the {max_unique} distinct 3-4 letter tickers"
            )

        os.makedirs(output_dir, exist_ok=True)

        # 1) Daily date array, skipping weekends
        dates = self._generate_trading_dates()

        # 2) 'Global regime' label for each day
        global_regimes = self._simulate_global_regimes(len(dates))

        # 3) Slow macro cycle that modifies drift over time
        macro_cycle_array = self._make_macro_cycle(len(dates))

        # 4) Per-day (market_jump, sector_jump, sector_impacted) tuples; each
        #    stock adds the sector jump only when its own sector is impacted
        news_events = self._generate_news_events(len(dates))

        # 5) Simulate and save each stock
        all_stocks = {}
        for _ in range(num_stocks):
            # pick a random ticker name, redrawing until it is unused
            ticker = self._make_random_ticker()
            while ticker in all_stocks:
                ticker = self._make_random_ticker()
            # pick a random sector
            sector = np.random.choice(self.sectors)
            # stock-specific annual drift, ~ normal(0.02, 0.03)
            stock_specific_drift = np.random.normal(0.02, 0.03)

            df_stock = self._simulate_one_stock(
                ticker, sector, stock_specific_drift,
                dates, global_regimes, macro_cycle_array, news_events
            )

            all_stocks[ticker] = df_stock

            # Save data
            csv_path = os.path.join(output_dir, f"{ticker}.csv")
            df_stock.to_csv(csv_path)

            # Save figure
            fig_path = os.path.join(output_dir, f"{ticker}.png")
            self._plot_stock(df_stock, fig_path)

            # Save metadata
            meta = {
                'ticker': ticker,
                'sector': sector,
                'stock_specific_drift': stock_specific_drift
            }
            meta_path = os.path.join(output_dir, f"{ticker}_metadata.json")
            with open(meta_path, 'w') as f:
                json.dump(meta, f, indent=2)

        print(f"Generated {num_stocks} stocks into '{output_dir}'.")
        return all_stocks

    def _simulate_one_stock(self, ticker, sector, stock_drift, dates,
                            global_regimes, macro_cycle_array, news_events):
        """
        Simulate daily prices for one stock using:
          - GARCH(1,1) volatility
          - global regime drift (bull/bear) + stock drift + macro cycle
          - news events if relevant to the entire market or this sector
        """
        N = len(dates)
        # Arrays to store results
        close_prices = np.zeros(N)
        open_prices = np.zeros(N)
        high_prices = np.zeros(N)
        low_prices  = np.zeros(N)
        volumes     = np.zeros(N)
        log_returns = np.zeros(N)
        daily_vols  = np.zeros(N)
        # We'll store the final 'Regime' as the global regime for reference
        regimes     = global_regimes.copy()

        # pick random initial price
        close_prices[0] = np.random.uniform(50, 150)

        # GARCH initialization
        # we'll store e_{t-1}, sigma_{t-1}^2
        prev_residual = 0.0
        prev_sigma_sq = (0.1)**2  # e.g. start with some guess for variance

        for i in range(1, N):
            # 1) figure out daily drift from global regime
            if regimes[i] == "bull":
                # annual drift
                regime_drift = self.global_bull_drift
            else:  # "bear"
                regime_drift = self.global_bear_drift

            # 2) add stock drift + macro cycle
            #    e.g. total annual drift
            daily_drift_annual = regime_drift + stock_drift + macro_cycle_array[i]
            # convert to daily log drift
            daily_drift = np.log(1 + daily_drift_annual)/self.trading_days_per_year

            # 3) GARCH(1,1) for sigma^2
            # sigma_t^2 = w + a * e_{t-1}^2 + b * sigma_{t-1}^2
            sigma_sq = ( self.garch_w
                         + self.garch_a*(prev_residual**2)
                         + self.garch_b*prev_sigma_sq )
            sigma = np.sqrt(max(sigma_sq, 1e-12))  # avoid zero
            daily_vols[i] = sigma

            # 4) random shock from Student-t
            shock = student_t.rvs(df=8)
            # scale shock by 'sigma'
            epsilon = shock * sigma

            # 5) incorporate news events for day i
            #    news_events[i] => (market_jump, sector_jump, impacted_sector)
            market_jump, sector_jump, impacted_sector = news_events[i]
            news_impact = market_jump
            if impacted_sector == sector: 
                news_impact += sector_jump

            # so total daily log return = drift + shock + news
            daily_log_return = daily_drift + epsilon + news_impact

            log_returns[i] = daily_log_return

            # update close price
            close_prices[i] = close_prices[i-1]*np.exp(daily_log_return)

            # store residual for next step
            # residual is "shock" = daily_log_return - daily_drift
            # but let's just use epsilon for simplicity
            prev_residual = epsilon
            prev_sigma_sq = sigma_sq

            # generate OHLC
            o, h, l = self._make_ohlc(close_prices[i-1], close_prices[i], sigma, regimes[i])
            open_prices[i], high_prices[i], low_prices[i] = o, h, l

            # generate volume
            volumes[i] = self._make_volume(daily_log_return, sigma, regimes[i])

        df = pd.DataFrame({
            'Date': dates,
            'Open': open_prices,
            'High': high_prices,
            'Low':  low_prices,
            'Close': close_prices,
            'Volume': volumes.astype(int),
            'Regime': regimes,
            'Volatility': daily_vols,
            'LogReturn': log_returns
        })
        df.set_index('Date', inplace=True)
        return df

    def _generate_trading_dates(self):
        """Generate a list of daily 'trading dates' skipping weekends, for self.total_days."""
        start_date = datetime.datetime(2010,1,1)
        dates = []
        current = start_date
        while len(dates) < self.total_days:
            if current.weekday() < 5:  # Mon-Fri
                dates.append(current)
            current += datetime.timedelta(days=1)
        return dates

    def _simulate_global_regimes(self, n_days):
        """
        Create a daily series of "bull" or "bear" for the entire market,
        flipping with certain daily probabilities.
        """
        regimes = np.array(["bull"]*n_days, dtype=object)
        # We'll start in bull
        for i in range(1, n_days):
            if regimes[i-1] == "bull":
                # prob bull->bear
                if np.random.random() < self.prob_bull_to_bear:
                    regimes[i] = "bear"
                else:
                    regimes[i] = "bull"
            else:  # was bear
                if np.random.random() < self.prob_bear_to_bull:
                    regimes[i] = "bull"
                else:
                    regimes[i] = "bear"
        return regimes

    def _make_macro_cycle(self, n_days):
        """
        A slow sinusoidal wave that goes from -amplitude to +amplitude over 'macro_cycle_length' years.
        e.g. if length=5, that's half a cycle in 5 years => full cycle in 10 years.
        We'll return an array of length n_days, each day giving an annual drift offset.
        """
        # total days for half cycle
        half_cycle_days = self.macro_cycle_length*self.trading_days_per_year
        # let x range from 0..2*pi over full cycle => we have 2*half_cycle
        # so in half_cycle_days, we move pi in the sine argument
        # We'll do a single wave over the entire timeline for simplicity
        # So if the entire timeline is n_days, we'll do n_days*(pi/half_cycle_days)
        # Then the sine goes from 0.. many cycles, let's do 1 cycle over the entire timeline
        # For more variety, you can do multiple cycles.
        x_array = np.linspace(0, np.pi*2, n_days)  # one full cycle = up + down
        # amplitude is e.g. 0.05 => +/- 5% drift
        wave = self.macro_cycle_amplitude * np.sin(x_array)
        # wave ranges -amplitude..+amplitude
        # This wave is an ANNUAL drift offset, e.g. +0.05 means +5% annual drift
        return wave

    def _generate_news_events(self, n_days):
        """
        For each day, randomly decide:
          - Is there a market-wide event?
          - Is there a sector-specific event?
        Return an array of tuples (market_jump, sector_jump, impacted_sector).
          market_jump: float for that day
          sector_jump: float for that day
          impacted_sector: which sector is impacted, or None
        """
        # For each day, we pick random draws
        events = []
        possible_sectors = self.sectors  # or you can pick from a subset
        for _ in range(n_days):
            market_jump = 0.0
            sector_jump = 0.0
            impacted_sector = None

            # market event?
            if np.random.random() < self.params_news['market_event_prob']:
                # pick magnitude
                market_jump = np.random.uniform(*self.params_news['market_event_magnitude'])
            # sector event?
            if np.random.random() < self.params_news['sector_event_prob']:
                sector_jump = np.random.uniform(*self.params_news['sector_event_magnitude'])
                impacted_sector = np.random.choice(possible_sectors)

            events.append((market_jump, sector_jump, impacted_sector))
        return events

    def _make_ohlc(self, prev_close, curr_close, daily_vol, regime):
        """
        Construct open/high/low from close-to-close movement plus random intraday range.
        We'll do a small approach:
          - daily_range ~ 0.02 or 0.03, scaled by daily_vol
          - open is a random fraction between prev_close & curr_close
        """
        if regime == "bear":
            base_range = 0.03
        else:
            base_range = 0.02

        # scale range by ratio of daily_vol to some reference
        reference_vol = 0.15 / np.sqrt(self.trading_days_per_year)  # if bull_vol=0.15
        scale_factor = daily_vol/(reference_vol if reference_vol>0 else 1e-9)
        daily_range = base_range*scale_factor

        open_frac = np.clip(np.random.normal(0.5, 0.15), 0, 1)
        open_price = prev_close + (curr_close - prev_close)*open_frac

        if curr_close>prev_close:
            up_wick = np.random.uniform(0, daily_range*0.7)
            down_wick = np.random.uniform(0, daily_range*0.3)
        else:
            up_wick = np.random.uniform(0, daily_range*0.3)
            down_wick = np.random.uniform(0, daily_range*0.7)

        high_price = max(open_price, curr_close)+up_wick
        low_price  = min(open_price, curr_close)-down_wick
        if high_price<low_price:
            high_price = low_price*1.001

        return open_price, high_price, low_price

    def _make_volume(self, daily_log_return, daily_vol, regime):
        """
        Basic volume function: start ~1e6, scale up if daily_vol is high or if the absolute return is big.
        Extra factor if regime is bear => panic trading, etc.
        """
        base_vol = 1_000_000
        reference_vol = 0.15/np.sqrt(self.trading_days_per_year)
        vol_factor = 1 + 2*(daily_vol/(reference_vol if reference_vol>0 else 1e-9) - 1)
        move_factor = 1 + 0.8*abs(daily_log_return)/(daily_vol if daily_vol>0 else 1e-9)
        r_factor = np.random.lognormal(0, 0.6)
        final_vol = base_vol*vol_factor*move_factor*r_factor
        if regime == "bear":
            final_vol *= 1.2
        return final_vol

    def _plot_stock(self, df, fig_path):
        """
        Draw the 'Close' series as a line with translucent bull/bear background
        bands taken from df['Regime'], then write the figure to 'fig_path' and
        close it.
        """
        fig, ax = plt.subplots(figsize=(10,6))
        closes = df['Close']
        ax.plot(df.index, closes, 'k-', lw=1.5, label="Close")

        # Bands run from 0 up to 10% above the highest close
        band_top = closes.max()*1.1
        bands = (
            ("bull", 'lightgreen', "Bull"),
            ("bear", 'lightcoral', "Bear"),
        )
        for regime_name, shade, legend_label in bands:
            in_regime = (df['Regime']==regime_name)
            if not in_regime.any():
                continue  # regime never occurred: no band, no legend entry
            ax.fill_between(df.index, 0, band_top, where=in_regime,
                            color=shade, alpha=0.2, label=legend_label)

        ax.set_title("Synthetic Stock Price", fontsize=14)
        ax.set_ylabel("Price")
        ax.grid(True, alpha=0.3)
        ax.legend(loc='best')

        fig.savefig(fig_path, dpi=150, bbox_inches='tight')
        plt.close(fig)

    def _make_random_ticker(self):
        """Create a random 3-4 letter uppercase ticker name."""
        length = np.random.choice([3,4])
        letters = list("ABCDEFGHIJKLMNOPQRSTUVWXYZ")
        arr = np.random.choice(letters, size=length, replace=True)
        return "".join(arr)

def main():
    """Example run: build a seeded generator with explicit settings and write 5 stocks to disk."""
    settings = dict(
        global_seed=42,              # fixed seed for reproducibility
        trading_days_per_year=252,
        years=10,
        prob_bull_to_bear=0.02,
        prob_bear_to_bull=0.05,
        global_bull_drift=0.10,
        global_bear_drift=-0.05,
        garch_w=1e-5,
        garch_a=0.05,
        garch_b=0.90,
        macro_cycle_length=5,
        macro_cycle_amplitude=0.05,
    )
    generator = ComplexSyntheticMarketGenerator(**settings)

    generator.generate_portfolio(
        num_stocks=5,
        output_dir="complex_synthetic_portfolio",
    )


if __name__ == "__main__":
    main()


Generated 5 stocks into 'complex_synthetic_portfolio'.
