In [9]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import stats
import datetime
import os
import json
from tqdm import tqdm

In [10]:


class SyntheticMarketGenerator:
    """
    A comprehensive generator for synthetic OHLCV market data that mimics real market behaviors
    with a realistic upward bias and proper market regime balance.
    """
    
    def __init__(self, seed=None):
        """
        Initialize the generator with optional random seed for reproducibility.
        
        Parameters:
        -----------
        seed : int, optional
            Random seed for reproducibility
        """
        if seed is not None:
            np.random.seed(seed)
        
        # Improved parameters with more realistic market characteristics
        self.params = {
            # Time series parameters
            'trading_days_per_year': 252,
            'years': 10,
            
            # Market regime parameters - adjusted for more realistic bull/bear balance
            'regime_transitions': {
                'bull_to_bull': 0.9985,    # Higher probability to stay in bull market
                'bull_to_bear': 0.0005,    # Lower probability to enter bear market
                'bear_to_bear': 0.985,     # Lower probability to stay in bear market
                'bear_to_bull': 0.015,     # Higher probability to exit bear market
                'bull_to_correction': 0.0015, # Probability of entering a correction
                'correction_length': (5, 22), # Shorter corrections
                'correction_depth': (-0.15, -0.05), # Milder corrections
            },
            
            # Return parameters - stronger upward bias and milder bear markets
            'annual_drift': {
                'bull': 0.18,               # Stronger bull market returns
                'bear': -0.20,              # Less severe bear market returns
                'upward_bias': 0.04,        # Additional upward bias for long-term growth
            },
            
            # Volatility parameters
            'annual_volatility': {
                'bull': 0.14,               # Slightly lower bull market volatility
                'bear': 0.25,               # Slightly lower bear market volatility
                'base': 0.08,               # Base volatility level
            },
            'volatility_of_volatility': 0.06, # Increased for better volatility clustering
            'volatility_mean_reversion': 0.80, # Slower mean reversion for better clustering
            
            # Fat tail parameters
            'degrees_of_freedom': 4,        # Fatter tails (more extreme moves)
            
            # OHLC parameters
            'daily_range': {
                'bull': 0.014,              # Daily high-low range in bull market
                'bear': 0.025,              # Daily high-low range in bear market
            },
            'open_location': 0.5,           # Where open typically falls in prev close to current close gap
            'open_location_std': 0.3,       # Variation in open location
            
            # Volume parameters
            'avg_volume': 1000000,          # Average daily volume
            'volume_volatility': 0.5,       # Higher volume volatility
            'volume_trend_coef': 0.8,       # Stronger correlation with trend
            'volume_volatility_coef': 1.5,  # Stronger correlation with volatility
            
            # Event parameters
            'flash_crash': {
                'probability': 0.0001,      # Rarer flash crashes
                'magnitude': (-0.12, -0.04), # Milder flash crashes
                'recovery': (0.4, 0.8),     # Better recovery from crashes
            },
            'earnings_jumps': {
                'probability': 0.01,        # More frequent earnings jumps
                'magnitude': (-0.15, 0.20), # Asymmetric jumps (positive bias)
            },
            
            # Correlation parameters (for multiple stocks)
            'market_beta': (0.5, 1.8),      # Wider beta range
            'correlation_base': 0.4,        # Base correlation between stocks
            'sector_correlation_boost': 0.3, # Additional correlation for same-sector stocks
            
            # Seasonal patterns
            'monthly_effects': {
                1: 0.02,    # January - positive effect
                4: 0.01,    # April
                5: 0.01,    # May
                8: -0.005,  # August
                10: 0.01,   # October
                12: 0.02,   # December - positive effect
            },
            'day_of_week_effects': {
                0: -0.001,  # Monday
                4: 0.002,   # Friday
            }
        }
        
        # Internal state
        self.current_volatility = None
        self.current_regime = None
        self.correction_end_day = None
        self.correction_target = None
        self.recovery_end_day = None
        self.flash_crash_recovery_level = None
        
        # Market regimes
        self.BULL = 'bull'
        self.BEAR = 'bear'
        self.CORRECTION = 'correction'
        self.CRASH = 'crash'
        self.RECOVERY = 'recovery'
        
    def generate_single_stock(self, ticker, sector=None, beta=None, trend_bias=0):
        """
        Generate synthetic OHLCV data for a single stock.
        
        Parameters:
        -----------
        ticker : str
            Ticker symbol for the stock
        sector : str, optional
            Sector the stock belongs to (for cross-stock correlations)
        beta : float, optional
            Beta coefficient to the market. If None, a random beta is assigned.
        trend_bias : float, optional
            Bias to add to the drift term (can create stocks with stronger or weaker trends)
            
        Returns:
        --------
        pd.DataFrame
            DataFrame containing OHLCV data and additional metadata
        """
        # Calculate the total number of trading days
        total_days = self.params['trading_days_per_year'] * self.params['years']
        
        # Assign beta if not provided
        if beta is None:
            beta = np.random.uniform(*self.params['market_beta'])
        
        # Generate trading dates
        start_date = datetime.datetime(2010, 1, 1)
        dates = self._generate_trading_dates(start_date, total_days)
        
        # Initialize arrays for OHLCV data
        close_prices = np.zeros(total_days)
        open_prices = np.zeros(total_days)
        high_prices = np.zeros(total_days)
        low_prices = np.zeros(total_days)
        volumes = np.zeros(total_days)
        regimes = np.array([self.BULL] * total_days, dtype='object')
        volatilities = np.zeros(total_days)
        returns = np.zeros(total_days)
        
        # Initialize starting values
        close_prices[0] = 100  # Start at $100
        self.current_volatility = self.params['annual_volatility']['bull'] / np.sqrt(self.params['trading_days_per_year'])
        self.current_regime = self.BULL
        self.correction_target = None
        
        # Generate the price and volume series
        for i in range(1, total_days):
            # Get the date
            current_date = dates[i]
            month = current_date.month
            day_of_week = current_date.weekday()
            
            # Update regimes
            self._update_regime(i, regimes)
            
            # Store the current regime
            regimes[i] = self.current_regime
            
            # Calculate drift and volatility based on regime
            drift, volatility = self._calculate_drift_volatility(i, regimes, beta, trend_bias)
            
            # Update current volatility with mean reversion and volatility of volatility
            # More pronounced volatility clustering
            self.current_volatility = (self.params['volatility_mean_reversion'] * volatility + 
                                      (1 - self.params['volatility_mean_reversion']) * self.current_volatility +
                                      np.random.normal(0, self.params['volatility_of_volatility'] / 
                                                      np.sqrt(self.params['trading_days_per_year'])))
            
            # Ensure volatility doesn't go too low
            self.current_volatility = max(self.current_volatility, 
                                         self.params['annual_volatility']['base'] / np.sqrt(self.params['trading_days_per_year']))
            
            # Store the current volatility
            volatilities[i] = self.current_volatility
            
            # Apply seasonal effects
            drift += self._apply_seasonal_effects(month, day_of_week)
            
            # Add long-term growth bias (adjusted by beta)
            drift += (self.params['annual_drift']['upward_bias'] * beta) / self.params['trading_days_per_year']
            
            # Generate fat-tailed return
            t_scaled = stats.t.rvs(df=self.params['degrees_of_freedom']) / np.sqrt(self.params['degrees_of_freedom'] / (self.params['degrees_of_freedom'] - 2))
            daily_return = drift + self.current_volatility * t_scaled
            
            # Check for special events
            if self.current_regime != self.CRASH and self.current_regime != self.CORRECTION:
                daily_return = self._apply_special_events(i, daily_return, regimes)
            
            # Store the return
            returns[i] = daily_return
            
            # Calculate close price
            close_prices[i] = close_prices[i-1] * (1 + daily_return)
            
            # Generate OHLC prices
            open_prices[i], high_prices[i], low_prices[i] = self._generate_ohlc(
                close_prices[i-1], close_prices[i], self.current_volatility
            )
            
            # Generate volume
            volumes[i] = self._generate_volume(daily_return, self.current_volatility)
            
        # Create DataFrame with OHLCV data
        df = pd.DataFrame({
            'Date': dates,
            'Open': open_prices,
            'High': high_prices,
            'Low': low_prices,
            'Close': close_prices,
            'Volume': volumes.astype(int),
            'Regime': regimes,
            'Volatility': volatilities,
            'Return': returns
        })
        
        # Set Date as index
        df.set_index('Date', inplace=True)
        
        # Add metadata
        df.attrs['ticker'] = ticker
        df.attrs['sector'] = sector
        df.attrs['beta'] = beta
        df.attrs['trend_bias'] = trend_bias
        
        return df
    
    def generate_market_portfolio(self, num_stocks=50, sectors=None, sector_weights=None):
        """
        Generate a portfolio of synthetic stocks with realistic cross-correlations.
        
        Parameters:
        -----------
        num_stocks : int
            Number of stocks to generate
        sectors : list, optional
            List of sector names. If None, default sectors are used
        sector_weights : dict, optional
            Dictionary mapping sectors to their weights in the portfolio
            
        Returns:
        --------
        dict
            Dictionary mapping tickers to their data
        pd.DataFrame
            Market index data (equal-weighted)
        """
        # Default sectors if not provided
        if sectors is None:
            sectors = [
                'Technology', 'Healthcare', 'Financials', 'Consumer Discretionary',
                'Consumer Staples', 'Industrials', 'Energy', 'Materials', 'Utilities', 'Real Estate'
            ]
        
        # Default sector weights if not provided - adjusted to reflect more realistic market composition
        if sector_weights is None:
            sector_weights = {
                'Technology': 0.25,          # Higher tech weighting
                'Healthcare': 0.15,
                'Financials': 0.15,
                'Consumer Discretionary': 0.12,
                'Consumer Staples': 0.08,
                'Industrials': 0.10,
                'Energy': 0.05,
                'Materials': 0.03,
                'Utilities': 0.03,
                'Real Estate': 0.04
            }
            # Ensure all sectors in the list have a weight
            for sector in sectors:
                if sector not in sector_weights:
                    sector_weights[sector] = 0.02
        
        # Normalize sector weights
        total_weight = sum(sector_weights.values())
        sector_weights = {k: v/total_weight for k, v in sector_weights.items()}
        
        # Calculate stocks per sector
        stocks_per_sector = {}
        remaining_stocks = num_stocks
        
        for sector, weight in sector_weights.items():
            if sector == list(sector_weights.keys())[-1]:
                # Last sector gets remaining stocks
                stocks_per_sector[sector] = remaining_stocks
            else:
                # Allocate stocks proportionally, with a minimum of 1
                sector_stocks = max(1, int(num_stocks * weight))
                stocks_per_sector[sector] = sector_stocks
                remaining_stocks -= sector_stocks
        
        # Generate a single "market factor" that all stocks will correlate with
        # with stronger upward bias for the market factor
        market_data = self.generate_single_stock("MARKET", beta=1.0, trend_bias=0.02)
        market_returns = market_data['Return'].values
        
        # Generate sector factors - correlated with market but with unique components
        sector_returns = {}
        for sector in sectors:
            # Sector returns have correlation with market plus unique variation
            sector_correlation = 0.7 + np.random.uniform(-0.2, 0.2)
            sector_unique_vol = np.sqrt(1 - sector_correlation**2)
            
            # Add sector-specific bias
            sector_bias = 0
            if sector == 'Technology':
                sector_bias = 0.03  # Tech outperformance
            elif sector == 'Healthcare':
                sector_bias = 0.02  # Healthcare outperformance
            elif sector == 'Energy':
                sector_bias = -0.01  # Energy underperformance
            
            # Apply the sector bias to the return series
            sector_daily_bias = sector_bias / self.params['trading_days_per_year']
            sector_returns_series = (
                sector_correlation * market_returns + 
                sector_unique_vol * np.random.normal(0, 1, len(market_returns))
            )
            
            # Add the daily bias
            sector_returns_series = sector_returns_series + sector_daily_bias
            sector_returns[sector] = sector_returns_series
        
        # Generate individual stocks
        all_stocks = {}
        ticker_template = "{}{:02d}"  # e.g., TECH01, HEAL02, etc.
        
        for sector, num_stocks in stocks_per_sector.items():
            sector_prefix = sector[:4].upper()
            
            for i in range(num_stocks):
                ticker = ticker_template.format(sector_prefix, i+1)
                
                # Random beta to market - with sector-specific tendencies
                base_beta_min, base_beta_max = self.params['market_beta']
                
                # Adjust beta ranges by sector
                if sector == 'Technology':
                    sector_beta_range = (1.1, 1.8)  # Higher beta for tech
                elif sector == 'Consumer Staples' or sector == 'Utilities':
                    sector_beta_range = (0.5, 0.9)  # Lower beta for defensive sectors
                elif sector == 'Financials':
                    sector_beta_range = (0.9, 1.5)  # Medium-high beta for financials
                else:
                    sector_beta_range = (base_beta_min, base_beta_max)  # Default range
                
                beta = np.random.uniform(*sector_beta_range)
                
                # Random trend bias - some stocks outperform/underperform their sectors
                # Wider range for individual stock performance
                trend_bias = np.random.normal(0, 0.08)
                
                # Generate the stock
                stock_data = self.generate_single_stock(ticker, sector, beta, trend_bias)
                
                # Apply correlation adjustments
                stock_data = self._apply_correlations(stock_data, market_returns, sector_returns[sector])
                
                all_stocks[ticker] = stock_data
        
        # Create a market index from the stocks (equal weighted)
        market_index = self._create_market_index(all_stocks)
        
        return all_stocks, market_index
    
    def _generate_trading_dates(self, start_date, total_days):
        """Generate a series of trading dates, excluding weekends and holidays."""
        all_dates = []
        current_date = start_date
        
        # Simple holiday detection (major US holidays)
        us_holidays = [
            # New Year's Day
            (1, 1),
            # Martin Luther King Jr. Day (3rd Monday in January)
            (1, 15), (1, 16), (1, 17), (1, 18), (1, 19), (1, 20), (1, 21),
            # President's Day (3rd Monday in February)
            (2, 15), (2, 16), (2, 17), (2, 18), (2, 19), (2, 20), (2, 21),
            # Good Friday (approximate)
            (3, 30), (3, 31), (4, 1), (4, 2), (4, 3), (4, 4), (4, 5),
            # Memorial Day (last Monday in May)
            (5, 25), (5, 26), (5, 27), (5, 28), (5, 29), (5, 30), (5, 31),
            # Independence Day
            (7, 4),
            # Labor Day (1st Monday in September)
            (9, 1), (9, 2), (9, 3), (9, 4), (9, 5), (9, 6), (9, 7),
            # Thanksgiving (4th Thursday in November)
            (11, 22), (11, 23), (11, 24), (11, 25), (11, 26), (11, 27), (11, 28),
            # Christmas
            (12, 25)
        ]
        
        while len(all_dates) < total_days:
            # Skip weekends
            if current_date.weekday() >= 5:
                current_date += datetime.timedelta(days=1)
                continue
                
            # Skip holidays (very simple approximation)
            if (current_date.month, current_date.day) in us_holidays:
                current_date += datetime.timedelta(days=1)
                continue
                
            all_dates.append(current_date)
            current_date += datetime.timedelta(days=1)
            
        return all_dates
    
    def _update_regime(self, day, regimes):
        """Update the market regime based on transition probabilities and current state."""
        prev_regime = self.current_regime
        
        if prev_regime == self.BULL:
            # Check for transition to bear market
            if np.random.random() < self.params['regime_transitions']['bull_to_bear']:
                self.current_regime = self.BEAR
                
            # Check for correction within bull market
            elif np.random.random() < self.params['regime_transitions']['bull_to_correction']:
                self.current_regime = self.CORRECTION
                # Determine correction duration
                min_days, max_days = self.params['regime_transitions']['correction_length']
                correction_duration = np.random.randint(min_days, max_days)
                self.correction_end_day = day + correction_duration
                
                # Set correction target (how deep the correction will be)
                self.correction_target = np.random.uniform(*self.params['regime_transitions']['correction_depth'])
                
        elif prev_regime == self.BEAR:
            # Check for transition to recovery
            if np.random.random() < self.params['regime_transitions']['bear_to_bull']:
                self.current_regime = self.RECOVERY
                # Recovery duration is proportional to the bear market duration
                bear_duration = len([r for r in regimes[:day] if r == self.BEAR])
                # Make recovery faster than the bear market
                self.recovery_end_day = day + int(bear_duration * 0.6)
                
        elif prev_regime == self.CORRECTION:
            # End correction after predetermined duration
            if day >= self.correction_end_day:
                self.current_regime = self.BULL
                self.correction_target = None
                
        elif prev_regime == self.RECOVERY:
            # End recovery phase
            if day >= self.recovery_end_day:
                self.current_regime = self.BULL
                
        elif prev_regime == self.CRASH:
            # Flash crash only lasts one day, then enters recovery
            self.current_regime = self.BULL
    
    def _calculate_drift_volatility(self, day, regimes, beta, trend_bias):
        """Calculate drift and volatility based on current regime."""
        days_per_year = self.params['trading_days_per_year']
        
        if self.current_regime == self.BULL:
            base_drift = self.params['annual_drift']['bull'] / days_per_year
            base_volatility = self.params['annual_volatility']['bull'] / np.sqrt(days_per_year)
            
        elif self.current_regime == self.BEAR:
            base_drift = self.params['annual_drift']['bear'] / days_per_year
            base_volatility = self.params['annual_volatility']['bear'] / np.sqrt(days_per_year)
            
        elif self.current_regime == self.CORRECTION:
            # During corrections, drift is negative but not as severe as bear markets
            if self.correction_target is None:
                # If correction target is not set for some reason, create a new one
                self.correction_target = np.random.uniform(*self.params['regime_transitions']['correction_depth'])
                
            # Calculate remaining correction days
            remaining_days = max(1, self.correction_end_day - day)
            
            # Distribute the correction over the remaining days
            proportion = 1.0 / remaining_days
            base_drift = self.correction_target * proportion
            
            # Corrections have increased volatility but not as high as bear markets
            vol_level = (self.params['annual_volatility']['bull'] + 
                         0.5 * (self.params['annual_volatility']['bear'] - self.params['annual_volatility']['bull']))
            base_volatility = vol_level / np.sqrt(days_per_year)
            
        elif self.current_regime == self.RECOVERY:
            # Recovery drift is strong positive
            base_drift = (self.params['annual_drift']['bull'] * 1.8) / days_per_year
            base_volatility = (self.params['annual_volatility']['bull'] + 
                          0.3 * (self.params['annual_volatility']['bear'] - self.params['annual_volatility']['bull'])) / np.sqrt(days_per_year)
            
        elif self.current_regime == self.CRASH:
            # Flash crash drift is very negative for one day
            base_drift = np.random.uniform(*self.params['flash_crash']['magnitude'])
            base_volatility = self.params['annual_volatility']['bear'] * 2 / np.sqrt(days_per_year)
            
        # Apply beta adjustment
        drift = base_drift * beta + trend_bias / days_per_year
        volatility = base_volatility * np.sqrt(beta)  # Using sqrt of beta for volatility relationship
        
        return drift, volatility
    
    def _apply_seasonal_effects(self, month, day_of_week):
        """Apply seasonal effects to drift term."""
        month_effect = self.params['monthly_effects'].get(month, 0) / self.params['trading_days_per_year']
        dow_effect = self.params['day_of_week_effects'].get(day_of_week, 0) / 5
        
        return month_effect + dow_effect
    
    def _apply_special_events(self, day, daily_return, regimes):
        """Apply special events like flash crashes or earnings jumps."""
        # Check for flash crash
        if np.random.random() < self.params['flash_crash']['probability']:
            self.current_regime = self.CRASH
            crash_magnitude = np.random.uniform(*self.params['flash_crash']['magnitude'])
            return crash_magnitude
            
        # Check for earnings jump or other idiosyncratic event
        if np.random.random() < self.params['earnings_jumps']['probability']:
            # Slightly positively biased jumps
            if np.random.random() < 0.55:  # 55% chance of positive jump
                jump_magnitude = np.random.uniform(0, self.params['earnings_jumps']['magnitude'][1])
            else:
                jump_magnitude = np.random.uniform(self.params['earnings_jumps']['magnitude'][0], 0)
                
            return daily_return + jump_magnitude
            
        return daily_return
    
    def _generate_ohlc(self, prev_close, curr_close, volatility):
        """Generate Open, High, Low prices given Close prices and volatility."""
        regime = self.current_regime
        
        # Determine daily range based on regime
        if regime in [self.BEAR, self.CRASH]:
            range_param = self.params['daily_range']['bear']
        else:
            range_param = self.params['daily_range']['bull']
            
        # Daily range scales with volatility
        daily_range = range_param * (volatility / (self.params['annual_volatility']['bull'] / 
                                                  np.sqrt(self.params['trading_days_per_year'])))
        
        # Add randomness to the range
        daily_range *= np.random.lognormal(0, 0.3)
        
        # Determine where Open falls between previous Close and current Close
        open_loc = np.random.normal(self.params['open_location'], self.params['open_location_std'])
        open_loc = max(0, min(1, open_loc))  # Clamp between 0 and 1
        
        # Calculate Open price
        price_change = curr_close - prev_close
        open_price = prev_close + price_change * open_loc
        
        # Calculate High and Low with realistic relationships
        if curr_close > prev_close:  # Up day
            # High is typically reached after open on up days
            high_price = max(open_price, curr_close) + np.random.uniform(0, daily_range * 0.7)
            low_price = min(open_price, curr_close) - np.random.uniform(0, daily_range * 0.3)
        else:  # Down day
            # Low is typically reached after open on down days
            high_price = max(open_price, curr_close) + np.random.uniform(0, daily_range * 0.3)
            low_price = min(open_price, curr_close) - np.random.uniform(0, daily_range * 0.7)
            
        # Ensure High > Low
        if high_price <= low_price:
            high_price = low_price * 1.001
            
        # Ensure High >= max(Open, Close) and Low <= min(Open, Close)
        high_price = max(high_price, open_price, curr_close)
        low_price = min(low_price, open_price, curr_close)
        
        return open_price, high_price, low_price
    
    def _generate_volume(self, daily_return, volatility):
        """Generate volume based on return and volatility."""
        base_volume = self.params['avg_volume']
        
        # Volume increases with absolute returns (trend strength)
        trend_factor = 1 + self.params['volume_trend_coef'] * abs(daily_return / volatility)
        
        # Volume increases with volatility
        normal_vol = self.params['annual_volatility']['bull'] / np.sqrt(self.params['trading_days_per_year'])
        volatility_factor = 1 + self.params['volume_volatility_coef'] * (volatility / normal_vol - 1)
        
        # Add randomness
        random_factor = np.random.lognormal(0, self.params['volume_volatility'])
        
        volume = base_volume * trend_factor * volatility_factor * random_factor
        
        # Special case for crashes - volume spikes
        if self.current_regime == self.CRASH:
            volume *= 3 + np.random.uniform(0, 2)
            
        # Volume tends to be higher in bear markets
        if self.current_regime == self.BEAR:
            volume *= 1.2
            
        return volume
    
    def _apply_correlations(self, stock_data, market_returns, sector_returns):
        """Apply realistic correlations to stock returns based on market and sector factors."""
        # Get original returns
        original_returns = stock_data['Return'].values
        beta = stock_data.attrs['beta']
        
        # Parameters for correlation adjustment - adjusted for more realistic correlation structure
        idiosyncratic_weight = 0.35  # How much of the return is stock-specific
        sector_weight = 0.35 * beta  # How much comes from sector
        market_weight = 0.30 * beta  # How much comes from overall market
        
        # Ensure weights sum to 1
        total_weight = idiosyncratic_weight + sector_weight + market_weight
        idiosyncratic_weight /= total_weight
        sector_weight /= total_weight
        market_weight /= total_weight
        
        # Create a mix of idiosyncratic, sector and market returns
        adjusted_returns = (
            idiosyncratic_weight * original_returns +
            sector_weight * sector_returns +
            market_weight * market_returns
        )
        
        # Adjust volatility to match original
        vol_ratio = np.std(original_returns) / np.std(adjusted_returns) if np.std(adjusted_returns) > 0 else 1
        adjusted_returns = adjusted_returns * vol_ratio
        
        # Replace the returns in the dataframe
        stock_data['Return'] = adjusted_returns
        
        # Recalculate prices based on new returns
        close_prices = stock_data['Close'].values
        close_prices[0] = 100  # Start price
        
        for i in range(1, len(close_prices)):
            close_prices[i] = close_prices[i-1] * (1 + adjusted_returns[i])
            
        stock_data['Close'] = close_prices
        
        # Regenerate OHLC
        for i in range(1, len(close_prices)):
            prev_close = close_prices[i-1]
            curr_close = close_prices[i]
            volatility = stock_data['Volatility'].iloc[i]
            
            open_price, high_price, low_price = self._generate_ohlc(prev_close, curr_close, volatility)
            
            stock_data.loc[stock_data.index[i], 'Open'] = open_price
            stock_data.loc[stock_data.index[i], 'High'] = high_price
            stock_data.loc[stock_data.index[i], 'Low'] = low_price
            
        return stock_data

In [11]:
# Example usage
def _plot_stock_fallback(data, title=None):
    """
    Draw close price and volume for one series into the current figure.

    Used by ``generate_example_data`` when the generator has no ``plot_stock``
    method of its own.
    """
    fig = plt.gcf()
    fig.clf()
    price_ax, volume_ax = fig.subplots(2, 1, sharex=True, gridspec_kw={'height_ratios': [3, 1]})

    if title is None:
        title = f"Synthetic Stock {data.attrs.get('ticker', '')}".strip()

    price_ax.plot(data.index, data['Close'], linewidth=1)
    price_ax.set_title(title)
    price_ax.set_ylabel("Close")

    volume_ax.plot(data.index, data['Volume'], linewidth=0.5)
    volume_ax.set_ylabel("Volume")
    volume_ax.set_xlabel("Date")

    return fig


def _save_data_fallback(stocks, output_dir="synthetic_data"):
    """
    Write one CSV per ticker plus a ``metadata.json`` with each frame's ``attrs``.

    Used by ``generate_example_data`` when the generator has no ``save_data``
    method of its own.
    """
    os.makedirs(output_dir, exist_ok=True)

    metadata = {}
    for ticker, frame in stocks.items():
        frame.to_csv(os.path.join(output_dir, f"{ticker}.csv"))
        metadata[ticker] = dict(frame.attrs)

    with open(os.path.join(output_dir, "metadata.json"), "w") as handle:
        # default=str covers attrs values that are not plain JSON types
        json.dump(metadata, handle, indent=2, default=str)


def generate_example_data():
    """
    Generate example synthetic market data, plot a few series and save everything.

    Returns:
    --------
    dict
        Mapping of ticker to DataFrame, including the index under ``'INDEX'``
    pd.DataFrame
        The market index
    """
    # Initialize the generator
    generator = SyntheticMarketGenerator(seed=42)

    # The class as defined in this notebook has no plotting or saving methods;
    # use them if they exist, otherwise fall back to the local helpers.
    plot_stock = getattr(generator, 'plot_stock', _plot_stock_fallback)
    save_data = getattr(generator, 'save_data', _save_data_fallback)

    # Generate a portfolio of 50 stocks in 10 sectors
    print("Generating 50 synthetic stocks across 10 sectors...")
    stocks, market_index = generator.generate_market_portfolio(num_stocks=50)

    # Choose the example tickers before the index is added so that three real
    # stocks are always plotted. NumPy's seeded global state keeps the choice
    # reproducible.
    stock_tickers = list(stocks.keys())
    sample_size = min(3, len(stock_tickers))
    if sample_size:
        example_tickers = [str(t) for t in np.random.choice(stock_tickers, size=sample_size, replace=False)]
    else:
        example_tickers = []

    # Add the market index to the dictionary
    stocks['INDEX'] = market_index

    # Plot examples
    print("Plotting example data...")

    plt.figure(figsize=(15, 10))
    plot_stock(market_index, title="Synthetic Market Index")
    plt.savefig("synthetic_market_index.png")
    plt.close()

    for ticker in example_tickers:
        plt.figure(figsize=(15, 10))
        plot_stock(stocks[ticker])
        plt.savefig(f"synthetic_stock_{ticker}.png")
        plt.close()

    # Save the data
    print("Saving data to files...")
    save_data(stocks)

    print("Example data generation complete!")
    return stocks, market_index


# Run the demo when this cell (or the exported script) is executed directly;
# the returned objects are kept as globals for later cells.
if __name__ == "__main__":
    stocks, market_index = generate_example_data()

Generating 50 synthetic stocks across 10 sectors...


AttributeError: 'SyntheticMarketGenerator' object has no attribute '_create_market_index'