# In [None]:
class MarketRegime:
    """Stateless market-regime classifier driven by drawdowns and rallies.

    Regimes are plain strings (``BULL``, ``BEAR``, ``CORRECTION``,
    ``RECOVERY``).  No regime history is stored: every decision is derived
    from the price series, an index into it, and the regime the caller says
    was in force before that index.
    """
    BULL = "bull"
    BEAR = "bear"
    CORRECTION = "correction"
    RECOVERY = "recovery"

    def __init__(self):
        """Set the thresholds, look-back windows and persistence settings."""
        # Fractional price moves that trigger regime changes.
        self.thresholds = {
            'bear_market': -0.20,         # drawdown from the rolling peak
            'correction': -0.10,          # NOTE(review): not read by any method of this class
            'recovery_threshold': 0.15,   # rally off the rolling trough
            'bull_confirmation': 0.05     # gain over the trend window
        }

        # Look-back lengths, in price-series steps.
        self.windows = {
            'peak_window': 60,        # window for the drawdown peak
            'support_window': 30,     # window for the recovery trough
            'trend_window': 20,       # window for bull confirmation
            'long_term_window': 252   # cap on how far regime duration is counted back
        }

        # Trend persistence parameters
        self.trend_persistence = {
            'bull_continuation': 0.998,  # NOTE(review): not read by any method of this class
            'recovery_speed': 0.25,      # NOTE(review): not read by any method of this class
            'min_bull_duration': 126     # bull must have lasted this long to turn straight into a bear
        }

    def detect_regime(self, prices, index, current_regime):
        """Return the regime that follows ``current_regime`` at ``prices[index]``.

        Transition rules:

        * Fewer than ``peak_window`` prior prices: always ``BULL``.
        * ``BULL``: stays ``BULL`` until the drawdown from the rolling peak
          reaches the bear threshold; it then becomes ``BEAR`` if the bull
          has lasted at least ``min_bull_duration`` steps, else ``CORRECTION``.
        * ``BEAR``: becomes ``RECOVERY`` once the rally off the rolling
          trough reaches the recovery threshold.
        * ``RECOVERY`` / ``CORRECTION``: fall to ``BEAR`` on a bear-level
          drawdown, rise to ``BULL`` on bull confirmation, else unchanged.
        * Any other value: ``BULL``.

        Args:
            prices: Indexable, sliceable sequence of prices.
            index: Position in ``prices`` to evaluate.
            current_regime: Regime in force before this evaluation.

        Returns:
            One of the regime string constants.
        """
        return self._next_regime(prices, index, current_regime, check_persistence=True)

    def _next_regime(self, prices, index, current_regime, check_persistence):
        """Apply the transition rules.

        With ``check_persistence=False`` a bear-level drawdown in a bull
        market is reported as ``BEAR`` immediately, without measuring how
        long the bull has lasted.  ``_get_regime_duration`` relies on this to
        scan history without re-entering itself.
        """
        peak_window = self.windows['peak_window']
        if index < peak_window:
            # Not enough history to measure a drawdown yet.
            return self.BULL

        # Drawdown of the current price from the highest price of the last
        # peak_window steps (current step included).
        peak = np.max(prices[index - peak_window:index + 1])
        drawdown = (prices[index] / peak) - 1
        bear_breach = drawdown <= self.thresholds['bear_market']

        if current_regime == self.BULL:
            if not bear_breach:
                return self.BULL
            if not check_persistence:
                return self.BEAR
            regime_duration = self._get_regime_duration(prices, index, self.BULL)
            if regime_duration >= self.trend_persistence['min_bull_duration']:
                return self.BEAR
            # A young bull market only gets a correction, not a full bear.
            return self.CORRECTION

        if current_regime == self.BEAR:
            if self._check_recovery(prices, index):
                return self.RECOVERY
            return self.BEAR

        if current_regime == self.RECOVERY or current_regime == self.CORRECTION:
            if bear_breach:
                return self.BEAR
            if self._check_bull_confirmation(prices, index):
                return self.BULL
            return current_regime

        return self.BULL

    def _check_recovery(self, prices, index):
        """Return True when the price has rallied enough off its recent low.

        The low is the minimum over the last ``support_window`` steps
        (current step included); the rally must reach ``recovery_threshold``.
        """
        lookback = min(index, self.windows['support_window'])
        price_window = prices[index - lookback:index + 1]
        trough = np.min(price_window)
        rally = (prices[index] / trough) - 1
        return rally >= self.thresholds['recovery_threshold']

    def _check_bull_confirmation(self, prices, index):
        """Return True when the gain over ``trend_window`` reaches ``bull_confirmation``."""
        lookback = min(index, self.windows['trend_window'])
        return (prices[index] / prices[index - lookback]) - 1 >= self.thresholds['bull_confirmation']

    def _get_regime_duration(self, prices, index, regime):
        """Count the consecutive steps before ``index`` consistent with ``regime``.

        The scan walks backwards from ``index - 1`` for at most
        ``long_term_window`` steps and stops at the first step whose
        transition rules (persistence check disabled) would leave ``regime``.

        Because no regime history is stored, this is a price-based proxy for
        the regime's age.  The step at ``index`` itself is excluded: the
        caller has already found it to breach the regime, and evaluating it
        again was the source of unbounded recursion.

        Returns:
            Number of qualifying steps, between 0 and ``long_term_window``.
        """
        duration = 0
        stop = max(-1, index - 1 - self.windows['long_term_window'])
        for i in range(index - 1, stop, -1):
            if self._next_regime(prices, i, regime, check_persistence=False) != regime:
                break
            duration += 1
        return duration


class SyntheticMarketGenerator:
    def __init__(self,
                 global_seed=None,
                 trading_days_per_year=252,
                 years=10,
                 default_bull_drift=0.20,
                 default_bear_drift=-0.35,
                 default_upward_bias=0.06,
                 default_bull_vol=0.13,
                 default_bear_vol=0.28):
        """Configure the simulation horizon, regime defaults and dynamics.

        Args:
            global_seed: When not ``None``, seeds NumPy's global random state.
            trading_days_per_year: Number of simulated days in one year.
            years: Length of the simulated history in years.
            default_bull_drift: Annual drift used for the bull regime.
            default_bear_drift: Annual drift used for the bear regime.
            default_upward_bias: Extra annual drift added in every regime.
            default_bull_vol: Annual volatility used for the bull regime.
            default_bear_vol: Annual volatility used for the bear regime.
        """
        # Seeding touches the process-wide NumPy generator, not a private one.
        if global_seed is not None:
            np.random.seed(global_seed)

        # Simulation horizon.
        self.trading_days_per_year = trading_days_per_year
        self.years = years
        self.total_days = trading_days_per_year * years

        # Regime-level drift / volatility defaults.
        self.default_bull_drift, self.default_bear_drift = (
            default_bull_drift, default_bear_drift
        )
        self.default_upward_bias = default_upward_bias
        self.default_bull_vol, self.default_bear_vol = (
            default_bull_vol, default_bear_vol
        )

        # Regime classifier shared by all generated series.
        self.regime_manager = MarketRegime()

        # Tunable market dynamics, looked up by key throughout the class.
        self.params = dict(
            vol_mean_reversion=0.92,
            vol_of_vol=0.06,
            base_vol=0.08,                     # annualised volatility floor
            max_vol=0.45,                      # annualised volatility cap
            momentum_decay=0.99,               # weight kept on past momentum
            momentum_impact=0.15,              # momentum contribution to drift
            earnings_jump_prob=0.02,
            earnings_jump_range=(-0.12, 0.15),
            flash_crash_prob=0.0003,
            flash_crash_range=(-0.12, -0.06),
            long_term_growth=0.08,             # base annual growth rate
            growth_vol=0.02,                   # spread of the yearly growth redraw
        )

    def generate_stock_data(self, ticker="STK", initial_price=None):
        """Simulate one stock's daily OHLCV history over the configured horizon.

        Each day's regime is chosen by ``self.regime_manager`` from the closes
        up to the previous day.  The regime sets a base drift and volatility,
        to which a yearly-redrawn long-term growth rate, a momentum term and a
        constant upward bias are added before a log return is drawn.

        The result depends on the exact sequence of draws from NumPy's global
        random state, so the order of the calls below matters.

        Args:
            ticker: Label stored in ``df.attrs['ticker']``.
            initial_price: Close of the first day; drawn uniformly from
                [50, 150) when ``None``.

        Returns:
            DataFrame indexed by ``Date`` with columns ``Open``, ``High``,
            ``Low``, ``Close``, ``Volume``, ``Regime``, ``Volatility`` (daily)
            and ``LogReturn``.  ``df.attrs`` carries ``ticker``,
            ``annualized_return``, ``total_return`` and ``sharpe_ratio``.
        """
        dates = self._generate_dates_with_offset()
        N = len(dates)

        # Initialize arrays
        close_prices = np.zeros(N)
        open_prices = np.zeros(N)
        high_prices = np.zeros(N)
        low_prices = np.zeros(N)
        volumes = np.zeros(N)
        daily_vols = np.zeros(N)
        regimes = np.empty(N, dtype=object)
        log_returns = np.zeros(N)

        # Set initial conditions
        if initial_price is None:
            initial_price = np.random.uniform(50, 150)

        # Day 0 starts in a bull regime at the bull volatility, converted
        # from annual to daily scale.
        close_prices[0] = initial_price
        daily_vols[0] = self.default_bull_vol / np.sqrt(self.trading_days_per_year)
        regimes[0] = self.regime_manager.BULL

        # Initialize first day
        open_prices[0], high_prices[0], low_prices[0] = self._generate_first_day_ohlc(
            initial_price, daily_vols[0]
        )
        volumes[0] = self._generate_volume(0, daily_vols[0], regimes[0])

        # State variables carried from one day to the next
        momentum = 0.0
        volatility_regime = 1.0
        long_term_trend = self.params['long_term_growth']

        for i in range(1, N):
            # Redraw the long-term growth rate once per simulated year,
            # never letting it fall below 5%.
            if i % self.trading_days_per_year == 0:
                long_term_trend = max(
                    0.05,  # Minimum growth rate
                    self.params['long_term_growth'] + 
                    np.random.normal(0, self.params['growth_vol'])
                )

            # Detect regime from closes up to yesterday (today's close is
            # not known yet) and yesterday's regime.
            current_regime = self.regime_manager.detect_regime(
                close_prices, i-1, regimes[i-1]
            )
            regimes[i] = current_regime

            # Get base parameters (annual drift and volatility)
            drift, vol = self._get_regime_parameters(current_regime)

            # Add long-term trend and momentum
            drift += long_term_trend
            drift += momentum * self.params['momentum_impact']

            # Convert the annual drift to a daily log drift, then add the
            # constant upward bias on the same daily scale.
            daily_drift = np.log(1 + drift) / self.trading_days_per_year
            daily_drift += self.default_upward_bias / self.trading_days_per_year

            # Update volatility towards the regime target, scaled by the
            # slowly varying volatility multiplier.
            target_vol = vol * volatility_regime
            daily_vols[i] = self._update_volatility(daily_vols[i-1], target_vol)

            # Generate return
            log_returns[i] = self._generate_return(daily_drift, daily_vols[i])

            # Apply special events; skipped in bear and correction regimes.
            if current_regime not in [self.regime_manager.BEAR, self.regime_manager.CORRECTION]:
                log_returns[i] = self._apply_special_events(log_returns[i])

            # Update price
            close_prices[i] = close_prices[i-1] * np.exp(log_returns[i])

            # Generate OHLC and volume
            open_prices[i], high_prices[i], low_prices[i] = self._generate_ohlc(
                close_prices[i-1], close_prices[i], daily_vols[i], current_regime
            )
            volumes[i] = self._generate_volume(log_returns[i], daily_vols[i], current_regime)

            # Update state: momentum is an exponentially weighted average of
            # past log returns.
            momentum = (
                momentum * self.params['momentum_decay'] + 
                log_returns[i] * (1 - self.params['momentum_decay'])
            )
            volatility_regime = self._update_volatility_regime(
                volatility_regime, log_returns[i], current_regime
            )

        # Create DataFrame
        df = pd.DataFrame({
            'Date': dates,
            'Open': open_prices,
            'High': high_prices,
            'Low': low_prices,
            'Close': close_prices,
            'Volume': volumes.astype(int),
            'Regime': regimes,
            'Volatility': daily_vols,
            'LogReturn': log_returns
        })
        df.set_index('Date', inplace=True)

        # Calculate performance metrics from first to last close
        total_return = (df['Close'].iloc[-1] / df['Close'].iloc[0]) - 1
        years = len(df) / self.trading_days_per_year
        annualized_return = (1 + total_return) ** (1/years) - 1

        df.attrs['ticker'] = ticker
        df.attrs['annualized_return'] = annualized_return
        df.attrs['total_return'] = total_return
        df.attrs['sharpe_ratio'] = self._calculate_sharpe_ratio(df)

        return df

    def _calculate_sharpe_ratio(self, df):
        """Calculate Sharpe ratio of returns"""
        excess_returns = df['LogReturn'] - 0.02/self.trading_days_per_year  # 2% risk-free rate
        return (
            np.sqrt(self.trading_days_per_year) * 
            np.mean(excess_returns) / np.std(df['LogReturn'])
        )

    def _generate_dates_with_offset(self):
        """Generate trading dates with random offset"""
        offset = np.random.randint(0, 61)
        start_date = datetime.datetime(2010,1,1) + datetime.timedelta(days=offset)
        
        dates = []
        current = start_date
        while len(dates) < self.total_days:
            if current.weekday() < 5:  # Monday to Friday
                dates.append(current)
            current += datetime.timedelta(days=1)
        return dates

    def _get_regime_parameters(self, regime):
        """Get base drift and volatility for given regime"""
        if regime == self.regime_manager.BULL:
            drift = self.default_bull_drift
            vol = self.default_bull_vol
        elif regime == self.regime_manager.BEAR:
            drift = self.default_bear_drift
            vol = self.default_bear_vol
        elif regime == self.regime_manager.CORRECTION:
            drift = self.default_bear_drift * 0.5
            vol = (self.default_bull_vol + self.default_bear_vol) * 0.5
        elif regime == self.regime_manager.RECOVERY:
            drift = self.default_bull_drift * 1.5
            vol = self.default_bear_vol * 0.7
        else:
            drift = self.default_bull_drift
            vol = self.default_bull_vol
            
        return drift, vol

    def _update_volatility(self, current_vol, target_vol):
        """Update volatility with mean reversion and bounds"""
        daily_target = target_vol / np.sqrt(self.trading_days_per_year)
        
        daily_vol = (
            self.params['vol_mean_reversion'] * daily_target +
            (1 - self.params['vol_mean_reversion']) * current_vol +
            np.random.normal(0, self.params['vol_of_vol'] / np.sqrt(self.trading_days_per_year))
        )
        
        # Apply bounds
        min_daily_vol = self.params['base_vol'] / np.sqrt(self.trading_days_per_year)
        max_daily_vol = self.params['max_vol'] / np.sqrt(self.trading_days_per_year)
        return np.clip(daily_vol, min_daily_vol, max_daily_vol)

    def _generate_return(self, drift, vol):
        """Generate daily return with fat tails"""
        shock = student_t.rvs(df=5)  # df=5 for fat tails
        shock /= np.sqrt(5/3)  # Scale to unit variance
        return drift + vol * shock

    def _apply_special_events(self, daily_return):
        """Apply earnings jumps or flash crashes"""
        if np.random.random() < self.params['flash_crash_prob']:
            return np.random.uniform(*self.params['flash_crash_range'])
            
        if np.random.random() < self.params['earnings_jump_prob']:
            jump = np.random.uniform(*self.params['earnings_jump_range'])
            return daily_return + jump
            
        return daily_return

    def _generate_first_day_ohlc(self, initial_price, daily_vol):
        """Generate OHLC for the first day"""
        open_dev = np.random.normal(0, daily_vol)
        open_price = initial_price * (1 + open_dev)
        
        if open_dev > 0:
            high_price = max(open_price, initial_price) * (1 + abs(np.random.normal(0, daily_vol)))
            low_price = min(open_price, initial_price) * (1 - abs(np.random.normal(0, daily_vol * 0.5)))
        else:
            high_price = max(open_price, initial_price) * (1 + abs(np.random.normal(0, daily_vol * 0.5)))
            low_price = min(open_price, initial_price) * (1 - abs(np.random.normal(0, daily_vol)))
            
        # Ensure OHLC relationships
        high_price = max(high_price, open_price, initial_price)
        low_price = min(low_price, open_price, initial_price)
        
        return open_price, high_price, low_price

    def _generate_ohlc(self, prev_close, curr_close, daily_vol, regime):
        """Generate intraday OHLC prices"""
        # Higher gaps and ranges in volatile regimes
        gap_factor = 2.0 if regime in [self.regime_manager.BEAR, self.regime_manager.CORRECTION] else 1.5
        range_factor = 2.5 if regime in [self.regime_manager.BEAR, self.regime_manager.CORRECTION] else 1.8
        
        # Generate overnight gap
        gap_vol = daily_vol * gap_factor
        open_price = prev_close * (1 + np.random.normal(0, gap_vol))
        
        # Base range on volatility and regime
        daily_range = daily_vol * range_factor * prev_close
        
        # Generate high/low based on trend
        if curr_close > prev_close:
            high_price = max(open_price, curr_close) * (1 + abs(np.random.normal(0, daily_vol * 0.7)))
            low_price = min(open_price, curr_close) * (1 - abs(np.random.normal(0, daily_vol * 0.3)))
        else:
            high_price = max(open_price, curr_close) * (1 + abs(np.random.normal(0, daily_vol * 0.3)))
            low_price = min(open_price, curr_close) * (1 - abs(np.random.normal(0, daily_vol * 0.7)))
        
        # Ensure OHLC relationships and minimum range
        high_price = max(high_price, open_price, curr_close)
        low_price = min(low_price, open_price, curr_close)
        
        # Ensure minimum daily range
        min_range = prev_close * daily_vol * 0.2
        if high_price - low_price < min_range:
            mid_price = (high_price + low_price) / 2
            high_price = mid_price + min_range/2
            low_price = mid_price - min_range/2
        
        return open_price, high_price, low_price

    def _generate_volume(self, log_return, daily_vol, regime):
        """Generate trading volume based on price action and regime"""
        base_volume = 1_000_000
        
        # Volume factors
        vol_factor = (daily_vol * np.sqrt(self.trading_days_per_year) / self.default_bull_vol)
        return_factor = 1 + 2.5 * abs(log_return) / daily_vol if daily_vol > 0 else 1
        
        # Higher volumes in volatile regimes
        regime_factor = {
            self.regime_manager.BULL: 1.0,
            self.regime_manager.BEAR: 2.5,
            self.regime_manager.CORRECTION: 1.8,
            self.regime_manager.RECOVERY: 1.5
        }.get(regime, 1.0)
        
        # Random variation with higher skew
        noise = np.random.lognormal(0, 0.6)
        
        volume = base_volume * vol_factor * return_factor * regime_factor * noise
        
        # Add minimum volume
        min_volume = base_volume * 0.2
        return max(int(volume), int(min_volume))

    def _update_volatility_regime(self, current_regime, log_return, market_regime):
        """Update volatility regime multiplier"""
        # Target higher volatility in bear markets and corrections
        base_target = {
            self.regime_manager.BULL: 1.0,
            self.regime_manager.BEAR: 1.8,
            self.regime_manager.CORRECTION: 1.5,
            self.regime_manager.RECOVERY: 1.3
        }.get(market_regime, 1.0)
        
        # Adjust for large moves
        if abs(log_return) > 3 * self.default_bull_vol / np.sqrt(self.trading_days_per_year):
            base_target *= 1.2
        
        # Mean revert with some persistence
        return 0.95 * current_regime + 0.05 * base_target

    def plot_stock(self, df, ticker, save_path=None, show_volume=True):
        """Plot the close price with regime shading and an optional volume panel.

        Args:
            df: DataFrame with ``Close`` and ``Regime`` columns, plus ``Open``
                and ``Volume`` when ``show_volume`` is true.  Performance
                figures are read from ``df.attrs`` and default to 0 when
                absent.
            ticker: Name used in the plot title.
            save_path: When truthy, the figure is written to this path and
                closed; otherwise it is shown interactively.
            show_volume: Whether to add a volume bar chart below the price.
        """
        if show_volume:
            # gridspec_kw is accepted by every Matplotlib version, whereas
            # passing height_ratios directly requires Matplotlib 3.6+.
            fig, (ax1, ax2) = plt.subplots(
                2, 1, figsize=(15, 10),
                gridspec_kw={'height_ratios': [3, 1]}
            )
        else:
            fig, ax1 = plt.subplots(figsize=(15, 8))

        # Plot price
        ax1.plot(df.index, df['Close'], 'k-', linewidth=1, label='Close')

        # Color backgrounds for regimes
        regime_colors = {
            self.regime_manager.BULL: 'lightgreen',
            self.regime_manager.BEAR: 'lightcoral',
            self.regime_manager.CORRECTION: 'lightyellow',
            self.regime_manager.RECOVERY: 'lightblue'
        }

        # Fix the y-limits first so the shading spans the full panel height.
        ymin, ymax = df['Close'].min(), df['Close'].max()
        padding = (ymax - ymin) * 0.1
        ax1.set_ylim(ymin - padding, ymax + padding)

        # Plot regime backgrounds; regimes that never occur get no legend entry.
        for regime, color in regime_colors.items():
            mask = (df['Regime'] == regime)
            if mask.any():
                ax1.fill_between(
                    df.index, ymin - padding, ymax + padding,
                    where=mask, color=color, alpha=0.3, label=regime
                )

        # Add performance metrics
        performance_text = (
            f"Total Return: {df.attrs.get('total_return', 0)*100:.1f}%\n"
            f"Annual Return: {df.attrs.get('annualized_return', 0)*100:.1f}%\n"
            f"Sharpe Ratio: {df.attrs.get('sharpe_ratio', 0):.2f}"
        )
        ax1.text(
            0.02, 0.98, performance_text,
            transform=ax1.transAxes,
            verticalalignment='top',
            bbox=dict(boxstyle='round', facecolor='white', alpha=0.8)
        )

        # Format price axis
        ax1.set_title(f'{ticker} Synthetic Price', fontsize=14)
        ax1.set_ylabel('Price')
        ax1.grid(True, alpha=0.3)
        ax1.legend(loc='upper left')

        if show_volume:
            # Green bars for days closing at or above the open, red otherwise.
            volume_colors = np.where(df['Close'] >= df['Open'], 'green', 'red')
            ax2.bar(df.index, df['Volume'], color=volume_colors, alpha=0.5)
            ax2.set_ylabel('Volume')
            ax2.grid(True, alpha=0.3)

        fig.tight_layout()

        if save_path:
            # Act on this figure explicitly rather than on whichever figure
            # happens to be current, and release it once written.
            fig.savefig(save_path, dpi=150, bbox_inches='tight')
            plt.close(fig)
        else:
            plt.show()

    def generate_portfolio(self, num_stocks=5, output_dir="synthetic_portfolio"):
        """Generate several synthetic stocks and write them to ``output_dir``.

        For every stock a ``<ticker>.csv``, a ``<ticker>.png`` chart and a
        ``<ticker>_metadata.json`` file are written, followed by one
        ``portfolio_summary.json`` for the whole set.

        Args:
            num_stocks: Number of stocks to generate.
            output_dir: Directory for the output files; created if missing.

        Returns:
            Dict mapping each random 3-4 letter ticker to its DataFrame.
        """
        os.makedirs(output_dir, exist_ok=True)
        portfolio = {}

        # Generate random tickers.  A colliding ticker is redrawn so that no
        # stock silently overwrites another one's files and dict entry.
        letters = list('ABCDEFGHIJKLMNOPQRSTUVWXYZ')
        ticker_length = np.random.choice([3, 4], size=num_stocks)
        tickers = []
        used = set()
        for length in ticker_length:
            ticker = ''.join(np.random.choice(letters, length))
            while ticker in used:
                ticker = ''.join(np.random.choice(letters, length))
            used.add(ticker)
            tickers.append(ticker)

        for ticker in tickers:
            # Generate data
            df = self.generate_stock_data(ticker=ticker)
            portfolio[ticker] = df

            # Save data and plots
            df.to_csv(os.path.join(output_dir, f"{ticker}.csv"))
            self.plot_stock(df, ticker, save_path=os.path.join(output_dir, f"{ticker}.png"))

            # Save enhanced metadata.  Positional access goes through .iloc:
            # integer keys on a date-indexed Series are label lookups in
            # current pandas.
            closes = df['Close']
            metadata = {
                'ticker': ticker,
                'start_date': df.index[0].strftime('%Y-%m-%d'),
                'end_date': df.index[-1].strftime('%Y-%m-%d'),
                'initial_price': float(closes.iloc[0]),
                'final_price': float(closes.iloc[-1]),
                'total_return': float(df.attrs['total_return'] * 100),
                'annualized_return': float(df.attrs['annualized_return'] * 100),
                'sharpe_ratio': float(df.attrs['sharpe_ratio']),
                'max_drawdown': float(self._calculate_max_drawdown(closes)),
                'annualized_volatility': float(np.std(df['LogReturn']) * np.sqrt(self.trading_days_per_year) * 100),
                'regime_distribution': {
                    regime: int(np.sum(df['Regime'] == regime))
                    for regime in [self.regime_manager.BULL, self.regime_manager.BEAR,
                                 self.regime_manager.CORRECTION, self.regime_manager.RECOVERY]
                }
            }

            with open(os.path.join(output_dir, f"{ticker}_metadata.json"), 'w', encoding='utf-8') as f:
                json.dump(metadata, f, indent=2)

        # Generate portfolio summary
        self._save_portfolio_summary(portfolio, output_dir)

        return portfolio

    def _calculate_max_drawdown(self, prices):
        """Return the largest peak-to-trough decline of ``prices`` in percent.

        The value is zero or negative (``-25.0`` means the series fell 25%
        below an earlier high), matching the percentage units of the other
        metadata fields.  An empty series yields ``0.0``.
        """
        values = np.asarray(prices, dtype=float)
        if values.size == 0:
            return 0.0
        running_peak = np.maximum.accumulate(values)
        return float(np.min(values / running_peak - 1) * 100)

    def _save_portfolio_summary(self, portfolio, output_dir):
        """Generate and save portfolio-level statistics"""
        summary = {
            'number_of_stocks': len(portfolio),
            'date_range': {
                'start': min(df.index[0].strftime('%Y-%m-%d') for df in portfolio.values()),
                'end': max(df.index[-1].strftime('%Y-%m-%d') for df in portfolio.values())
            },
            'returns': {
                'average_annual_return': np.mean([df.attrs['annualized_return'] * 100 for df in portfolio.values()]),
                'best_performer': max((df.attrs['annualized_return'] * 100, ticker) for ticker, df in portfolio.items()),
                'worst_performer': min((df.attrs['annualized_return'] * 100, ticker) for ticker, df in portfolio.items())
            },
            'risk_metrics': {
                'average_sharpe': np.mean([df.attrs['sharpe_ratio'] for df in portfolio.values()]),
                'average_volatility': np.mean([
                    np.std(df['LogReturn']) * np.sqrt(self.trading_days_per_year) * 100 
                    for df in portfolio.values()
                ])
            }
        }
        
        with open(os.path.join(output_dir, "portfolio_summary.json"), 'w') as f:
            json.dump(summary, f, indent=2)

def main():
    """Generate a seeded ten-stock portfolio on disk and print its statistics."""
    output_dir = "synthetic_market_data"
    generator = SyntheticMarketGenerator(global_seed=42)
    portfolio = generator.generate_portfolio(num_stocks=10, output_dir=output_dir)

    print("\nPortfolio Generation Complete!")
    print(f"Generated {len(portfolio)} stocks in '{output_dir}'\n")

    # Per-stock report; annual returns and Sharpe ratios are collected for
    # the portfolio-level figures printed afterwards.
    total_returns, annual_returns, sharpe_ratios = [], [], []
    for ticker, df in portfolio.items():
        stats = df.attrs
        total_pct = stats['total_return'] * 100
        annual_pct = stats['annualized_return'] * 100
        sharpe = stats['sharpe_ratio']

        total_returns.append(total_pct)
        annual_returns.append(annual_pct)
        sharpe_ratios.append(sharpe)

        print(f"{ticker}:")
        print(f"  Total Return: {total_pct:.1f}%")
        print(f"  Annual Return: {annual_pct:.1f}%")
        print(f"  Sharpe Ratio: {sharpe:.2f}")
        print()

    lowest, highest = min(annual_returns), max(annual_returns)
    print("Portfolio Statistics:")
    print(f"  Average Annual Return: {np.mean(annual_returns):.1f}%")
    print(f"  Average Sharpe Ratio: {np.mean(sharpe_ratios):.2f}")
    print(f"  Return Range: {lowest:.1f}% to {highest:.1f}%")

# Run the example only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()