In [None]:
import yfinance as yf
import pandas as pd
import numpy as np
from scipy import stats
from scipy.stats import linregress
from sklearn.mixture import GaussianMixture
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta

# Global plot styling for every figure in the notebook.
sns.set_style("whitegrid")
plt.rcParams['figure.figsize'] = (12, 6)

# Symbol analysed throughout the notebook.
TICKER = 'ADP'

print(f"Downloading daily data for {TICKER} from January 1, 2019...")
data_daily = yf.download(TICKER, start='2019-01-01', interval='1d', progress=False)

# Fail fast with a readable message; without this guard an empty download only
# surfaces further down as an opaque error from the resample/index lookups.
if data_daily is None or data_daily.empty:
    raise ValueError(
        f"No daily data returned for {TICKER}; check the ticker symbol and network connection."
    )

# When the columns come back as a MultiIndex, drop level 1 so the aggregation
# below can address columns by plain name ('Open', 'High', ...).
# NOTE(review): level 1 is presumably the ticker level - confirm against the
# installed yfinance version.
if isinstance(data_daily.columns, pd.MultiIndex):
    data_daily.columns = data_daily.columns.droplevel(1)

# Resample to weekly bars labelled on Fridays; weeks without data are dropped.
data = data_daily.resample('W-FRI').agg({
    'Open': 'first',
    'High': 'max',
    'Low': 'min',
    'Close': 'last',
    'Volume': 'sum'
}).dropna()

# The summary below indexes the first/last row, so an empty frame must stop here.
if data.empty:
    raise ValueError(f"Weekly resampling of {TICKER} produced no complete rows.")

print(f"Total weeks: {len(data)}")
print(f"Range: {data.index[0].date()} to {data.index[-1].date()}")
print(f"\nLast few rows:")
print(data.tail())

In [None]:
"""
STEP 1: Analyze most recent 10 weeks to generate X-Y-D/U pattern notation.
"""

# Private copy of the trailing ten weekly bars so the added column never touches `data`.
current_window = data.iloc[-10:].copy()

# Flag a week as "up" when its close beats the previous row's close.
# The first row has no predecessor inside the window (shift gives NaN), so its
# comparison is False and it is counted on the "down" side.
_window_close = current_window['Close']
current_window['Up'] = _window_close.gt(_window_close.shift(1)).astype(int)
up_weeks = current_window['Up'].sum()
down_weeks = 10 - up_weeks

# Trajectory is the sign of a least-squares line through the ten closes.
close_prices = _window_close.to_numpy().flatten()
weeks_index = np.arange(close_prices.size)
slope, intercept, r_value, p_value, std_err = linregress(weeks_index, close_prices)
if slope > 0:
    trajectory = 'U'
else:
    trajectory = 'D'

# Entry price is the close of the final bar in the window; if the lookup yields
# a Series-like object (duplicate 'Close' labels) take its first element.
_last_close = current_window.iloc[-1]['Close']
if hasattr(_last_close, 'iloc'):
    _last_close = _last_close.iloc[0]
entry_price = float(_last_close)

current_pattern = f"{up_weeks}-{down_weeks}-{trajectory}"

print(
    "=" * 60,
    "STEP 1: CURRENT 10-WEEK SEQUENCE",
    "=" * 60,
    f"Pattern: {current_pattern}",
    f"Up Weeks: {up_weeks}",
    f"Down Weeks: {down_weeks}",
    f"Trajectory: {trajectory} (slope: {slope:.4f})",
    f"Entry Price: ${entry_price:.2f}",
    f"Period: {current_window.index[0].date()} to {current_window.index[-1].date()}",
    sep="\n",
)

In [None]:
"""
STEP 2: Scan historical data for matching pattern instances.
"""

def calculate_pattern(window_df):
    """
    Label a window of weekly bars as "<up>-<down>-<U|D>".

    up   : number of rows whose 'Close' exceeds the previous row's 'Close'
           (the first row has no predecessor in the window and never counts).
    down : 10 minus ``up``.
    U/D  : 'U' when the least-squares slope of 'Close' against row position
           is strictly positive, otherwise 'D'.

    Returns None when the window holds fewer than 10 rows.
    """
    if len(window_df) < 10:
        return None

    close_col = window_df['Close']
    rose = close_col.gt(close_col.shift(1))
    up = rose.sum()
    down = 10 - up

    closes = close_col.to_numpy().flatten()
    positions = np.arange(closes.size)
    fit = linregress(positions, closes)

    if fit.slope > 0:
        traj = 'U'
    else:
        traj = 'D'
    return '{}-{}-{}'.format(up, down, traj)

patterns_list = []

# Slide a 10-week window over the history. The scan stops one window short of
# the end: the final window is exactly the current pattern (data.tail(10)), has
# no forward prices yet, and would otherwise always "match" itself - inflating
# the match count/frequency and making the no-match branch below unreachable.
for i in range(len(data) - 10):
    window = data.iloc[i:i+10]
    pattern = calculate_pattern(window)
    if pattern:
        # Close of the window's last bar; unwrap if the lookup is Series-like.
        last_close = window.iloc[-1]['Close']
        if hasattr(last_close, 'iloc'):
            last_close = last_close.iloc[0]
        pattern_end_price = float(last_close)
        patterns_list.append({
            'pattern': pattern,
            'start_date': window.index[0],
            'end_date': window.index[-1],
            'pattern_end_price': pattern_end_price,
            'window_idx': i
        })

# Explicit columns keep the frame well-formed (and the lookups below valid)
# even when the history is too short to yield a single window.
patterns_df = pd.DataFrame(
    patterns_list,
    columns=['pattern', 'start_date', 'end_date', 'pattern_end_price', 'window_idx'],
)
matches = patterns_df[patterns_df['pattern'] == current_pattern]
total_patterns = len(patterns_df)
match_count = len(matches)
# Share of historical windows carrying the current pattern, in percent.
frequency = (match_count / total_patterns) * 100 if total_patterns > 0 else 0

print("=" * 60)
print("STEP 2: HISTORICAL PATTERN OCCURRENCES")
print("=" * 60)
print(f"Current Pattern: {current_pattern}")
print(f"Total Historical Windows: {total_patterns}")
print(f"Matching Patterns: {match_count}")
print(f"Frequency: {frequency:.2f}%")
print(f"Rarity: {'Common' if frequency > 5 else 'Moderate' if frequency > 2 else 'Rare'}")

if match_count > 0:
    print(f"\nSample matches:")
    print(matches[['start_date', 'end_date', 'pattern_end_price']].head(10))
else:
    print(f"\n⚠️  No matches found - analysis will use baseline only")

In [None]:
"""
STEP 3 & 4 (FIXED): Calculate baseline and pattern-specific distributions using actual prices.

KEY FIXES:
1. Use ALL weeks 1-10 instead of just week 10
2. Use ACTUAL historical prices instead of projecting onto entry_price
3. Consistent GMM methodology for both baseline and pattern
4. Display the actual baseline used in calculations
"""

# Number of weekly bars after each pattern window whose closes are pooled into
# the forward-price samples; also used as the expiration offset (in weeks) in
# the strategy cell.
FORWARD_WEEKS = 10

def get_price_clustering_gmm(prices_array, n_components_range=(1, 4)):
    """
    Estimate the dominant price cluster of a sample with a Gaussian mixture.

    One GaussianMixture is fitted per candidate component count; the fit with
    the lowest BIC is kept and the mean of its highest-weight component is
    returned as the cluster price.

    Parameters
    ----------
    prices_array : array-like of float
        Price sample. Any array-like is accepted and flattened to 1-D.
    n_components_range : tuple of (int, int)
        Half-open ``(start, stop)`` range of component counts to try, i.e. the
        default ``(1, 4)`` evaluates 1, 2 and 3 components.

    Returns
    -------
    tuple
        ``(cluster_price, n_components, fitted_gmm)``. With fewer than 20
        points no mixture is fitted and the result is ``(median, 1, None)``,
        or ``(nan, 0, None)`` for an empty sample.

    Raises
    ------
    ValueError
        If a mixture has to be fitted but ``n_components_range`` contains no
        component count, or starts below 1.
    """
    prices = np.asarray(prices_array, dtype=float).ravel()

    # Too few points for a meaningful mixture: fall back to the median.
    if prices.size < 20:
        if prices.size > 0:
            return np.median(prices), 1, None
        return np.nan, 0, None

    # Validate before fitting so a bad range fails with a clear message rather
    # than an AttributeError on a None model after the loop.
    start, stop = n_components_range
    candidates = range(start, stop)
    if start < 1 or len(candidates) == 0:
        raise ValueError(
            f"n_components_range={n_components_range!r} must describe a non-empty "
            "half-open range of component counts starting at 1 or more"
        )

    X = prices.reshape(-1, 1)

    best_gmm = None
    best_bic = np.inf

    for n_components in candidates:
        # Fixed seed keeps the fit reproducible across runs.
        gmm = GaussianMixture(n_components=n_components, random_state=42)
        gmm.fit(X)
        bic = gmm.bic(X)
        if bic < best_bic:
            best_bic = bic
            best_gmm = gmm

    # Dominant cluster = component carrying the largest mixture weight.
    dominant_idx = np.argmax(best_gmm.weights_)
    cluster_price = best_gmm.means_[dominant_idx][0]

    return cluster_price, best_gmm.n_components, best_gmm

def _collect_forward_closes(price_frame, window_starts, horizon, pattern_weeks=10):
    """
    Gather the closes that follow each pattern window.

    A window starting at row ``s`` occupies rows ``s .. s + pattern_weeks - 1``;
    the closes of the next ``horizon`` rows are collected (fewer when the frame
    ends first) and concatenated in window order.

    Returns a 1-D float array, empty when nothing could be collected.
    """
    close_col = price_frame['Close']
    if isinstance(close_col, pd.DataFrame):
        # Duplicate 'Close' labels yield a frame; use its first column.
        close_col = close_col.iloc[:, 0]
    closes = close_col.to_numpy(dtype=float)

    chunks = []
    for start in window_starts:
        first_future = int(start) + pattern_weeks
        # Slicing truncates naturally at the end of the available history.
        chunks.append(closes[first_future:first_future + horizon])
    return np.concatenate(chunks) if chunks else np.array([], dtype=float)


# === BASELINE: Collect ALL future prices from ALL patterns ===
baseline_prices_array = _collect_forward_closes(data, patterns_df['window_idx'], FORWARD_WEEKS)
baseline_future_prices = baseline_prices_array.tolist()
baseline_cluster_price, baseline_n_components, baseline_gmm = get_price_clustering_gmm(baseline_prices_array)

print("=" * 60)
print("STEP 3: BASELINE EXPECTATIONS (FIXED)")
print("=" * 60)
print(f"Sample Size: {len(baseline_prices_array)} price points")
# "up to": windows near the end of the history have fewer forward weeks available.
print(f"  ({len(patterns_df)} patterns × up to {FORWARD_WEEKS} weeks)")
print(f"Forward Period: {FORWARD_WEEKS} weeks")
if baseline_prices_array.size > 0:
    print(f"\nBaseline Statistics (using ACTUAL historical prices):")
    print(f"  Mean: ${baseline_prices_array.mean():.2f}")
    print(f"  Median: ${np.median(baseline_prices_array):.2f}")
    print(f"  GMM Components: {baseline_n_components}")
    print(f"  Baseline Cluster: ${baseline_cluster_price:.2f}")
else:
    # Avoid NaN statistics and empty-slice warnings when there is no history.
    print("\nNo forward prices available - baseline statistics cannot be calculated.")

# === PATTERN-SPECIFIC: Collect ALL future prices from MATCHING patterns ===
pattern_prices_array = _collect_forward_closes(data, matches['window_idx'], FORWARD_WEEKS)
pattern_future_prices = pattern_prices_array.tolist()

if len(pattern_future_prices) > 0:
    pattern_cluster_price, pattern_n_components, pattern_gmm = get_price_clustering_gmm(pattern_prices_array)

    print("\n" + "=" * 60)
    print("STEP 4: SEQUENCE-SPECIFIC DISTRIBUTION (FIXED)")
    print("=" * 60)
    print(f"Pattern: {current_pattern}")
    print(f"Sample Size: {len(pattern_prices_array)} price points")
    print(f"  ({match_count} patterns × up to {FORWARD_WEEKS} weeks)")
    print(f"GMM Components: {pattern_n_components}")
    print(f"\nPattern Statistics (using ACTUAL historical prices):")
    print(f"  Mean: ${pattern_prices_array.mean():.2f}")
    print(f"  Median: ${np.median(pattern_prices_array):.2f}")
    print(f"  Pattern Cluster: ${pattern_cluster_price:.2f}")

    # Overlay both forward-price densities with their cluster estimates.
    fig, ax = plt.subplots(figsize=(14, 7))
    sns.kdeplot(data=baseline_prices_array, fill=True, label='Baseline (All Patterns)',
                alpha=0.3, color='blue', ax=ax)
    sns.kdeplot(data=pattern_prices_array, fill=True, label=f'Pattern ({current_pattern})',
                alpha=0.5, color='red', ax=ax)
    ax.axvline(baseline_cluster_price, color='blue', linestyle='--',
               label=f'Baseline GMM: ${baseline_cluster_price:.2f}')
    ax.axvline(pattern_cluster_price, color='red', linestyle='--',
               label=f'Pattern GMM: ${pattern_cluster_price:.2f}')
    ax.axvline(entry_price, color='green', linestyle='-',
               label=f'Entry Price: ${entry_price:.2f}')

    # Plot all GMM means for transparency (pattern_gmm is None when the sample
    # was too small for a mixture fit, in which case n_components is 1).
    if pattern_n_components > 1:
        for i, mean in enumerate(pattern_gmm.means_):
            weight = pattern_gmm.weights_[i]
            ax.axvline(mean[0], color='red', linestyle=':', alpha=0.5,
                       label=f'Component {i+1}: ${mean[0]:.2f} (weight: {weight:.2%})')

    ax.set_xlabel(f'Price (Weeks 1-{FORWARD_WEEKS} ahead)')
    ax.set_ylabel('Density')
    ax.set_title(f'Distribution Comparison - {current_pattern} (GMM Clustering - FIXED)\n' +
                 f'Using {len(baseline_prices_array)} baseline prices vs {len(pattern_prices_array)} pattern prices')
    ax.legend()
    ax.grid(True, alpha=0.3)
    plt.show()

else:
    # Define every pattern-level name so later cells never hit a NameError.
    pattern_cluster_price = np.nan
    pattern_n_components = 0
    pattern_gmm = None
    pattern_prices_array = np.array([])
    print("\nNo pattern matches found - cannot calculate pattern-specific distribution.")

In [None]:
"""
STEPS 5 & 6: Calculate the cluster delta (Step 5) and the supporting
exceedance metrics (Step 6) (FIXED).
"""

if len(pattern_future_prices) > 0 and not np.isnan(pattern_cluster_price):
    # Relative gap between the pattern's dominant cluster and the baseline's, in percent.
    delta_pct = ((pattern_cluster_price - baseline_cluster_price) / baseline_cluster_price) * 100

    # Share of pooled pattern forward prices that sit above the current entry price.
    exceedance_count = (pattern_prices_array > entry_price).sum()
    exceedance_ratio = (exceedance_count / len(pattern_prices_array)) * 100

    # Median over all pooled forward weeks of the matching patterns.
    terminal_median_price = np.median(pattern_prices_array)

    # Three-way label: a zero or undefined (NaN) delta is neither bullish nor
    # bearish, and must not fall through to the bearish text.
    if delta_pct > 0:
        signal_label = 'BULLISH - Positive delta'
    elif delta_pct < 0:
        signal_label = 'BEARISH - Negative delta'
    else:
        signal_label = 'NEUTRAL - Zero or undefined delta'

    print("=" * 60)
    print("STEP 5: DELTA IN PRICE DENSITY DYNAMICS (FIXED)")
    print("=" * 60)
    print(f"Baseline Cluster: ${baseline_cluster_price:.2f}")
    print(f"Pattern Cluster: ${pattern_cluster_price:.2f}")
    print(f"Delta: {delta_pct:+.2f}%")
    print(f"Signal: {signal_label}")

    print("\n" + "=" * 60)
    print("STEP 6: SUPPORTING METRICS")
    print("=" * 60)
    print(f"Entry Price: ${entry_price:.2f}")
    print(f"Exceedance Ratio: {exceedance_ratio:.1f}%")
    print(f"Terminal Median: ${terminal_median_price:.2f}")
    print(f"Sample Size: {len(pattern_prices_array)} price points from {match_count} pattern instances")

    # Same exceedance measure on the baseline sample, for comparison.
    baseline_exceedance = (baseline_prices_array > entry_price).sum()
    baseline_exceedance_ratio = (baseline_exceedance / len(baseline_prices_array)) * 100
    print(f"\nComparison to Baseline:")
    print(f"  Baseline Exceedance Ratio: {baseline_exceedance_ratio:.1f}%")
    print(f"  Pattern Exceedance Ratio: {exceedance_ratio:.1f}%")
    print(f"  Difference: {exceedance_ratio - baseline_exceedance_ratio:+.1f}%")

    # Bundle consumed by the strategy cell.
    METRICS = {
        'delta_pct': delta_pct,
        'exceedance_ratio': exceedance_ratio,
        'pattern_cluster_price': pattern_cluster_price,
        'terminal_median_price': terminal_median_price,
        'baseline_cluster_price': baseline_cluster_price,
        'pattern_mean': pattern_prices_array.mean(),
        'baseline_mean': baseline_prices_array.mean(),
        'pattern_n_components': pattern_n_components,
        'baseline_n_components': baseline_n_components
    }
else:
    # None tells the strategy cell that no pattern-level statistics exist.
    METRICS = None
    print("\nNo pattern matches - cannot calculate delta.")

In [None]:
"""
STEP 7: Design bull call spread based on positive delta exploitation (FIXED).
"""

if METRICS and METRICS['delta_pct'] > 0:
    last_date = data.index[-1]
    expiration_date = last_date + timedelta(weeks=FORWARD_WEEKS)

    # Strike selection: center on pattern cluster price
    pattern_cluster = METRICS['pattern_cluster_price']

    # Short strike targets the pattern cluster (where price likely lands)
    short_strike = round(pattern_cluster * 2) / 2  # Round to $0.50

    # Long strike 5-10 points below for spread width
    spread_width = 10 if entry_price > 200 else 5
    long_strike = short_strike - spread_width

    # Alternative conservative spread using baseline
    conservative_long = round(baseline_cluster_price * 2) / 2
    conservative_short = conservative_long + spread_width

    upside_to_cluster = ((pattern_cluster - entry_price) / entry_price) * 100

    # Same thresholds and labels as the Step 2 rarity line, so the notebook
    # never describes one frequency with two different words.
    rarity_label = 'Common' if frequency > 5 else 'Moderate' if frequency > 2 else 'Rare'

    print("=" * 60)
    print(f"STEP 7: BULL CALL SPREAD RECOMMENDATION - {TICKER} (FIXED)")
    print("=" * 60)
    print(f"\n📊 PATTERN: {current_pattern}")
    print(f"   Frequency: {frequency:.2f}% ({rarity_label})")

    print(f"\n📈 KEY METRICS (FIXED METHODOLOGY):")
    print(f"   Positive Delta: +{METRICS['delta_pct']:.2f}%")
    print(f"   Baseline Cluster: ${METRICS['baseline_cluster_price']:.2f} ({METRICS['baseline_n_components']} component(s))")
    print(f"   Pattern Cluster: ${pattern_cluster:.2f} ({METRICS['pattern_n_components']} component(s))")
    print(f"   Exceedance Ratio: {METRICS['exceedance_ratio']:.1f}%")
    print(f"   Sample: {len(pattern_prices_array)} prices from {match_count} instances")

    print(f"\n💰 PRIMARY STRATEGY - AGGRESSIVE:")
    print(f"   Long Strike:  ${long_strike:.2f} (BUY)")
    print(f"   Short Strike: ${short_strike:.2f} (SELL)")
    print(f"   Spread Width: ${spread_width:.2f}")
    # The width is the spread's maximum value at expiration, not its profit:
    # the debit paid is unknown here and must be subtracted.
    print(f"   Max Value at Expiration: ${spread_width:.2f} per share")
    print(f"   Max Profit: spread width minus net debit paid (x100 per contract)")
    print(f"   Expiration: {expiration_date.strftime('%Y-%m-%d')}")
    print(f"   Target: ${pattern_cluster:.2f} ({upside_to_cluster:+.1f}% from entry)")
    if upside_to_cluster <= 0:
        # A positive delta versus baseline does not imply a target above entry.
        print(f"   NOTE: pattern target is at or below the current entry price - review strikes before trading")

    print(f"\n💡 ALTERNATIVE - CONSERVATIVE:")
    print(f"   Long Strike:  ${conservative_long:.2f} (BUY)")
    print(f"   Short Strike: ${conservative_short:.2f} (SELL)")
    print(f"   Rationale: Lower strikes for higher probability of profit")

    print(f"\n🎯 PRICE LEVELS:")
    print(f"   Current Entry: ${entry_price:.2f}")
    print(f"   Pattern Target: ${pattern_cluster:.2f}")
    print(f"   Terminal Median: ${METRICS['terminal_median_price']:.2f}")
    print(f"   Baseline: ${METRICS['baseline_cluster_price']:.2f}")

    print(f"\n📊 EDGE CALCULATION:")
    print(f"   Pattern shows {METRICS['delta_pct']:.1f}% clustering improvement over baseline")
    print(f"   Historical probability: ~{METRICS['exceedance_ratio']:.0f}% of reaching above entry")
    print(f"   Exploits positive delta in price density dynamics")

    print("\n" + "=" * 60)
    print("TRADE MANAGEMENT:")
    print("• Enter when spread offers 2:1+ reward:risk")
    print("• Target 50-80% max profit (close early)")
    print("• Stop loss: -50% of debit paid")
    print("• Monitor for pattern invalidation")
    print("=" * 60)

    print("\n" + "=" * 60)
    print("METHODOLOGY FIXES APPLIED:")
    print("=" * 60)
    print(f"✓ Uses ALL weeks 1-{FORWARD_WEEKS} (not just week {FORWARD_WEEKS})")
    print("✓ Uses ACTUAL historical prices (no projection bias)")
    print("✓ Consistent GMM methodology for baseline & pattern")
    print("✓ Displays actual baseline used in calculations")
    # "Up to": instances near the end of the history contribute fewer weeks.
    print(f"✓ Up to {FORWARD_WEEKS}x more data points: {len(pattern_prices_array)} vs {match_count}")
    print("=" * 60)

elif METRICS and METRICS['delta_pct'] < 0:
    print("=" * 60)
    print(f"NO BULLISH TRADE - Negative Delta")
    print("=" * 60)
    print(f"Pattern shows {METRICS['delta_pct']:.2f}% negative delta")
    print(f"Pattern cluster (${METRICS['pattern_cluster_price']:.2f}) below baseline (${METRICS['baseline_cluster_price']:.2f})")
    print("\nConsider: Bearish strategies or avoid this trade")

elif METRICS:
    # Metrics exist but the delta is exactly zero or undefined (NaN): there is
    # data, just no directional edge, so do not report it as "insufficient".
    print("=" * 60)
    print(f"NO TRADE - Zero or undefined delta")
    print("=" * 60)
    print(f"Pattern cluster (${METRICS['pattern_cluster_price']:.2f}) shows no measurable edge over baseline (${METRICS['baseline_cluster_price']:.2f})")

else:
    print("\n⚠️  Insufficient pattern data for strategy recommendation")