In [18]:
import yfinance as yf
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
import warnings
warnings.filterwarnings('ignore')

def get_auto_parameters(target_phases):
    """Automatically adjust all parameters based on target phase count"""
    base_phases = 10  # Our baseline
    
    if target_phases <= 5:
        # Very aggressive consolidation for 5 or fewer phases
        return {
            'window': 8,
            'min_phase_duration': 6,
            'slope_threshold_strong': 3.0,
            'slope_threshold_medium': 1.0,
            'price_threshold_strong': 40,
            'price_threshold_medium': 20,
            'slope_tolerance': 0.4,
            'merge_aggression': 2.0
        }
    elif target_phases <= 8:
        # Moderate consolidation for 6-8 phases
        return {
            'window': 7,
            'min_phase_duration': 4,
            'slope_threshold_strong': 2.5,
            'slope_threshold_medium': 0.8,
            'price_threshold_strong': 35,
            'price_threshold_medium': 15,
            'slope_tolerance': 0.3,
            'merge_aggression': 1.5
        }
    elif target_phases <= 12:
        # Light consolidation for 9-12 phases
        return {
            'window': 6,
            'min_phase_duration': 3,
            'slope_threshold_strong': 2.0,
            'slope_threshold_medium': 0.5,
            'price_threshold_strong': 30,
            'price_threshold_medium': 10,
            'slope_tolerance': 0.2,
            'merge_aggression': 1.0
        }
    else:
        # Minimal consolidation for 13+ phases
        return {
            'window': 5,
            'min_phase_duration': 2,
            'slope_threshold_strong': 1.5,
            'slope_threshold_medium': 0.3,
            'price_threshold_strong': 25,
            'price_threshold_medium': 8,
            'slope_tolerance': 0.1,
            'merge_aggression': 0.5
        }

def detect_trend_phases(ticker, params):
    """Detect trend phases with parameterized settings"""
    print(f"Analyzing {ticker} with target phases...")
    
    # Get historical data
    try:
        stock = yf.Ticker(ticker)
        hist_data = stock.history(period="max", interval="1mo")
        
        if hist_data.empty:
            print(f"{ticker} not found, using AAPL for demonstration...")
            stock = yf.Ticker("AAPL")
            hist_data = stock.history(period="max", interval="1mo")
    except:
        print("Using AAPL as fallback...")
        stock = yf.Ticker("AAPL")
        hist_data = stock.history(period="max", interval="1mo")
    
    # Calculate average price
    hist_data['Avg_Price'] = (hist_data['Open'] + hist_data['High'] + hist_data['Low'] + hist_data['Close']) / 4
    
    # Use parameterized window for trend detection
    hist_data['Trend_Slope'] = hist_data['Avg_Price'].rolling(window=params['window']).apply(
        lambda x: LinearRegression().fit(np.arange(len(x)).reshape(-1, 1), x).coef_[0] if len(x) == params['window'] else np.nan,
        raw=False
    )
    
    hist_data = hist_data.dropna()
    return hist_data

def classify_trend_with_params(slope, price_change_pct, params):
    """Classify trend using parameterized thresholds"""
    if slope > params['slope_threshold_strong'] or price_change_pct > params['price_threshold_strong']:
        return "Strong_Uptrend"
    elif slope > params['slope_threshold_medium'] or price_change_pct > params['price_threshold_medium']:
        return "Uptrend"
    elif slope < -params['slope_threshold_strong'] or price_change_pct < -params['price_threshold_strong']:
        return "Strong_Downtrend"
    elif slope < -params['slope_threshold_medium'] or price_change_pct < -params['price_threshold_medium']:
        return "Downtrend"
    else:
        return "Consolidation"

def identify_phases_with_params(hist_data, target_phases, params):
    """Identify phases using parameterized settings"""
    
    # First pass: identify initial trend segments
    hist_data['Raw_Trend_Slope'] = hist_data['Trend_Slope']
    hist_data['Raw_Trend_Type'] = hist_data['Trend_Slope'].apply(
        lambda x: "Up" if x > 0.2 else ("Down" if x < -0.2 else "Flat")
    )
    
    # Group consecutive months
    hist_data['Phase_Change'] = hist_data['Raw_Trend_Type'] != hist_data['Raw_Trend_Type'].shift(1)
    hist_data['Phase_Group'] = hist_data['Phase_Change'].cumsum()
    
    # Calculate phase statistics
    phases = []
    for group_num in hist_data['Phase_Group'].unique():
        phase_data = hist_data[hist_data['Phase_Group'] == group_num]
        
        # Use parameterized minimum duration
        if len(phase_data) < params['min_phase_duration']:
            continue
            
        # Calculate overall phase characteristics
        x = np.arange(len(phase_data)).reshape(-1, 1)
        y = phase_data['Avg_Price'].values
        overall_slope = LinearRegression().fit(x, y).coef_[0]
        price_change_pct = ((phase_data['Avg_Price'].iloc[-1] - phase_data['Avg_Price'].iloc[0]) / 
                           phase_data['Avg_Price'].iloc[0]) * 100
        
        trend_type = classify_trend_with_params(overall_slope, price_change_pct, params)
        
        phases.append({
            'Phase_Group': group_num,
            'Start_Date': phase_data.index[0],
            'End_Date': phase_data.index[-1],
            'Duration_Months': len(phase_data),
            'Overall_Slope': overall_slope,
            'Price_Change_Pct': price_change_pct,
            'Start_Price': phase_data['Avg_Price'].iloc[0],
            'End_Price': phase_data['Avg_Price'].iloc[-1],
            'Trend_Type': trend_type
        })
    
    phases_df = pd.DataFrame(phases)
    
    # If we already have fewer phases than target, return
    if len(phases_df) <= target_phases:
        return phases_df
    
    # Otherwise, merge phases to reach target
    return merge_phases_to_target(phases_df, target_phases, params)

def merge_phases_to_target(phases_df, target_phases, params):
    """Merge phases until we reach the target count"""
    
    while len(phases_df) > target_phases:
        # Find the best phases to merge based on multiple criteria
        best_merge_idx = None
        best_merge_score = float('inf')
        
        for i in range(len(phases_df) - 1):
            current = phases_df.iloc[i]
            next_phase = phases_df.iloc[i + 1]
            
            # Calculate merge score (lower is better)
            duration_score = (current['Duration_Months'] + next_phase['Duration_Months']) * 0.1
            
            # Trend compatibility (same trend type gets better score)
            trend_compatibility = 1 if current['Trend_Type'] == next_phase['Trend_Type'] else 3
            
            # Slope difference (smaller difference gets better score)
            slope_diff = abs(current['Overall_Slope'] - next_phase['Overall_Slope'])
            
            # Price level proximity (closer prices get better score)
            price_gap = abs(current['End_Price'] - next_phase['Start_Price']) / current['End_Price']
            
            merge_score = (duration_score + trend_compatibility + 
                          slope_diff * 10 + price_gap * 100) * params['merge_aggression']
            
            if merge_score < best_merge_score:
                best_merge_score = merge_score
                best_merge_idx = i
        
        if best_merge_idx is None:
            break
            
        # Merge the selected phases
        current = phases_df.iloc[best_merge_idx]
        next_phase = phases_df.iloc[best_merge_idx + 1]
        
        merged_phase = {
            'Phase_Group': current['Phase_Group'],
            'Start_Date': current['Start_Date'],
            'End_Date': next_phase['End_Date'],
            'Duration_Months': current['Duration_Months'] + next_phase['Duration_Months'],
            'Start_Price': current['Start_Price'],
            'End_Price': next_phase['End_Price'],
            'Trend_Type': f"{current['Trend_Type']}+{next_phase['Trend_Type']}" if current['Trend_Type'] != next_phase['Trend_Type'] else current['Trend_Type']
        }
        
        # Recalculate overall metrics for merged phase
        merged_phase['Price_Change_Pct'] = ((next_phase['End_Price'] - current['Start_Price']) / current['Start_Price']) * 100
        
        # For slope, approximate using the merged duration
        price_change_per_month = (next_phase['End_Price'] - current['Start_Price']) / merged_phase['Duration_Months']
        merged_phase['Overall_Slope'] = price_change_per_month / current['Start_Price'] if current['Start_Price'] != 0 else price_change_per_month
        
        # Replace merged phases
        new_phases = phases_df.drop([best_merge_idx, best_merge_idx + 1]).reset_index(drop=True)
        new_row = pd.DataFrame([merged_phase])
        new_phases = pd.concat([new_phases.iloc[:best_merge_idx], new_row, new_phases.iloc[best_merge_idx:]]).reset_index(drop=True)
        
        phases_df = new_phases
    
    return phases_df

def classify_final_trend_simple(phase):
    """Simple final trend classification"""
    if 'Strong_Uptrend' in str(phase['Trend_Type']):
        return "Strong_Uptrend"
    elif 'Uptrend' in str(phase['Trend_Type']):
        return "Uptrend"
    elif 'Strong_Downtrend' in str(phase['Trend_Type']):
        return "Strong_Downtrend"
    elif 'Downtrend' in str(phase['Trend_Type']):
        return "Downtrend"
    else:
        return "Consolidation"

def create_final_output(phases_df, ticker):
    """Create the final output table"""
    # Simplify trend names for final output
    phases_df['Final_Trend'] = phases_df.apply(classify_final_trend_simple, axis=1)
    
    # Create output
    phase_output = []
    for idx, phase in phases_df.iterrows():
        phase_name = f"Phase{idx+1}"
        start_date = phase['Start_Date'].strftime('%Y-%m-%d')
        end_date = phase['End_Date'].strftime('%Y-%m-%d')
        phase_output.append(f"{phase_name}({start_date}, {end_date})")
    
    result_string = f"{ticker}, " + ", ".join(phase_output)
    return result_string, phases_df

# Main execution with single parameter control
def analyze_stock_phases(ticker="HCAR.KA", target_phases=10):
    """
    Main analysis function with single parameter control
    
    Parameters:
    - ticker: Stock ticker symbol
    - target_phases: Desired number of phases (typically 5-15)
    """
    
    print("=" * 70)
    print(f"ADAPTIVE TREND PHASE ANALYSIS")
    print(f"Ticker: {ticker}")
    print(f"Target Phases: {target_phases}")
    print("=" * 70)
    
    # Step 1: Get auto-adjusted parameters based on target
    params = get_auto_parameters(target_phases)
    print(f"Auto-adjusted parameters:")
    print(f"  - Trend window: {params['window']} months")
    print(f"  - Min phase duration: {params['min_phase_duration']} months")
    print(f"  - Slope thresholds: {params['slope_threshold_medium']:.1f}/{params['slope_threshold_strong']:.1f}")
    print(f"  - Price thresholds: {params['price_threshold_medium']}%/{params['price_threshold_strong']}%")
    
    # Step 2: Get data and detect phases
    hist_data = detect_trend_phases(ticker, params)
    print(f"\nData analyzed: {len(hist_data)} months")
    print(f"Date range: {hist_data.index[0].strftime('%Y-%m')} to {hist_data.index[-1].strftime('%Y-%m')}")
    
    # Step 3: Identify phases with parameters
    phases_df = identify_phases_with_params(hist_data, target_phases, params)
    print(f"Phases identified: {len(phases_df)}")
    
    # Step 4: Create final output
    result_string, final_phases = create_final_output(phases_df, ticker)
    
    # Display results
    print("\n" + "=" * 70)
    print("FINAL PHASE TABLE")
    print("=" * 70)
    print(result_string)
    
    # Detailed analysis
    print("\n" + "=" * 70)
    print("DETAILED PHASE BREAKDOWN")
    print("=" * 70)
    
    for idx, phase in final_phases.iterrows():
        print(f"\nPhase {idx+1}:")
        print(f"  Period: {phase['Start_Date'].strftime('%Y-%m-%d')} to {phase['End_Date'].strftime('%Y-%m-%d')}")
        print(f"  Duration: {phase['Duration_Months']} months")
        print(f"  Trend: {phase['Final_Trend']}")
        print(f"  Price Change: {phase['Price_Change_Pct']:+.1f}%")
        print(f"  Price Range: {phase['Start_Price']:.2f} → {phase['End_Price']:.2f}")
        print(f"  Monthly Slope: {phase['Overall_Slope']:.4f}")

    # Summary
    print("\n" + "=" * 70)
    print("SUMMARY")
    print("=" * 70)
    print(f"Total analysis period: {len(hist_data)} months")
    print(f"Phases achieved: {len(final_phases)} (target: {target_phases})")
    print(f"Average phase duration: {final_phases['Duration_Months'].mean():.1f} months")
    
    trend_summary = final_phases['Final_Trend'].value_counts()
    print("\nPhase type distribution:")
    for trend_type, count in trend_summary.items():
        print(f"  {trend_type}: {count} phase(s)")

    return result_string, final_phases

# Example usage with different phase targets
if __name__ == "__main__":
    # SINGLE CONTROL PARAMETER - Change this number to get different phase counts
    TARGET_PHASES = 5  # Try 5, 8, 10, 12, etc.
    
    result, phases_df = analyze_stock_phases("KEL.KA", TARGET_PHASES)
    
    print(f"\n{'='*70}")
    print("QUICK USAGE GUIDE:")
    print(f"{'='*70}")
    print("To get different numbers of phases, simply change TARGET_PHASES:")
    print("  - 5 phases: Very consolidated, major trends only")
    print("  - 8 phases: Balanced consolidation")
    print("  - 10 phases: Moderate detail")
    print("  - 12 phases: More detailed trends")
    print("  - 15+ phases: High detail (may show noise)")

ADAPTIVE TREND PHASE ANALYSIS
Ticker: KEL.KA
Target Phases: 5
Auto-adjusted parameters:
  - Trend window: 8 months
  - Min phase duration: 6 months
  - Slope thresholds: 1.0/3.0
  - Price thresholds: 20%/40%
Analyzing KEL.KA with target phases...

Data analyzed: 208 months
Date range: 2008-08 to 2025-11
Phases identified: 5

FINAL PHASE TABLE
KEL.KA, Phase1(2009-05-01, 2012-03-01), Phase2(2012-04-01, 2017-02-01), Phase3(2017-05-01, 2020-05-01), Phase4(2020-08-01, 2023-11-01), Phase5(2023-12-01, 2025-08-01)

DETAILED PHASE BREAKDOWN

Phase 1:
  Period: 2009-05-01 to 2012-03-01
  Duration: 35 months
  Trend: Consolidation
  Price Change: +2.7%
  Price Range: 2.73 → 2.80
  Monthly Slope: -0.0278

Phase 2:
  Period: 2012-04-01 to 2017-02-01
  Duration: 49 months
  Trend: Strong_Uptrend
  Price Change: +180.0%
  Price Range: 3.59 → 10.06
  Monthly Slope: 0.0367

Phase 3:
  Period: 2017-05-01 to 2020-05-01
  Duration: 30 months
  Trend: Downtrend
  Price Change: -60.1%
  Price Range: 7.64 → 