In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from scipy.signal import argrelextrema
import io

# --- 1. Data Loading and Preparation ---
df = pd.read_csv("/Users/apple/msft-30yr-data.csv")


print("Data loaded and prepared. Shape:", df.shape)
print(df.head())

Data loaded and prepared. Shape: (7550, 8)
                        Date      Open      High       Low     Close  \
0  1995-06-19 00:00:00-04:00  3.342796  3.443966  3.328429  3.440972   
1  1995-06-20 00:00:00-04:00  3.462523  3.500836  3.438577  3.500836   
2  1995-06-21 00:00:00-04:00  3.519992  3.539148  3.448155  3.467311   
3  1995-06-22 00:00:00-04:00  3.472101  3.524781  3.472101  3.519992   
4  1995-06-23 00:00:00-04:00  3.476890  3.531965  3.457733  3.491257   

     Volume  Dividends  Stock Splits  
0  78982400        0.0           0.0  
1  56670400        0.0           0.0  
2  57224000        0.0           0.0  
3  46296000        0.0           0.0  
4  46356800        0.0           0.0  


In [6]:
# --- 2. Define Parameters for Double Bottom Identification ---
ORDER = 10 # Window for local min/max, e.g., 10 days on each side
PRICE_TOLERANCE_BOTTOMS = 0.03 # Bottoms within 3% of each other
MIN_DAYS_BETWEEN_BOTTOMS = 20
MAX_DAYS_BETWEEN_BOTTOMS = 120 # Approx 6 months
PEAK_SIGNIFICANCE = 0.05 # Peak P1 must be at least 5% higher than the average of B1 and B2
DOWNTREND_LOOKBACK = 30 # Days to check for prior downtrend
DOWNTREND_DROP_PCT = 0.05 # B1 must be at least 5% lower than price 30 days prior

BREAKOUT_CONFIRMATION_DAYS = 20 # Look for breakout within 20 days after B2
BREAKOUT_MARGIN = 0.01 # Price must close 1% above neckline for breakout
TARGET_PROFIT_DAYS = 60 # Look for target profit within 60 days after breakout
TARGET_PROFIT_PCT = 0.10 # Target profit of 10% from neckline
STOP_LOSS_PCT_FROM_BOTTOMS = 0.03 # Stop loss 3% below the average of the two bottoms

# Use 'Adj Close' for calculations
price_series = df['Close']

# --- 3. Find Local Minima and Maxima ---
# Smooth the series slightly to reduce noise for peak/trough detection
# smoothed_price = price_series.rolling(window=3).mean().dropna()
# For simplicity with argrelextrema, we'll use the original series, but smoothing can be beneficial.
# argrelextrema needs a numpy array.
local_min_indices = argrelextrema(price_series.values, np.less_equal, order=ORDER)[0]
local_max_indices = argrelextrema(price_series.values, np.greater_equal, order=ORDER)[0]

# Convert indices back to dates for easier handling
local_min_dates = price_series.index[local_min_indices]
local_max_dates = price_series.index[local_max_indices]



In [10]:
# --- 4. Identify Potential Double Bottom Patterns ---
potential_patterns = [] # Store (B1_date, P1_date, B2_date, neckline_price, avg_bottom_price)

price_series.index = pd.to_datetime(price_series.index)

for i in range(len(local_min_dates)):
    b1_idx = local_min_indices[i]
    b1_date = price_series.index[b1_idx]
    b1_price = price_series.iloc[b1_idx]

    # Check for prior downtrend
    if b1_idx < DOWNTREND_LOOKBACK:
        continue
    price_before_b1 = price_series.iloc[b1_idx - DOWNTREND_LOOKBACK]
    if b1_price > price_before_b1 * (1 - DOWNTREND_DROP_PCT):
        continue # Not a significant enough downtrend

    # Find next peak (P1) after B1
    possible_p1_indices = local_max_indices[local_max_indices > b1_idx]
    if len(possible_p1_indices) == 0:
        continue
    p1_idx = possible_p1_indices[0]
    p1_date = price_series.index[p1_idx]
    p1_price = price_series.iloc[p1_idx]

    # Peak P1 must be significantly higher than B1
    if p1_price < b1_price * (1 + PEAK_SIGNIFICANCE):
        continue

    # Find next bottom (B2) after P1
    possible_b2_indices = local_min_indices[local_min_indices > p1_idx]
    if len(possible_b2_indices) == 0:
        continue
    
    for j in range(len(possible_b2_indices)):
        b2_idx = possible_b2_indices[j]
        b2_date = price_series.index[b2_idx]
        b2_price = price_series.iloc[b2_idx]

        # Check time between bottoms
        days_between_bottoms = (b2_date - b1_date).days
        if not (MIN_DAYS_BETWEEN_BOTTOMS <= days_between_bottoms <= MAX_DAYS_BETWEEN_BOTTOMS):
            continue

        # Check price tolerance between bottoms
        if abs(b1_price - b2_price) / min(b1_price, b2_price) > PRICE_TOLERANCE_BOTTOMS:
            continue
            
        # Check if peak P1 is significantly higher than the average of B1 and B2
        avg_bottom_price = (b1_price + b2_price) / 2
        if p1_price < avg_bottom_price * (1 + PEAK_SIGNIFICANCE):
            continue
            
        # Ensure B2 is not significantly lower than B1 (already covered by tolerance, but good check)
        # and P1 is indeed between B1 and B2 in time.
        if not (b1_date < p1_date < b2_date):
            continue

        neckline_price = p1_price
        potential_patterns.append({
            "B1_date": b1_date, "B1_price": b1_price,
            "P1_date": p1_date, "P1_price": p1_price, # Neckline level
            "B2_date": b2_date, "B2_price": b2_price,
            "Neckline": neckline_price,
            "Avg_Bottom_Price": avg_bottom_price
        })
        break # Found a suitable B2 for this B1-P1, move to next B1

print(f"\nFound {len(potential_patterns)} potential Double Bottom structures starting to show.")



Found 0 potential Double Bottom structures starting to show.


In [11]:
# --- 5. Classify Patterns as True or False Signals ---
true_signals = []
false_signals = []
pattern_starts_showing_timestamps = []

for pattern in potential_patterns:
    pattern_starts_showing_timestamps.append(pattern['B2_date'])
    
    # Define the period to check for breakout and target
    breakout_check_start_date = pattern['B2_date']
    breakout_check_end_date = pattern['B2_date'] + pd.Timedelta(days=BREAKOUT_CONFIRMATION_DAYS)
    target_check_end_date = breakout_check_end_date + pd.Timedelta(days=TARGET_PROFIT_DAYS)
    
    # Slice the DataFrame for the relevant period after B2
    relevant_data_breakout = price_series[breakout_check_start_date:breakout_check_end_date]
    
    breakout_occurred = False
    breakout_date = None

    # Check for breakout
    for date, price in relevant_data_breakout.items():
        if price > pattern['Neckline'] * (1 + BREAKOUT_MARGIN):
            breakout_occurred = True
            breakout_date = date
            break
            
    if breakout_occurred:
        relevant_data_target = price_series[breakout_date:target_check_end_date]
        target_hit = False
        stop_loss_hit = False
        stop_loss_level = pattern['Avg_Bottom_Price'] * (1 - STOP_LOSS_PCT_FROM_BOTTOMS)
        target_price = pattern['Neckline'] * (1 + TARGET_PROFIT_PCT)

        for date, price in relevant_data_target.items():
            if price <= stop_loss_level:
                stop_loss_hit = True
                break
            if price >= target_price:
                target_hit = True
                break
        
        if target_hit and not stop_loss_hit:
            true_signals.append({**pattern, "Outcome_Date": date, "Outcome_Price": price, "Signal_Type": "True"})
        else:
            false_signals.append({**pattern, "Outcome_Date": date if stop_loss_hit or target_hit else target_check_end_date, 
                                  "Outcome_Price": price if stop_loss_hit or target_hit else price_series[target_check_end_date:target_check_end_date+pd.Timedelta(days=1)].iloc[0] if len(price_series[target_check_end_date:target_check_end_date+pd.Timedelta(days=1)]) > 0 else price_series.iloc[-1], # Use last known price if window ends
                                  "Signal_Type": "False - Stop Loss Hit" if stop_loss_hit else "False - Target Not Hit"})
    else:
        false_signals.append({**pattern, "Outcome_Date": breakout_check_end_date, 
                              "Outcome_Price": price_series[breakout_check_end_date:breakout_check_end_date+pd.Timedelta(days=1)].iloc[0] if len(price_series[breakout_check_end_date:breakout_check_end_date+pd.Timedelta(days=1)]) > 0 else price_series.iloc[-1],
                              "Signal_Type": "False - No Breakout"})

print(f"\nTimestamps when pattern starts showing (B2 date): {len(pattern_starts_showing_timestamps)} instances")
# for ts in pattern_starts_showing_timestamps:
# print(ts.strftime('%Y-%m-%d'))

print(f"\nTrue signals (pattern confirmed and target met): {len(true_signals)} instances")
# for signal in true_signals:
# print(f"B1: {signal['B1_date'].strftime('%Y-%m-%d')}, P1: {signal['P1_date'].strftime('%Y-%m-%d')}, B2: {signal['B2_date'].strftime('%Y-%m-%d')}, Outcome: {signal['Outcome_Date'].strftime('%Y-%m-%d')}")

print(f"\nFalse signals: {len(false_signals)} instances")
# for signal in false_signals:
# print(f"B1: {signal['B1_date'].strftime('%Y-%m-%d')}, P1: {signal['P1_date'].strftime('%Y-%m-%d')}, B2: {signal['B2_date'].strftime('%Y-%m-%d')}, Reason: {signal['Signal_Type']}")



Timestamps when pattern starts showing (B2 date): 0 instances

True signals (pattern confirmed and target met): 0 instances

False signals: 0 instances


In [12]:
# --- 6. Visualization ---
def plot_pattern(df_full, pattern_info, title_suffix):
    b1_d, p1_d, b2_d = pattern_info['B1_date'], pattern_info['P1_date'], pattern_info['B2_date']
    neckline = pattern_info['Neckline']
    
    # Determine plot range: from some days before B1 to some days after outcome or B2
    start_plot_date = b1_d - pd.Timedelta(days=60)
    if 'Outcome_Date' in pattern_info and pd.notna(pattern_info['Outcome_Date']):
         end_plot_date = pattern_info['Outcome_Date'] + pd.Timedelta(days=30)
    else: # For patterns that didn't even breakout
        end_plot_date = b2_d + pd.Timedelta(days=BREAKOUT_CONFIRMATION_DAYS + TARGET_PROFIT_DAYS)


    # Ensure dates are within the DataFrame's range
    start_plot_date = max(start_plot_date, df_full.index.min())
    end_plot_date = min(end_plot_date, df_full.index.max())
    
    plot_data = df_full[start_plot_date:end_plot_date]
    if plot_data.empty:
        print(f"Not enough data to plot for pattern starting {b1_d.strftime('%Y-%m-%d')}")
        return

    plt.figure(figsize=(12, 6))
    plt.plot(plot_data.index, plot_data['Adj Close'], label='Adj Close')
    
    # Plot B1, P1, B2
    plt.scatter([b1_d, p1_d, b2_d], 
                [pattern_info['B1_price'], pattern_info['P1_price'], pattern_info['B2_price']], 
                color='red', s=50, zorder=5, label='B1/P1/B2')
    
    # Plot Neckline
    plt.axhline(y=neckline, color='orange', linestyle='--', label=f'Neckline ({neckline:.2f})')
    
    # Plot Average Bottom Price Line (potential support)
    avg_bottom_price = pattern_info['Avg_Bottom_Price']
    plt.axhline(y=avg_bottom_price, color='green', linestyle=':', label=f'Avg Bottom ({avg_bottom_price:.2f})')

    if pattern_info['Signal_Type'].startswith("True"):
        plt.title(f"True Double Bottom: {b1_d.strftime('%Y-%m-%d')} to {b2_d.strftime('%Y-%m-%d')} - {title_suffix}")
        plt.scatter(pattern_info['Outcome_Date'], pattern_info['Outcome_Price'], color='green', marker='^', s=100, label='Target Met')
    elif pattern_info['Signal_Type'].startswith("False"):
        plt.title(f"False Double Bottom: {b1_d.strftime('%Y-%m-%d')} to {b2_d.strftime('%Y-%m-%d')} - {title_suffix}")
        if "Stop Loss Hit" in pattern_info['Signal_Type']:
             plt.scatter(pattern_info['Outcome_Date'], pattern_info['Outcome_Price'], color='purple', marker='v', s=100, label='Stop Loss Hit')
        elif "No Breakout" in pattern_info['Signal_Type']:
             plt.text(pattern_info['B2_date'] + pd.Timedelta(days=5), pattern_info['Neckline'], "No Breakout", color='red')
        else: # Target not hit
             plt.scatter(pattern_info['Outcome_Date'], pattern_info['Outcome_Price'], color='blue', marker='x', s=100, label='Target Not Hit')


    plt.xlabel("Date")
    plt.ylabel("Adj Close Price")
    plt.legend()
    plt.grid(True)
    plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
    plt.gca().xaxis.set_major_locator(mdates.MonthLocator(interval=max(1, (end_plot_date - start_plot_date).days // 180))) # Adjust interval
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()

# Plot a few examples
print("\n--- Visualizing Examples ---")
num_examples_to_plot = 2 

print("\nPlotting True Signals Examples:")
for i, signal in enumerate(true_signals):
    if i < num_examples_to_plot:
        plot_pattern(df, signal, f"True Signal Example {i+1}")
    else:
        break
if not true_signals:
    print("No true signals found to plot.")

print("\nPlotting False Signals Examples:")
for i, signal in enumerate(false_signals):
    if i < num_examples_to_plot:
        plot_pattern(df, signal, f"False Signal Example {i+1}")
    else:
        break
if not false_signals:
    print("No false signals found to plot.")

# --- 7. Insights Explanation ---
# This section will be populated with the analysis results.
# (Done after execution and reviewing the numbers)
print("\n--- Analysis Insights ---")
total_patterns_identified = len(potential_patterns)
num_true_signals = len(true_signals)
num_false_signals = len(false_signals)

print(f"Total potential Double Bottom patterns identified (B1-P1-B2 structure): {total_patterns_identified}")
print(f"Number of True Signals (breakout and target met): {num_true_signals}")
print(f"Number of False Signals: {num_false_signals}")

if total_patterns_identified > 0:
    true_signal_rate = (num_true_signals / total_patterns_identified) * 100
    print(f"Success Rate (True Signals / Total Potential Patterns): {true_signal_rate:.2f}%")
else:
    print("No potential patterns were identified to calculate a success rate.")

print("\nDetailed Timestamps for True Signals (B2 Date - Pattern Start):")
for signal in true_signals:
    print(f"  - B1: {signal['B1_date'].strftime('%Y-%m-%d')}, P1: {signal['P1_date'].strftime('%Y-%m-%d')}, B2: {signal['B2_date'].strftime('%Y-%m-%d')}, Neckline: {signal['Neckline']:.2f}, Outcome Date: {signal['Outcome_Date'].strftime('%Y-%m-%d')}")

print("\nDetailed Timestamps for False Signals (B2 Date - Pattern Start) & Reason:")
for signal in false_signals:
    print(f"  - B1: {signal['B1_date'].strftime('%Y-%m-%d')}, P1: {signal['P1_date'].strftime('%Y-%m-%d')}, B2: {signal['B2_date'].strftime('%Y-%m-%d')}, Neckline: {signal['Neckline']:.2f}, Reason: {signal['Signal_Type']}")



--- Visualizing Examples ---

Plotting True Signals Examples:
No true signals found to plot.

Plotting False Signals Examples:
No false signals found to plot.

--- Analysis Insights ---
Total potential Double Bottom patterns identified (B1-P1-B2 structure): 0
Number of True Signals (breakout and target met): 0
Number of False Signals: 0
No potential patterns were identified to calculate a success rate.

Detailed Timestamps for True Signals (B2 Date - Pattern Start):

Detailed Timestamps for False Signals (B2 Date - Pattern Start) & Reason:
