In [1]:
# Import required libraries
import os
import sys
import pandas as pd
import numpy as np
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

# Add src directory to path for imports.
# The path is resolved to an absolute one so the entry keeps pointing at the
# same directory if the working directory changes later, and it is appended
# only when missing so re-running this cell does not accumulate duplicates.
src_dir = os.path.abspath('../src')
if src_dir not in sys.path:
    sys.path.append(src_dir)

# Import signal outcome tagging functionality
from signal_outcome_tagger import (
    SignalOutcomeTagger,
    SignalOutcomeError,
    load_latest_matches,
    quick_tag_outcome,
    review_latest_feedback,
    VALID_OUTCOMES
)

# Confirmation banner: shows the outcome labels exported as VALID_OUTCOMES.
print(
    "📦 Signal Outcome Tagging System Loaded",
    f"   Valid outcomes: {', '.join(VALID_OUTCOMES)}",
    "   Ready for pattern match feedback collection!",
    sep="\n",
)


📦 Signal Outcome Tagging System Loaded
   Valid outcomes: success, failure, uncertain
   Ready for pattern match feedback collection!


In [2]:
# Initialize the tagger and discover available files
signals_dir = '../signals'

try:
    tagger = SignalOutcomeTagger(signals_dir=signals_dir)
    match_files = tagger.find_available_match_files()

    print("📊 Available Match Files for Tagging:")
    print("=" * 50)

    if not match_files:
        print("❌ No match files found!")
        print("   Please run pattern scanning (User Story 1.5) first to generate matches.")
    else:
        # The loop variable is not called `file_path` on purpose: that name
        # belongs to the loaded match file used by the load and save cells.
        for i, match_path in enumerate(match_files, 1):
            filename = os.path.basename(match_path)

            # Quick peek at file content. Best-effort by design: a file that
            # cannot be inspected is reported inline and the listing goes on.
            try:
                file_size = os.path.getsize(match_path)
                preview_df = pd.read_csv(match_path)
                match_count = len(preview_df)

                # A header-only file, or one without usable scores, would
                # otherwise print "nan - nan" or a bare KeyError message.
                has_scores = (
                    'confidence_score' in preview_df.columns
                    and preview_df['confidence_score'].notna().any()
                )
                if has_scores:
                    scores = preview_df['confidence_score']
                    confidence_range = f"{scores.min():.3f} - {scores.max():.3f}"
                else:
                    confidence_range = "n/a"

                print(f"   {i}. {filename}")
                print(f"      Matches: {match_count}, Confidence: {confidence_range}")
                print(f"      Size: {file_size:,} bytes")

            except Exception as e:
                print(f"   {i}. {filename} (Error reading: {e})")

        print(f"\n✅ Found {len(match_files)} match files ready for tagging")

except SignalOutcomeError as e:
    print(f"❌ Error initializing tagger: {e}")
    print("   Please ensure the signals directory exists and contains match files.")


📊 Available Match Files for Tagging:
   1. matches_20250622_212629.csv
      Matches: 4, Confidence: 0.843 - 0.907
      Size: 267 bytes

✅ Found 1 match files ready for tagging


In [3]:
# Load the latest match file
try:
    file_path, matches_df = load_latest_matches(signals_dir)

    print(f"📄 Loaded Match File: {os.path.basename(file_path)}")
    print("=" * 60)

    # Display basic statistics
    summary = tagger.get_match_summary(matches_df)

    print("📊 Match File Summary:")
    print(f"   Total matches: {summary['total_matches']}")
    print(f"   Already tagged: {summary['tagged_count']}")
    print(f"   Untagged: {summary['untagged_count']}")
    print(f"   Confidence range: {summary['confidence_range'][0]:.3f} - {summary['confidence_range'][1]:.3f}")
    print(f"   Date range: {summary['date_range'][0]} to {summary['date_range'][1]}")
    print(f"   Tickers: {', '.join(summary['tickers'])}")

    # Display the matches for review
    print("\n🎯 Pattern Matches:")
    display_columns = ['ticker', 'window_start_date', 'window_end_date', 'confidence_score', 'rank']

    # The outcome columns are optional. Each one is checked on its own so a
    # file that has 'outcome' but no 'feedback_notes' does not raise KeyError.
    display_columns += [
        column for column in ('outcome', 'feedback_notes')
        if column in matches_df.columns
    ]

    # Selecting with a list yields a new frame, so rounding the scores for
    # readability here leaves matches_df itself untouched.
    display_df = matches_df[display_columns].assign(
        confidence_score=lambda frame: frame['confidence_score'].round(3)
    )

    print(display_df.to_string(index=False, max_colwidth=20))

    print("\n✅ Match file loaded successfully!")
    print("   Ready for outcome tagging.")

except SignalOutcomeError as e:
    print(f"❌ Error loading matches: {e}")
    # Later cells test these names against None before doing any work.
    matches_df = None
    file_path = None


✓ Loaded 4 matches from matches_20250622_212629.csv
📄 Loaded Match File: matches_20250622_212629.csv
📊 Match File Summary:
   Total matches: 4
   Already tagged: 0
   Untagged: 4
   Confidence range: 0.843 - 0.907
   Date range: 2024-08-19 to 2024-11-15
   Tickers: 0005.HK, 0388.HK

🎯 Pattern Matches:
 ticker window_start_date window_end_date  confidence_score  rank outcome feedback_notes
0005.HK        2024-08-19      2024-10-02             0.907     1    <NA>           <NA>
0005.HK        2024-11-15      2024-12-30             0.863     2    <NA>           <NA>
0388.HK        2024-08-19      2024-10-02             0.844     3    <NA>           <NA>
0005.HK        2024-10-03      2024-11-14             0.843     4    <NA>           <NA>

✅ Match file loaded successfully!
   Ready for outcome tagging.


In [4]:
# Example: Tag a single match outcome
# Modify these parameters to tag your specific matches

if matches_df is None:
    print("⚠️  No matches loaded. Please run the previous cell to load match data.")
else:
    # Configuration for tagging - MODIFY THESE VALUES
    ticker_to_tag = '0005.HK'  # Change to your target ticker
    window_start_to_tag = '2024-08-19'  # Change to your target date
    outcome_to_apply = 'success'  # Choose: 'success', 'failure', 'uncertain'
    feedback_notes = 'Strong breakout after support test - volume confirmed the move'

    try:
        # Ask the tagger for an updated frame carrying the new tag
        updated_matches_df = tagger.tag_outcome(
            matches_df,
            ticker=ticker_to_tag,
            window_start_date=window_start_to_tag,
            outcome=outcome_to_apply,
            feedback_notes=feedback_notes,
            overwrite=False  # Set to True to overwrite existing tags
        )

        print(f"✅ Successfully tagged {ticker_to_tag} ({window_start_to_tag}) as '{outcome_to_apply}'")

        # From here on the working DataFrame is the tagged one
        matches_df = updated_matches_df

        # Locate the row that was just tagged: same ticker and same window start
        same_ticker = matches_df['ticker'] == ticker_to_tag
        same_window_start = matches_df['window_start_date'] == window_start_to_tag
        tagged_rows = matches_df[same_ticker & same_window_start]

        if not tagged_rows.empty:
            print("\n📋 Tagged Match Details:")
            first_hit = tagged_rows.iloc[0]
            print(f"   Ticker: {first_hit['ticker']}")
            print(f"   Period: {first_hit['window_start_date']} to {first_hit['window_end_date']}")
            print(f"   Confidence: {first_hit['confidence_score']:.3f}")
            print(f"   Outcome: {first_hit['outcome']}")
            print(f"   Notes: {first_hit['feedback_notes']}")
            print(f"   Tagged: {first_hit['tagged_date']}")

    except SignalOutcomeError as e:
        print(f"❌ Error tagging outcome: {e}")
        print("   Check ticker and date values, or use overwrite=True for existing tags")


✓ Added outcome tag for 0005.HK (2024-08-19): success
✅ Successfully tagged 0005.HK (2024-08-19) as 'success'

📋 Tagged Match Details:
   Ticker: 0005.HK
   Period: 2024-08-19 to 2024-10-02
   Confidence: 0.907
   Outcome: success
   Notes: Strong breakout after support test - volume confirmed the move
   Tagged: 2025-06-23T09:30:28.233541


In [5]:
# Save the labeled matches
if matches_df is None or file_path is None:
    print("⚠️  No matches to save. Please load and tag matches first.")
else:
    try:
        # The tagger writes the file and returns the path it used
        output_path = tagger.save_labeled_matches(matches_df, file_path)

        print("✅ Labeled matches saved successfully!")
        print(f"   Output file: {os.path.basename(output_path)}")

        # Read the file back to confirm what was actually written
        saved_df = pd.read_csv(output_path)
        tagged_count = saved_df['outcome'].notna().sum()

        print("\n📊 Saved File Summary:")
        print(f"   Total matches: {len(saved_df)}")
        print(f"   Tagged matches: {tagged_count}")
        print(f"   Untagged matches: {len(saved_df) - tagged_count}")

        if tagged_count > 0:
            # Share of each outcome label among the tagged rows only
            distribution = saved_df['outcome'].value_counts()
            print("\n🎯 Outcome Distribution:")
            for label, n_rows in distribution.items():
                share = n_rows / tagged_count * 100
                print(f"   {label.title()}: {n_rows} ({share:.1f}%)")

    except SignalOutcomeError as e:
        print(f"❌ Error saving labeled matches: {e}")


💾 Labeled matches saved to: matches_20250622_212629_labeled.csv
✅ Labeled matches saved successfully!
   Output file: matches_20250622_212629_labeled.csv

📊 Saved File Summary:
   Total matches: 4
   Tagged matches: 1
   Untagged matches: 3

🎯 Outcome Distribution:
   Success: 1 (100.0%)


In [6]:
# Analyze feedback results

# Thresholds behind the recommendation text; tune them here, not inline.
HIGH_SUCCESS_RATE = 0.7   # above this the model is reported as performing well
LOW_SUCCESS_RATE = 0.5    # below this the model is reported as needing improvement
MIN_TAGGED_MATCHES = 20   # fewer tagged matches than this prints the sample-size note

if matches_df is not None:
    print("📊 Feedback Analysis")
    print("=" * 50)

    # Get detailed feedback analysis. The result is read below through the
    # keys 'tagged_matches' and 'outcome_summary' -> 'counts'.
    feedback_results = tagger.review_feedback(matches_df)

    # Additional insights
    if feedback_results['tagged_matches'] > 0:
        print("\n🔍 Additional Insights:")

        # Success rate by ticker (if multiple tickers)
        tagged_matches = matches_df[matches_df['outcome'].notna()]

        if len(tagged_matches['ticker'].unique()) > 1:
            print("\n📋 Performance by Ticker:")
            ticker_performance = tagged_matches.groupby('ticker')['outcome'].value_counts().unstack(fill_value=0)

            for ticker, outcome_row in ticker_performance.iterrows():
                success = outcome_row.get('success', 0)
                failure = outcome_row.get('failure', 0)
                total_decisive = success + failure

                if total_decisive > 0:
                    success_rate = success / total_decisive
                    print(f"   {ticker}: {success_rate:.1%} success rate ({success}S/{failure}F)")

        # Recommendations based on results
        print("\n💡 Recommendations:")

        # The overall rate uses the same definition as the per-ticker rate:
        # successes over decisive (success + failure) outcomes. Counting
        # 'uncertain' tags in the denominator would drag the rate down and
        # report a model with no failures as needing improvement.
        outcome_counts = feedback_results['outcome_summary']['counts']
        success_total = outcome_counts.get('success', 0)
        failure_total = outcome_counts.get('failure', 0)
        decisive_total = success_total + failure_total
        overall_success_rate = success_total / decisive_total if decisive_total else None

        if overall_success_rate is None:
            print("   ❔ Only 'uncertain' outcomes tagged so far - no success rate available yet")
        elif overall_success_rate > HIGH_SUCCESS_RATE:
            print("   ✅ Model performing well - consider lowering confidence threshold")
        elif overall_success_rate < LOW_SUCCESS_RATE:
            print("   ⚠️  Model needs improvement - consider raising confidence threshold")
        else:
            print("   📊 Model performance is moderate - continue collecting feedback")

        if feedback_results['tagged_matches'] < MIN_TAGGED_MATCHES:
            print("   📈 Tag more matches for better statistical significance")

    else:
        print("\n💡 No tagged matches found for analysis.")
        print("   Tag some matches using the cells above to see feedback analysis.")
else:
    print("⚠️  No matches loaded for analysis.")


📊 Feedback Analysis
📊 Feedback Analysis Summary
📈 Overall Statistics:
   Total matches: 4
   Tagged matches: 1
   Tagging rate: 25.0%

🎯 Outcome Breakdown:
   Success: 1 (100.0%)

📊 Performance by Confidence Band:
Band         Count    Success    Failure    Success%  
------------------------------------------------------------
0.9-1.0      1        1          0          100.0%    

🔍 Additional Insights:

💡 Recommendations:
   ✅ Model performing well - consider lowering confidence threshold
   📈 Tag more matches for better statistical significance
