# Mech-Exo Scoring System Exploration

This notebook explores the factor-based scoring system for ranking investment ideas.

In [None]:
import sys
from pathlib import Path
sys.path.append(str(Path.cwd().parent))

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta

# Mech-Exo imports
from mech_exo.scoring import IdeaScorer
from mech_exo.datasource import DataStorage, OHLCDownloader, FundamentalFetcher
from mech_exo.utils import ConfigManager

# Set up plotting
# The matplotlib style sheet is applied first and the seaborn palette second.
# NOTE(review): keep this order - presumably a later call can override
# defaults set by the earlier one; confirm before reordering.
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

# Printed last so the message only appears when the whole setup cell ran.
print("📊 Mech-Exo Scoring Exploration Loaded")

## 1. Initialize Scoring System

In [None]:
# Build the scorer and show its factor configuration.
# Any failure is reported and leaves `scorer` as None so that later cells,
# which test `scorer` before using it, can skip their work.
try:
    scorer = IdeaScorer()
    print(f"✅ Scorer initialized with {len(scorer.factors)} factors")

    # One record per configured factor: its name, weight and direction.
    factor_info = [
        {
            'Factor': name,
            'Weight': factor.weight,
            'Direction': factor.direction,
        }
        for name, factor in scorer.factors.items()
    ]
    factor_df = pd.DataFrame(factor_info)

    print("\n📋 Factor Configuration:")
    display(factor_df)

except Exception as e:
    print(f"❌ Failed to initialize scorer: {e}")
    scorer = None

## 2. Check Available Data

In [None]:
# Report what the storage layer currently holds for a small symbol sample.
#
# `universe` gets an empty default before anything else so that later cells,
# which test `universe.empty`, never hit a NameError when the scorer is
# missing or the universe lookup below raises.
universe = pd.DataFrame()

if scorer:
    storage = scorer.storage

    # Check universe
    universe = storage.get_universe()
    print(f"🌍 Universe contains {len(universe)} symbols")

    if not universe.empty:
        print("\nUniverse symbols:")
        display(universe.head(10))

        # Check data availability for the first few symbols only
        sample_symbols = universe['symbol'].head(5).tolist()

        ohlc_data = storage.get_ohlc_data(sample_symbols)
        fundamental_data = storage.get_fundamental_data(sample_symbols)
        news_data = storage.get_news_data(sample_symbols, days_back=7)

        print(f"\n📈 OHLC records: {len(ohlc_data)}")
        print(f"📊 Fundamental records: {len(fundamental_data)}")
        print(f"📰 News articles: {len(news_data)}")
    else:
        print("⚠️ No symbols in universe - run data pipeline first")
else:
    # Say why nothing was checked instead of finishing silently.
    print("⚠️ Scorer not initialized - skipping data availability check")

## 3. Run Scoring Analysis

In [None]:
# Score a sample of the universe and persist the ranking.
#
# `ranking` is always bound when this cell finishes: the scored frame on
# success, an empty DataFrame otherwise. Saving happens in its own try block
# so that a failed CSV write is reported as a save problem and does not
# throw away a ranking that was computed successfully.
ranking = pd.DataFrame()

if scorer and not universe.empty:
    try:
        # Get sample symbols for scoring
        sample_symbols = universe['symbol'].head(10).tolist()
        print(f"🎯 Scoring {len(sample_symbols)} symbols: {sample_symbols}")

        # Run scoring
        ranking = scorer.score(sample_symbols)

        if not ranking.empty:
            print(f"✅ Successfully scored {len(ranking)} symbols")

            # Display top results
            print("\n🏆 Top Investment Ideas:")
            display(ranking.head())
        else:
            print("❌ No ranking results generated")

    except Exception as e:
        print(f"❌ Scoring failed: {e}")
        ranking = pd.DataFrame()

    # Persist separately: a save error must not invalidate the ranking that
    # the analysis cells below depend on.
    if not ranking.empty:
        try:
            scorer.save_ranking(ranking, "data/sample_ranking.csv")
            print("💾 Ranking saved to data/sample_ranking.csv")
        except Exception as e:
            print(f"⚠️ Ranking computed but could not be saved: {e}")
else:
    print("⚠️ Cannot run scoring - initialize scorer and data first")

## 4. Analyze Scoring Results

In [None]:
# Visualise and summarise the composite scores; skipped when no ranking exists.
if not ranking.empty:
    scores = ranking['composite_score']

    # Two panels side by side: score histogram and rank-vs-score scatter.
    fig, (hist_ax, scatter_ax) = plt.subplots(1, 2, figsize=(12, 4))

    hist_ax.hist(scores, bins=10, alpha=0.7, edgecolor='black')
    hist_ax.set_title('Distribution of Composite Scores')
    hist_ax.set_xlabel('Composite Score')
    hist_ax.set_ylabel('Frequency')

    scatter_ax.scatter(ranking['rank'], scores, alpha=0.7)
    scatter_ax.set_title('Rank vs Composite Score')
    scatter_ax.set_xlabel('Rank')
    scatter_ax.set_ylabel('Composite Score')

    fig.tight_layout()
    plt.show()

    # Summary statistics
    print("\n📈 Scoring Summary:")
    print(f"Mean Score: {scores.mean():.3f}")
    print(f"Std Score: {scores.std():.3f}")
    print(f"Min Score: {scores.min():.3f}")
    print(f"Max Score: {scores.max():.3f}")

## 5. Factor Analysis

In [None]:
# Chart the configured factor weights; runs only when a ranking and a scorer exist.
if not ranking.empty and scorer:
    print("🔍 Factor Weight Analysis:")

    # One record per factor with a display-friendly label, heaviest weight first.
    factor_weights = [
        {
            'Factor': name.replace('_', ' ').title(),
            'Weight': factor.weight,
            'Direction': factor.direction,
        }
        for name, factor in scorer.factors.items()
    ]
    weight_df = pd.DataFrame(factor_weights).sort_values('Weight', ascending=False)

    # Bar chart in the sorted order; x positions are shared with the tick labels.
    positions = range(len(weight_df))
    plt.figure(figsize=(10, 6))
    bars = plt.bar(positions, weight_df['Weight'], alpha=0.7)
    plt.title('Factor Weights in Scoring Model')
    plt.xlabel('Factors')
    plt.ylabel('Weight (%)')
    plt.xticks(positions, weight_df['Factor'], rotation=45, ha='right')

    # Colour each bar by its factor direction; unrecognised directions are gray.
    colors = {'higher_better': 'green', 'lower_better': 'red', 'mean_revert': 'blue'}
    for bar, direction in zip(bars, weight_df['Direction']):
        bar.set_color(colors.get(direction, 'gray'))

    plt.tight_layout()
    plt.show()

    display(weight_df)

## 6. Test Different Scenarios

In [None]:
# Score a few hand-picked symbol groups, restricted to symbols in the universe.
if scorer and not universe.empty:
    test_scenarios = {
        'Large Cap ETFs': ['SPY', 'QQQ', 'IWM'],
        'Tech Stocks': ['AAPL', 'MSFT', 'GOOGL'],
        'International': ['FXI', 'EEM', 'VEA']
    }

    # Symbols present in the universe, looked up once for all scenarios.
    known_symbols = universe['symbol'].values

    scenario_results = {}

    for scenario_name, test_symbols in test_scenarios.items():
        available_symbols = [s for s in test_symbols if s in known_symbols]

        # Nothing to score for this scenario: report it and move on.
        if not available_symbols:
            print(f"⚠️ {scenario_name}: No symbols available in universe")
            continue

        try:
            scenario_ranking = scorer.score(available_symbols)
            if scenario_ranking.empty:
                print(f"❌ {scenario_name}: No results")
            else:
                scenario_results[scenario_name] = scenario_ranking
                print(f"✅ {scenario_name}: Scored {len(scenario_ranking)} symbols")
        except Exception as e:
            print(f"❌ {scenario_name}: Failed - {e}")

    # Show the top three rows of every scenario that produced a ranking.
    if scenario_results:
        print("\n🎭 Scenario Analysis Results:")
        for scenario_name, results in scenario_results.items():
            print(f"\n{scenario_name}:")
            display(results[['rank', 'symbol', 'composite_score']].head(3))

## 7. Data Quality Assessment

In [None]:
# Report how complete the fundamental data is for the columns used in scoring.
if scorer:
    storage = scorer.storage

    # Up to 20 symbols from the universe, or none when the universe is empty.
    sample_symbols = universe['symbol'].head(20).tolist() if not universe.empty else []

    if not sample_symbols:
        print("⚠️ No symbols available for data quality check")
    else:
        fundamental_data = storage.get_fundamental_data(sample_symbols)

        if fundamental_data.empty:
            print("❌ No fundamental data available")
        else:
            # Percentage of non-null values per column.
            completeness = (1 - fundamental_data.isnull().sum() / len(fundamental_data)) * 100

            # Keep only the scoring-relevant columns that are actually present.
            scoring_columns = ['pe_ratio', 'price_to_book', 'return_on_equity',
                               'revenue_growth', 'earnings_growth', 'debt_to_equity']
            available_scoring_cols = [col for col in scoring_columns if col in completeness.index]

            if not available_scoring_cols:
                print("❌ No scoring factors found in fundamental data")
            else:
                scoring_completeness = completeness[available_scoring_cols]

                # Text report: >= 80% is good, >= 50% is acceptable, below that is poor.
                print("\n📊 Data Completeness for Scoring Factors:")
                for col, pct in scoring_completeness.items():
                    if pct >= 80:
                        status = "✅"
                    elif pct >= 50:
                        status = "⚠️"
                    else:
                        status = "❌"
                    print(f"{status} {col}: {pct:.1f}%")

                # Bar chart with reference lines at the same two thresholds.
                positions = range(len(scoring_completeness))
                tick_labels = [col.replace('_', ' ').title() for col in scoring_completeness.index]

                plt.figure(figsize=(10, 6))
                bars = plt.bar(positions, scoring_completeness.values)
                plt.title('Data Completeness for Scoring Factors')
                plt.xlabel('Factors')
                plt.ylabel('Completeness (%)')
                plt.xticks(positions, tick_labels, rotation=45, ha='right')
                plt.axhline(y=80, color='green', linestyle='--', alpha=0.7, label='Good (80%)')
                plt.axhline(y=50, color='orange', linestyle='--', alpha=0.7, label='Acceptable (50%)')
                plt.legend()
                plt.tight_layout()
                plt.show()

## 8. Cleanup

In [None]:
# Close the scorer, but only if one was created earlier in the notebook.
if scorer:
    scorer.close()
    print("🔄 Database connections closed")

print("\n✅ Scoring exploration completed!")
print("\n📝 Next steps:")

# Follow-up actions, printed one per line in order.
next_steps = (
    "1. Run the data pipeline to populate more data",
    "2. Adjust factor weights in config/factors.yml",
    "3. Test scoring on full universe",
    "4. Implement position sizing and risk management",
)
for step in next_steps:
    print(step)