In [16]:
import pandas as pd
import glob
import os
from pathlib import Path

# Function to load all data files for a cryptocurrency
def load_crypto_data(crypto_name, data_folder):
    """
    Load all .Last.txt files for a cryptocurrency and combine them into a single DataFrame.
    
    Parameters:
    -----------
    crypto_name : str
        Name of the cryptocurrency (e.g., 'BTC', 'ETH', 'SOL', 'XRP')
    data_folder : str
        Path to the folder containing the data files
        
    Returns:
    --------
    pd.DataFrame
        Combined DataFrame with columns: DateTime, Open, High, Low, Close, Volume
    """
    # Find all .Last.txt files in the folder (including subdirectories)
    txt_files = glob.glob(os.path.join(data_folder, "**/*.Last.txt"), recursive=True)
    
    if not txt_files:
        print(f"‚ö†Ô∏è Warning: No .Last.txt files found in {data_folder}")
        return None
    
    # Sort files to ensure chronological order
    txt_files.sort()
    
    print(f"üìä Loading {crypto_name} data from {len(txt_files)} file(s)...")
    
    all_dataframes = []
    
    for f in txt_files:
        try:
            # Read the file (semicolon-separated, no header)
            df = pd.read_csv(f, sep=';', header=None, 
                           names=['DateTime', 'Open', 'High', 'Low', 'Close', 'Volume'])
            
            # Parse datetime (format: YYYYMMDD HHMMSS) and make timezone-aware (UTC)
            df['DateTime'] = pd.to_datetime(df['DateTime'], format='%Y%m%d %H%M%S', utc=True)
            
            # Set DateTime as index
            df.set_index('DateTime', inplace=True)
            
            all_dataframes.append(df)
            print(f"   ‚úì Loaded {os.path.basename(f)}: {len(df):,} rows")
            
        except Exception as e:
            print(f"   ‚úó Error loading {os.path.basename(f)}: {e}")
    
    if not all_dataframes:
        print(f"‚ö†Ô∏è No data loaded for {crypto_name}")
        return None
    
    # Combine all dataframes
    combined_df = pd.concat(all_dataframes, axis=0)
    
    # Sort by datetime to ensure proper order
    combined_df.sort_index(inplace=True)
    
    # Remove duplicates if any
    combined_df = combined_df[~combined_df.index.duplicated(keep='first')]
    
    print(f"‚úÖ {crypto_name} data loaded: {len(combined_df):,} total rows")
    print(f"   Date range: {combined_df.index.min()} to {combined_df.index.max()}")
    print()
    
    return combined_df

# Load data for all cryptocurrencies
print("=" * 60)
print("Loading Cryptocurrency Data")
print("=" * 60)
print()

# BTC Data
btc_data = load_crypto_data('BTC', 'BTCUSD DATA')

# ETH Data
eth_data = load_crypto_data('ETH', 'ETHUSD DATA')

# SOL Data
sol_data = load_crypto_data('SOL', 'SOLUSD DATA')

# XRP Data (note: XRP has a nested folder structure)
xrp_data = load_crypto_data('XRP', 'XRPUSD DATA')

print("=" * 60)
print("Data Loading Complete!")
print("=" * 60)
print()
print("Available DataFrames:")
print(f"  - btc_data: {len(btc_data) if btc_data is not None else 0:,} rows")
print(f"  - eth_data: {len(eth_data) if eth_data is not None else 0:,} rows")
print(f"  - sol_data: {len(sol_data) if sol_data is not None else 0:,} rows")
print(f"  - xrp_data: {len(xrp_data) if xrp_data is not None else 0:,} rows")


Loading Cryptocurrency Data

üìä Loading BTC data from 7 file(s)...
   ‚úì Loaded BTCUSDT_2019_minute.Last.txt: 525,600 rows
   ‚úì Loaded BTCUSDT_2020_minute.Last.txt: 527,040 rows
   ‚úì Loaded BTCUSDT_2021_minute.Last.txt: 525,600 rows
   ‚úì Loaded BTCUSDT_2022_minute.Last.txt: 525,600 rows
   ‚úì Loaded BTCUSDT_2023_minute.Last.txt: 525,600 rows
   ‚úì Loaded BTCUSDT_2024_minute.Last.txt: 527,040 rows
   ‚úì Loaded BTCUSDT_2025_minute.Last.txt: 499,680 rows
‚úÖ BTC data loaded: 3,656,160 total rows
   Date range: 2019-01-01 00:00:00+00:00 to 2025-12-13 23:59:00+00:00

üìä Loading ETH data from 7 file(s)...
   ‚úì Loaded ETHUSDT_2019_minute.Last.txt: 525,600 rows
   ‚úì Loaded ETHUSDT_2020_minute.Last.txt: 527,040 rows
   ‚úì Loaded ETHUSDT_2021_minute.Last.txt: 525,600 rows
   ‚úì Loaded ETHUSDT_2022_minute.Last.txt: 525,600 rows
   ‚úì Loaded ETHUSDT_2023_minute.Last.txt: 525,600 rows
   ‚úì Loaded ETHUSDT_2024_minute.Last.txt: 527,040 rows
   ‚úì Loaded ETHUSDT_2025_minute.Las

In [17]:
# ============================================================================
# 2. RESAMPLE TO DAILY CANDLES
# ============================================================================

def resample_to_daily(df_minute):
    """
    Resample minute-level OHLCV data to daily candles.
    
    Parameters:
    -----------
    df_minute : pd.DataFrame
        DataFrame with minute-level data, indexed by DateTime
        Must have columns: Open, High, Low, Close, Volume
        
    Returns:
    --------
    pd.DataFrame
        Daily candles with columns: Open, High, Low, Close, Volume
        Indexed by date (date only, no time)
    """
    if df_minute is None or len(df_minute) == 0:
        return None
    
    # Resample to daily
    daily = df_minute.resample('D').agg({
        'Open': 'first',      # First open of the day
        'High': 'max',        # Maximum high of the day
        'Low': 'min',         # Minimum low of the day
        'Close': 'last',      # Last close of the day
        'Volume': 'sum'       # Sum of volume for the day
    })
    
    # Remove days with no data (missing days)
    daily = daily.dropna()
    
    # Normalize index to date only (remove time component)
    daily.index = daily.index.normalize()
    
    return daily


In [18]:
# ============================================================================
# 3. DAILY BIAS CLASSIFICATION (10-SCENARIO LOGIC)
# ============================================================================

def add_rvol_and_volume_label(df, lookback=20):
    """
    Add Relative Volume (RVOL) and volume_label columns to the DataFrame.
    
    Parameters:
    -----------
    df : pd.DataFrame
        DataFrame with 'Volume' column, indexed by date/time
    lookback : int
        Number of bars to use for rolling average (default 20)
        
    Returns:
    --------
    pd.DataFrame
        DataFrame with added 'rvol' and 'volume_label' columns
    """
    df = df.copy()
    
    # Compute average volume over last N bars
    # Use min_periods=lookback so RVOL is NaN if there aren't enough bars
    df['avg_vol'] = df['Volume'].rolling(window=lookback, min_periods=lookback).mean()
    
    # Compute RVOL = current_bar_volume / avg_vol
    # Will be NaN if there aren't enough bars for avg_vol
    df['rvol'] = df['Volume'] / df['avg_vol']
    
    # Check if current bar's volume is a local maximum over the last N bars (same as lookback)
    # Only check if we have enough history
    df['is_local_max'] = False
    if len(df) >= lookback:
        rolling_max = df['Volume'].rolling(window=lookback, min_periods=lookback).max()
        df['is_local_max'] = df['Volume'] == rolling_max
    
    # Assign volume labels based on RVOL thresholds
    # If there's not yet N bars of history, RVOL will be NaN, treat as "Normal"
    volume_labels = []
    
    for idx, row in df.iterrows():
        rvol = row['rvol']
        is_local_max = row['is_local_max']
        
        # If RVOL is NaN (not enough history), treat as "Normal"
        if pd.isna(rvol):
            volume_labels.append('Normal')
        # Ultra-High: rvol >= 3.0 OR current bar's volume is local maximum
        elif rvol >= 3.0 or is_local_max:
            volume_labels.append('Ultra-High')
        # High: 1.5 <= rvol < 3.0
        elif rvol >= 1.5:
            volume_labels.append('High')
        # Normal: 0.75 <= rvol < 1.5
        elif rvol >= 0.75:
            volume_labels.append('Normal')
        # Low: 0.4 <= rvol < 0.75
        elif rvol >= 0.4:
            volume_labels.append('Low')
        # Ultra-Low: rvol < 0.4
        else:
            volume_labels.append('Ultra-Low')
    
    df['volume_label'] = volume_labels
    
    # Clean up temporary columns
    df = df.drop(columns=['is_local_max'])
    
    return df


def map_volume_label_to_scenario_condition(volume_label):
    """
    Map volume_label to the textual conditions used in bias scenarios.
    
    Translation rules from table:
    - "Above average" ‚Üí High or Ultra-High
    - "Low to average" / "Below average" ‚Üí Low or Normal
    - "Low" ‚Üí Low (exact match required)
    - "Ultra-high" ‚Üí Ultra-High (exact match required)
    
    Parameters:
    -----------
    volume_label : str
        Volume label: 'Ultra-High', 'High', 'Normal', 'Low', or 'Ultra-Low'
        
    Returns:
    --------
    dict
        Dictionary with keys: 'above_average', 'low_to_average', 'below_average', 'low', 'ultra_high'
        Values are boolean indicating if the condition is met
    """
    return {
        'above_average': volume_label in ['High', 'Ultra-High'],  # "Above average"
        'low_to_average': volume_label in ['Low', 'Normal'],       # "Low to average"
        'below_average': volume_label in ['Low', 'Normal'],        # "Below average"
        'low': volume_label == 'Low',                             # "Low" (exact)
        'ultra_high': volume_label == 'Ultra-High'                 # "Ultra-high" (exact)
    }


def classify_price_volume_relationship(candle_a, candle_b):
    """
    Classify the price-volume relationship from candle A to candle B.
    
    Returns:
    --------
    tuple: (price_direction, volume_direction, description)
        price_direction: 'up', 'down', or 'inside'
        volume_direction: 'up', 'down', or 'flat'
        description: string describing the relationship
    """
    price_a_to_b = candle_b['Close'] - candle_a['Close']
    volume_a_to_b = candle_b['Volume'] - candle_a['Volume']
    
    # Price direction
    if price_a_to_b > 0:
        price_dir = 'up'
    elif price_a_to_b < 0:
        price_dir = 'down'
    else:
        price_dir = 'inside'
    
    # Volume direction (using 5% threshold for 'flat')
    volume_change_pct = abs(volume_a_to_b / candle_a['Volume']) if candle_a['Volume'] > 0 else 0
    if volume_change_pct < 0.05:
        volume_dir = 'flat'
    elif volume_a_to_b > 0:
        volume_dir = 'up'
    else:
        volume_dir = 'down'
    
    # Generate description
    if price_dir == 'up' and volume_dir == 'up':
        desc = 'converging'
    elif price_dir == 'up' and (volume_dir == 'down' or volume_dir == 'flat'):
        desc = 'diverging'
    elif price_dir == 'down' and volume_dir == 'up':
        desc = 'converging'
    elif price_dir == 'down' and (volume_dir == 'down' or volume_dir == 'flat'):
        desc = 'diverging'
    else:
        desc = 'neutral'
    
    return price_dir, volume_dir, desc


def determine_scenario(candle_a, candle_b):
    """
    Determine which of the 10 scenarios applies based on candle A and B.
    
    Uses the volume_label column from candle_b (which should be computed using add_rvol_and_volume_label).
    
    Parameters:
    -----------
    candle_a : pd.Series
        Previous day before B (Candle A) with OHLCV data
    candle_b : pd.Series
        Previous daily candle (Candle B) with OHLCV data and volume_label column
        
    Returns:
    --------
    int
        Scenario number (1-10), or None if no scenario matches
    """
    pch = candle_a['High']  # Previous Candle High
    pcl = candle_a['Low']   # Previous Candle Low
    
    candle_b_high = candle_b['High']
    candle_b_low = candle_b['Low']
    candle_b_close = candle_b['Close']
    candle_b_open = candle_b['Open']
    
    # Get volume label from candle_b (must have been computed using add_rvol_and_volume_label)
    volume_label = candle_b.get('volume_label', 'Normal')  # Default to 'Normal' if not present
    
    # Map volume label to scenario conditions
    vol_conditions = map_volume_label_to_scenario_condition(volume_label)
    
    # Determine if candle B has wicks above/below PCH/PCL
    wicks_above_pch = candle_b_high > pch
    wicks_below_pcl = candle_b_low < pcl
    
    # Determine if candle B tests PCH/PCL (touches or very close, within 0.1%)
    tolerance = (pch - pcl) * 0.001
    tests_pch = abs(candle_b_high - pch) <= tolerance or (candle_b_high >= pch and candle_b_low <= pch)
    tests_pcl = abs(candle_b_low - pcl) <= tolerance or (candle_b_low <= pcl and candle_b_high >= pcl)
    
    # Determine if candle B stays inside candle A
    stays_inside = candle_b_high <= pch and candle_b_low >= pcl
    
    # Check if inside bar
    is_inside_bar = stays_inside
    
    # Check if inside bar with long wicks (wicks are significant portion of range)
    candle_b_range = candle_b_high - candle_b_low
    candle_a_range = pch - pcl
    has_long_wicks = False
    if is_inside_bar and candle_a_range > 0:
        # Long wicks if body is small relative to range, or wicks are large
        body_size = abs(candle_b_close - candle_b_open)
        if body_size < candle_b_range * 0.3:  # Small body relative to range
            has_long_wicks = True
    
    # Price-volume relationship
    price_dir, vol_dir, pv_desc = classify_price_volume_relationship(candle_a, candle_b)
    
    # Check for new high/low vs A
    new_high_vs_a = candle_b_high > pch
    new_low_vs_a = candle_b_low < pcl
    
    # Check scenarios in priority order (more specific conditions first)
    # Based on the 10-scenario table with exact volume signature mappings
    
    # First check inside bar scenarios (mutually exclusive with wicks/tests)
    if is_inside_bar:
        # SCENARIO 10: Inside bar with long wicks, "Above average" volume, high effort little result (churn)
        if has_long_wicks and vol_conditions['above_average']:
            return 10
        # SCENARIO 9: Inside bar, "Below average" volume, range and volume contraction
        if not has_long_wicks and vol_conditions['below_average']:
            return 9
    
    # Check wicks scenarios before test scenarios (wicks are more specific than tests)
    # SCENARIO 3: Wicks above PCH, closes back below PCH, "Above average" volume, new high rejected on heavy volume
    if wicks_above_pch and candle_b_close < pch and vol_conditions['above_average'] and new_high_vs_a:
        return 3
    
    # SCENARIO 4: Wicks above PCH, closes back below PCH, "Below average" volume, attempt higher on low volume
    if wicks_above_pch and candle_b_close < pch and vol_conditions['below_average']:
        return 4
    
    # SCENARIO 7: Wicks below PCL, closes back above PCL, "Above average" volume, new low rejected on heavy volume
    if wicks_below_pcl and candle_b_close > pcl and vol_conditions['above_average'] and new_low_vs_a:
        return 7
    
    # SCENARIO 8: Wicks below PCL, closes back above PCL, "Below average" volume, attempt lower on low volume
    if wicks_below_pcl and candle_b_close > pcl and vol_conditions['below_average']:
        return 8
    
    # Now check test scenarios (exclude cases already caught by wicks)
    # SCENARIO 1: Tests PCH (but doesn't wick above), closes above PCH, "Above average" volume, price up volume up (converging)
    if not wicks_above_pch and tests_pch and candle_b_close > pch and vol_conditions['above_average'] and pv_desc == 'converging' and price_dir == 'up':
        return 1
    
    # SCENARIO 2: Tests PCH (but doesn't wick above), closes above PCH, "Low to average" volume, price up volume down/flat (diverging)
    if not wicks_above_pch and tests_pch and candle_b_close > pch and vol_conditions['low_to_average'] and pv_desc == 'diverging' and price_dir == 'up':
        return 2
    
    # SCENARIO 5: Tests PCL (but doesn't wick below), closes below PCL, "Above average" volume, price down volume up (converging)
    if not wicks_below_pcl and tests_pcl and candle_b_close < pcl and vol_conditions['above_average'] and pv_desc == 'converging' and price_dir == 'down':
        return 5
    
    # SCENARIO 6: Tests PCL (but doesn't wick below), closes below PCL, "Low to average" volume, price down volume down/flat (diverging)
    if not wicks_below_pcl and tests_pcl and candle_b_close < pcl and vol_conditions['low_to_average'] and pv_desc == 'diverging' and price_dir == 'down':
        return 6
    
    return None


def scenario_to_bias(scenario):
    """
    Map scenario number to bias label.
    
    Returns:
    --------
    str
        'bullish', 'bearish', or 'neutral'
    """
    if scenario is None:
        return 'neutral'
    
    # Scenario 1: Strong bullish continuation
    if scenario == 1:
        return 'bullish'
    
    # Scenario 2: Weak/potential bull trap; bearish
    if scenario == 2:
        return 'bearish'
    
    # Scenario 3: Bearish reversal bias
    if scenario == 3:
        return 'bearish'
    
    # Scenario 4: Bearish bias
    if scenario == 4:
        return 'bearish'
    
    # Scenario 5: Strong bearish continuation
    if scenario == 5:
        return 'bearish'
    
    # Scenario 6: Weak/potential bear trap; bullish
    if scenario == 6:
        return 'bullish'
    
    # Scenario 7: Bullish reversal bias
    if scenario == 7:
        return 'bullish'
    
    # Scenario 8: Bullish bias
    if scenario == 8:
        return 'bullish'
    
    # Scenario 9: Neutral bias
    if scenario == 9:
        return 'neutral'
    
    # Scenario 10: Neutral bias (wait for clear break)
    if scenario == 10:
        return 'neutral'
    
    return 'neutral'


def classify_daily_bias(df_daily, lookback=20):
    """
    Classify daily bias for all eligible days in the daily DataFrame.
    
    Uses the RVOL framework to classify volume levels before determining bias scenarios.
    
    Parameters:
    -----------
    df_daily : pd.DataFrame
        Daily candles with columns: Open, High, Low, Close, Volume
        Indexed by date
    lookback : int
        Number of bars to use for RVOL calculation (default 20 days)
        
    Returns:
    --------
    pd.DataFrame
        DataFrame with columns: date, bias, scenario
        Only includes days where bias could be determined (starting from 3rd day)
    """
    if df_daily is None or len(df_daily) < 3:
        return pd.DataFrame(columns=['date', 'bias', 'scenario'])
    
    # Add RVOL and volume_label columns using the RVOL framework
    df_daily = add_rvol_and_volume_label(df_daily, lookback=lookback)
    
    # Store results
    results = []
    
    # Iterate starting from the 3rd day (index 2)
    # Candle A = day i-2, Candle B = day i-1, Candle C = day i
    for i in range(2, len(df_daily)):
        candle_a = df_daily.iloc[i-2]  # Previous day before B
        candle_b = df_daily.iloc[i-1]  # Previous daily candle
        candle_c_date = df_daily.index[i]  # Current day (C)
        
        # Determine scenario (now uses volume_label from candle_b)
        scenario = determine_scenario(candle_a, candle_b)
        
        # Map scenario to bias
        bias = scenario_to_bias(scenario)
        
        results.append({
            'date': candle_c_date,
            'bias': bias,
            'scenario': scenario
        })
    
    return pd.DataFrame(results)


In [19]:
# ============================================================================
# 4. ITERATE OVER FULL HISTORY AND COMPUTE BIASES
# ============================================================================

def compute_all_biases(data_dict, lookback=20):
    """
    Compute daily biases for all symbols using the RVOL framework.
    
    Parameters:
    -----------
    data_dict : dict
        Dictionary mapping symbol names to minute-level DataFrames
        e.g., {'BTC': btc_data, 'ETH': eth_data, ...}
    lookback : int
        Number of bars to use for RVOL calculation (default 20)
        
    Returns:
    --------
    pd.DataFrame
        DataFrame with columns: date, symbol, bias, scenario
    """
    all_biases = []
    
    for symbol, df_minute in data_dict.items():
        if df_minute is None or len(df_minute) == 0:
            print(f"‚ö†Ô∏è Skipping {symbol}: no data available")
            continue
        
        print(f"üìà Processing {symbol}...")
        
        # Resample to daily
        df_daily = resample_to_daily(df_minute)
        
        if df_daily is None or len(df_daily) < 3:
            print(f"   ‚ö†Ô∏è {symbol}: insufficient data for bias calculation (need at least 3 days)")
            continue
        
        # Classify biases (now uses RVOL framework)
        bias_df = classify_daily_bias(df_daily, lookback=lookback)
        
        # Add symbol column
        bias_df['symbol'] = symbol
        
        # Reorder columns
        bias_df = bias_df[['date', 'symbol', 'bias', 'scenario']]
        
        all_biases.append(bias_df)
        print(f"   ‚úÖ {symbol}: {len(bias_df):,} days with bias classifications")
    
    if not all_biases:
        return pd.DataFrame(columns=['date', 'symbol', 'bias', 'scenario'])
    
    # Combine all results
    combined_bias_df = pd.concat(all_biases, ignore_index=True)
    
    return combined_bias_df


In [20]:
# ============================================================================
# 5. PRODUCE SUMMARY COUNTS
# ============================================================================

def summarize_bias_counts(bias_df):
    """
    Produce summary counts of bullish, bearish, and neutral biases for each symbol.
    
    Parameters:
    -----------
    bias_df : pd.DataFrame
        DataFrame with columns: date, symbol, bias, scenario
        
    Returns:
    --------
    pd.DataFrame
        Summary DataFrame with columns: symbol, bullish, bearish, neutral, total
    dict
        Dictionary mapping symbol to counts dict
    """
    if bias_df is None or len(bias_df) == 0:
        print("No bias data available for summary.")
        return pd.DataFrame(), {}
    
    summary_list = []
    summary_dict = {}
    
    for symbol in bias_df['symbol'].unique():
        symbol_data = bias_df[bias_df['symbol'] == symbol]
        
        bullish_count = len(symbol_data[symbol_data['bias'] == 'bullish'])
        bearish_count = len(symbol_data[symbol_data['bias'] == 'bearish'])
        neutral_count = len(symbol_data[symbol_data['bias'] == 'neutral'])
        total_count = len(symbol_data)
        
        summary_list.append({
            'symbol': symbol,
            'bullish': bullish_count,
            'bearish': bearish_count,
            'neutral': neutral_count,
            'total': total_count
        })
        
        summary_dict[symbol] = {
            'bullish': bullish_count,
            'bearish': bearish_count,
            'neutral': neutral_count,
            'total': total_count
        }
    
    summary_df = pd.DataFrame(summary_list)
    
    # Calculate totals across all symbols
    total_bullish = summary_df['bullish'].sum()
    total_bearish = summary_df['bearish'].sum()
    total_neutral = summary_df['neutral'].sum()
    total_days = summary_df['total'].sum()
    
    # Add totals row to the DataFrame
    totals_row = pd.DataFrame([{
        'symbol': 'TOTAL',
        'bullish': total_bullish,
        'bearish': total_bearish,
        'neutral': total_neutral,
        'total': total_days
    }])
    summary_df = pd.concat([summary_df, totals_row], ignore_index=True)
    
    # Print summary
    print("=" * 60)
    print("Daily Bias Summary by Symbol")
    print("=" * 60)
    for _, row in summary_df.iterrows():
        print(f"{row['symbol']} ‚Äì bullish: {row['bullish']:,}, "
              f"bearish: {row['bearish']:,}, "
              f"neutral: {row['neutral']:,} "
              f"(total: {row['total']:,} days)")
    print("=" * 60)
    print()
    
    return summary_df, summary_dict


In [21]:
# ============================================================================
# EXECUTE: Resample to daily and compute biases
# ============================================================================

print("=" * 60)
print("Resampling to Daily Candles")
print("=" * 60)
print()

# Prepare data dictionary
crypto_data = {
    'BTC': btc_data,
    'ETH': eth_data,
    'SOL': sol_data,
    'XRP': xrp_data
}

# Resample each to daily
daily_data = {}
for symbol, df_minute in crypto_data.items():
    if df_minute is not None:
        daily_data[symbol] = resample_to_daily(df_minute)
        if daily_data[symbol] is not None:
            print(f"‚úÖ {symbol}: {len(daily_data[symbol]):,} daily candles")
        else:
            print(f"‚ö†Ô∏è {symbol}: failed to resample")
    else:
        print(f"‚ö†Ô∏è {symbol}: no minute data available")

print()
print("=" * 60)
print("Computing Daily Biases")
print("=" * 60)
print()

# Compute all biases (using RVOL framework with default lookback=20)
bias_results = compute_all_biases(crypto_data, lookback=20)

print()
print("=" * 60)
print("Bias Computation Complete!")
print("=" * 60)
print(f"Total bias classifications: {len(bias_results):,}")
print()

# Display first few rows
if len(bias_results) > 0:
    print("Sample of bias results:")
    print(bias_results.head(10))
    print()


Resampling to Daily Candles

‚úÖ BTC: 2,539 daily candles
‚úÖ ETH: 2,539 daily candles
‚úÖ SOL: 1,078 daily candles
‚úÖ XRP: 2,540 daily candles

Computing Daily Biases

üìà Processing BTC...
   ‚úÖ BTC: 2,537 days with bias classifications
üìà Processing ETH...
   ‚úÖ ETH: 2,537 days with bias classifications
üìà Processing SOL...
   ‚úÖ SOL: 1,076 days with bias classifications
üìà Processing XRP...
   ‚úÖ XRP: 2,538 days with bias classifications

Bias Computation Complete!
Total bias classifications: 8,688

Sample of bias results:
                       date symbol     bias  scenario
0 2019-01-03 00:00:00+00:00    BTC  neutral       NaN
1 2019-01-04 00:00:00+00:00    BTC  bullish       8.0
2 2019-01-05 00:00:00+00:00    BTC  bullish       8.0
3 2019-01-06 00:00:00+00:00    BTC  bearish       4.0
4 2019-01-07 00:00:00+00:00    BTC  bullish       8.0
5 2019-01-08 00:00:00+00:00    BTC  neutral       NaN
6 2019-01-09 00:00:00+00:00    BTC  bearish       4.0
7 2019-01-10 00:00:00+0

In [22]:
# ============================================================================
# EXECUTE: Produce summary counts
# ============================================================================

# Generate summary
summary_df, summary_dict = summarize_bias_counts(bias_results)

# Display summary DataFrame
if len(summary_df) > 0:
    print("Summary DataFrame:")
    print(summary_df)
    print()
    
    # Also available as dictionary for programmatic access
    print("Summary Dictionary (for programmatic access):")
    for symbol, counts in summary_dict.items():
        print(f"  {symbol}: {counts}")


Daily Bias Summary by Symbol
BTC ‚Äì bullish: 455, bearish: 572, neutral: 1,510 (total: 2,537 days)
ETH ‚Äì bullish: 458, bearish: 563, neutral: 1,516 (total: 2,537 days)
SOL ‚Äì bullish: 178, bearish: 249, neutral: 649 (total: 1,076 days)
XRP ‚Äì bullish: 458, bearish: 536, neutral: 1,544 (total: 2,538 days)
TOTAL ‚Äì bullish: 1,549, bearish: 1,920, neutral: 5,219 (total: 8,688 days)

Summary DataFrame:
  symbol  bullish  bearish  neutral  total
0    BTC      455      572     1510   2537
1    ETH      458      563     1516   2537
2    SOL      178      249      649   1076
3    XRP      458      536     1544   2538
4  TOTAL     1549     1920     5219   8688

Summary Dictionary (for programmatic access):
  BTC: {'bullish': 455, 'bearish': 572, 'neutral': 1510, 'total': 2537}
  ETH: {'bullish': 458, 'bearish': 563, 'neutral': 1516, 'total': 2537}
  SOL: {'bullish': 178, 'bearish': 249, 'neutral': 649, 'total': 1076}
  XRP: {'bullish': 458, 'bearish': 536, 'neutral': 1544, 'total': 2538}


In [23]:
# ============================================================================
# EXPORT BIAS DATA TO CSV FOR REACT CALENDAR APP
# ============================================================================

# Export bias_results to CSV for the React calendar app
if 'bias_results' in globals() and len(bias_results) > 0:
    # Format date column for CSV (YYYY-MM-DD format)
    export_df = bias_results.copy()
    
    # Convert date to string format if it's datetime
    if pd.api.types.is_datetime64_any_dtype(export_df['date']):
        export_df['date'] = export_df['date'].dt.strftime('%Y-%m-%d')
    else:
        export_df['date'] = pd.to_datetime(export_df['date']).dt.strftime('%Y-%m-%d')
    
    # Ensure columns are in the right order: date, symbol, bias, scenario
    export_df = export_df[['date', 'symbol', 'bias', 'scenario']]
    
    # Export to CSV in the calendar-app public folder
    csv_path = 'calendar-app/public/bias-data.csv'
    export_df.to_csv(csv_path, index=False)
    
    print("=" * 60)
    print("Bias Data Exported to CSV")
    print("=" * 60)
    print(f"‚úÖ Exported {len(export_df):,} rows to: {csv_path}")
    print(f"   Columns: {', '.join(export_df.columns.tolist())}")
    print(f"   Symbols: {', '.join(sorted(export_df['symbol'].unique()))}")
    print()
    print("To use the React calendar app:")
    print("1. Navigate to calendar-app/ directory")
    print("2. Run: npm install (if not done already)")
    print("3. Update src/App.tsx to include '/bias-data.csv' in csvFiles array")
    print("4. Run: npm run dev")
    print("5. Open browser to the URL shown (usually http://localhost:5173)")
    print("=" * 60)
else:
    print("‚ö†Ô∏è No bias_results available. Please run the bias computation cells first.")



Bias Data Exported to CSV
‚úÖ Exported 8,688 rows to: calendar-app/public/bias-data.csv
   Columns: date, symbol, bias, scenario
   Symbols: BTC, ETH, SOL, XRP

To use the React calendar app:
1. Navigate to calendar-app/ directory
2. Run: npm install (if not done already)
3. Update src/App.tsx to include '/bias-data.csv' in csvFiles array
4. Run: npm run dev
5. Open browser to the URL shown (usually http://localhost:5173)


In [None]:
# ============================================================================
# INTERACTIVE CALENDAR WIDGET FOR JUPYTER NOTEBOOK
# ============================================================================
# Note: Requires ipywidgets - install with: pip install ipywidgets

try:
    import ipywidgets as widgets
    from IPython.display import display, HTML, clear_output
    import calendar
    from datetime import datetime
    import pandas as pd
    IPYWIDGETS_AVAILABLE = True
except ImportError:
    print("‚ö†Ô∏è ipywidgets not installed. Install with: pip install ipywidgets")
    print("   Then restart the kernel and run this cell again.")
    IPYWIDGETS_AVAILABLE = False

def create_calendar_widget(bias_df):
    """
    Create an interactive calendar widget for Jupyter notebook.
    
    Parameters:
    -----------
    bias_df : pd.DataFrame
        DataFrame with columns: date, symbol, bias, scenario (and any other columns)
        
    Returns:
    --------
    ipywidgets.Widget
        Interactive calendar widget
    """
    if bias_df is None or len(bias_df) == 0:
        return widgets.HTML("<p>No bias data available</p>")
    
    # Ensure date column is datetime
    bias_df = bias_df.copy()
    if not pd.api.types.is_datetime64_any_dtype(bias_df['date']):
        bias_df['date'] = pd.to_datetime(bias_df['date'])
    
    # Get unique symbols
    symbols = sorted(bias_df['symbol'].unique().tolist())
    
    # Create symbol selector
    symbol_dropdown = widgets.Dropdown(
        options=symbols,
        value=symbols[0] if symbols else None,
        description='Symbol:',
        style={'description_width': 'initial'},
        layout=widgets.Layout(width='200px')
    )
    
    # Helper function to get date range for a symbol
    def get_symbol_date_range(symbol):
        """Get min and max dates for a specific symbol"""
        symbol_data = bias_df[bias_df['symbol'] == symbol]
        if len(symbol_data) == 0:
            # Fallback to full dataset range
            min_date = bias_df['date'].min()
            max_date = bias_df['date'].max()
        else:
            min_date = symbol_data['date'].min()
            max_date = symbol_data['date'].max()
        return min_date, max_date
    
    # Initialize with first symbol's date range
    initial_symbol = symbols[0] if symbols else None
    if initial_symbol:
        min_date, max_date = get_symbol_date_range(initial_symbol)
        year_options = list(range(min_date.year, max_date.year + 1))
        initial_year = min_date.year
        initial_month = min_date.month
    else:
        # Fallback
        min_date = bias_df['date'].min()
        max_date = bias_df['date'].max()
        year_options = list(range(min_date.year, max_date.year + 1))
        initial_year = min_date.year
        initial_month = min_date.month
    
    # Create month/year selectors
    year_dropdown = widgets.Dropdown(
        options=year_options,
        value=initial_year,
        description='Year:',
        style={'description_width': 'initial'},
        layout=widgets.Layout(width='150px')
    )
    
    month_dropdown = widgets.Dropdown(
        options=[(calendar.month_name[i], i) for i in range(1, 13)],
        value=initial_month,
        description='Month:',
        style={'description_width': 'initial'},
        layout=widgets.Layout(width='200px')
    )
    
    # Navigation buttons
    prev_button = widgets.Button(description='‚Üê Prev', layout=widgets.Layout(width='100px'))
    next_button = widgets.Button(description='Next ‚Üí', layout=widgets.Layout(width='100px'))
    
    # Output area for calendar and details
    calendar_output = widgets.Output()
    details_output = widgets.Output()
    
    # Selected date info
    selected_date_info = widgets.HTML("")
    
    def get_bias_color(bias):
        """Get color for bias type"""
        if bias == 'bullish':
            return '#4caf50'  # Green
        elif bias == 'bearish':
            return '#f44336'  # Red
        else:
            return '#e0e0e0'  # Grey
    
    
    def show_date_details(date_str, symbol, bias, scenario):
        """Display details for selected date"""
        # Find the full row data
        symbol_data = bias_df[bias_df['symbol'] == symbol].copy()
        date_obj = pd.to_datetime(date_str)
        date_key = date_obj.strftime('%Y-%m-%d')
        
        # Try to find matching row
        matching_rows = symbol_data[symbol_data['date'].dt.strftime('%Y-%m-%d') == date_key]
        
        if len(matching_rows) > 0:
            row = matching_rows.iloc[0]
            details_html = f"""
            <div style="background: #f8f9fa; border-radius: 12px; padding: 20px; margin-top: 20px; border: 2px solid #e0e0e0;">
                <h3 style="color: #333; margin-bottom: 15px; font-size: 18px;">Daily Bias Details</h3>
                <div style="display: flex; flex-direction: column; gap: 12px;">
                    <div style="display: flex; justify-content: space-between;">
                        <span style="font-weight: 600; color: #666;">Symbol:</span>
                        <span style="color: #333;">{symbol}</span>
                    </div>
                    <div style="display: flex; justify-content: space-between;">
                        <span style="font-weight: 600; color: #666;">Date:</span>
                        <span style="color: #333;">{date_obj.strftime('%B %d, %Y')}</span>
                    </div>
                    <div style="display: flex; justify-content: space-between;">
                        <span style="font-weight: 600; color: #666;">Bias:</span>
                        <span style="background-color: {get_bias_color(bias)}; color: white; padding: 4px 12px; border-radius: 12px; font-weight: 600; font-size: 12px; text-transform: uppercase;">
                            {bias.upper()}
                        </span>
                    </div>
            """
            
            # Add other columns
            excluded_cols = ['symbol', 'date', 'bias']
            has_extra = False
            for col in row.index:
                if col not in excluded_cols and pd.notna(row[col]):
                    has_extra = True
                    value = row[col]
                    if isinstance(value, (int, float)):
                        value = f"{value:,.2f}" if isinstance(value, float) else f"{value:,}"
                    details_html += f"""
                    <div style="display: flex; justify-content: space-between;">
                        <span style="font-weight: 600; color: #666;">{col.capitalize()}:</span>
                        <span style="color: #333;">{value}</span>
                    </div>
                    """
            
            details_html += """
                </div>
            </div>
            """
        else:
            details_html = f"""
            <div style="background: #f8f9fa; border-radius: 12px; padding: 20px; margin-top: 20px; border: 2px solid #e0e0e0;">
                <h3 style="color: #333; margin-bottom: 15px; font-size: 18px;">Daily Bias Details</h3>
                <div style="color: #999; font-style: italic; text-align: center; padding: 20px;">
                    No bias data available for this date.
                </div>
            </div>
            """
        
        selected_date_info.value = details_html
    
    
    def update_year_month_options(symbol):
        """Update year and month dropdown options based on selected symbol"""
        min_date, max_date = get_symbol_date_range(symbol)
        year_options = list(range(min_date.year, max_date.year + 1))
        
        # Update year dropdown options
        year_dropdown.options = year_options
        
        # Ensure current year value is valid
        current_year = year_dropdown.value
        if current_year not in year_options:
            year_dropdown.value = min_date.year
            month_dropdown.value = min_date.month
        else:
            # If year is valid, check if month needs adjustment
            # If we're at the start year, ensure month is >= min_date.month
            if current_year == min_date.year and month_dropdown.value < min_date.month:
                month_dropdown.value = min_date.month
            # If we're at the end year, ensure month is <= max_date.month
            if current_year == max_date.year and month_dropdown.value > max_date.month:
                month_dropdown.value = max_date.month
    
    def navigate_month(direction):
        """Navigate to previous/next month"""
        year = year_dropdown.value
        month = month_dropdown.value
        
        # Get current symbol's date range to check bounds
        symbol = symbol_dropdown.value
        min_date, max_date = get_symbol_date_range(symbol)
        
        if direction == 'prev':
            if month == 1:
                year -= 1
                month = 12
            else:
                month -= 1
        else:  # next
            if month == 12:
                year += 1
                month = 1
            else:
                month += 1
        
        # Check bounds - make target_date timezone-aware to match min_date/max_date
        target_date = datetime(year, month, 1)
        # Convert to timezone-aware if min_date is timezone-aware
        if hasattr(min_date, 'tz') and min_date.tz is not None:
            target_date = pd.Timestamp(target_date, tz=min_date.tz)
        elif isinstance(min_date, pd.Timestamp) and min_date.tz is not None:
            target_date = pd.Timestamp(target_date, tz=min_date.tz)
        
        if target_date < min_date or target_date > max_date:
            return  # Don't navigate outside data range
        
        # Update dropdowns only if the new values are valid
        if year in year_dropdown.options:
            year_dropdown.value = year
        month_dropdown.value = month
        update_calendar()
    
    def on_symbol_change(change):
        """Handle symbol change - update year/month options and calendar"""
        symbol = change['new']
        update_year_month_options(symbol)
        update_calendar()
    
    
    def create_clickable_calendar():
        """Create calendar with clickable dates that update details"""
        symbol = symbol_dropdown.value
        year = year_dropdown.value
        month = month_dropdown.value
        
        symbol_data = bias_df[bias_df['symbol'] == symbol].copy()
        date_map = {}
        for _, row in symbol_data.iterrows():
            date_key = row['date'].strftime('%Y-%m-%d')
            date_map[date_key] = row.to_dict()
        
        first_day = datetime(year, month, 1)
        first_weekday = first_day.weekday()
        days_in_month = calendar.monthrange(year, month)[1]
        
        # Generate calendar with click handlers that directly update the details
        calendar_html = f"""
        
        <style>
            .calendar-container {
                font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif;
                max-width: 700px;
                margin: 20px auto;
                background: white;
                border-radius: 12px;
                padding: 20px;
                box-shadow: 0 2px 8px rgba(0,0,0,0.1);
            }
            .calendar-header {
                text-align: center;
                font-size: 20px;
                font-weight: 600;
                margin-bottom: 15px;
                color: #333;
            }
            .calendar-weekdays {
                display: grid !important;
                grid-template-columns: repeat(7, 1fr) !important;
                gap: 5px;
                margin-bottom: 10px;
            }
            .weekday {
                text-align: center;
                font-weight: 600;
                color: #666;
                font-size: 12px;
                padding: 8px 0;
            }
            .calendar-grid {
                display: grid !important;
                grid-template-columns: repeat(7, 1fr) !important;
                grid-auto-rows: minmax(40px, auto);
                gap: 5px;
                width: 100%;
                max-width: 100%;
            }
            .calendar-day {
                aspect-ratio: 1;
                min-height: 40px;
                min-width: 40px;
                display: flex;
                align-items: center;
                justify-content: center;
                border-radius: 8px;
                font-size: 14px;
                font-weight: 500;
                cursor: pointer;
                transition: all 0.2s;
                border: 2px solid transparent;
                color: #333;
                box-sizing: border-box;
            }
            .calendar-day.empty {
                background: transparent;
                cursor: default;
            }
            .calendar-day.has-data:hover {
                transform: scale(1.1);
                box-shadow: 0 2px 8px rgba(0,0,0,0.2);
                z-index: 10;
            }
            .calendar-day.selected {
                border-color: #667eea;
                border-width: 3px;
                transform: scale(1.1);
            }
        </style>
        <div class="calendar-container">
            <div class="calendar-header">{calendar.month_name[month]} {year}</div>
            <div class="calendar-weekdays">
                <div class="weekday">Mo</div><div class="weekday">Tu</div><div class="weekday">We</div>
                <div class="weekday">Th</div><div class="weekday">Fr</div><div class="weekday">Sa</div><div class="weekday">Su</div>
            </div>
            <div class="calendar-grid">
        """
        
        for i in range(first_weekday):
            calendar_html += '<div class="calendar-day empty"></div>'
        
        for day in range(1, days_in_month + 1):
            date = datetime(year, month, day)
            date_key = date.strftime('%Y-%m-%d')
            bias_data = date_map.get(date_key)
            
            if bias_data:
                bias = bias_data.get('bias', 'neutral')
                color = get_bias_color(bias)
                # Create inline onclick that updates the details div
                row_dict = bias_data
                details_json = str(row_dict).replace("'", "\\'")
                calendar_html += f'''
                <div class="calendar-day has-data" 
                     style="background-color: {color}; color: white;"
                     onclick="updateDetails_{symbol}_{date_key.replace('-', '_')}()">
                    {day}
                </div>
                <script>
                    function updateDetails_{symbol}_{date_key.replace('-', '_')}() {{
                        document.querySelectorAll('.calendar-day').forEach(el => el.classList.remove('selected'));
                        event.target.classList.add('selected');
                        var detailsDiv = document.getElementById('date-details-{symbol}');
                        if (detailsDiv) {{
                            var rowData = {row_dict};
                            var html = '<div style="background: #f8f9fa; border-radius: 12px; padding: 20px; margin-top: 20px; border: 2px solid #e0e0e0;"><h3 style="color: #333; margin-bottom: 15px;">Daily Bias Details</h3>';
                            html += '<div style="display: flex; flex-direction: column; gap: 12px;">';
                            html += '<div style="display: flex; justify-content: space-between;"><span style="font-weight: 600; color: #666;">Symbol:</span><span>' + '{symbol}' + '</span></div>';
                            html += '<div style="display: flex; justify-content: space-between;"><span style="font-weight: 600; color: #666;">Date:</span><span>' + '{date.strftime("%B %d, %Y")}' + '</span></div>';
                            html += '<div style="display: flex; justify-content: space-between;"><span style="font-weight: 600; color: #666;">Bias:</span><span style="background-color: {color}; color: white; padding: 4px 12px; border-radius: 12px; font-weight: 600; font-size: 12px; text-transform: uppercase;">' + '{bias.upper()}' + '</span></div>';
                            if (rowData.scenario !== undefined && rowData.scenario !== null && !isNaN(rowData.scenario)) {{
                                html += '<div style="display: flex; justify-content: space-between;"><span style="font-weight: 600; color: #666;">Scenario:</span><span>' + rowData.scenario + '</span></div>';
                            }}
                            html += '</div></div>';
                            detailsDiv.innerHTML = html;
                        }}
                    }}
                </script>
                '''
            else:
                calendar_html += f'<div class="calendar-day" style="background-color: #e0e0e0; color: #666;">{day}</div>'
        
        calendar_html += '</div></div>'
        return calendar_html
    
    
    def update_calendar(change=None):
        """Update calendar display"""
        with calendar_output:
            clear_output(wait=True)
            html = create_clickable_calendar()
            # Add details container
            html += f'<div id="date-details-{symbol_dropdown.value}" style="margin-top: 20px;"></div>'
            display(HTML(html))
    
    
    # Set up event handlers
    symbol_dropdown.observe(on_symbol_change, names='value')
    year_dropdown.observe(update_calendar, names='value')
    month_dropdown.observe(update_calendar, names='value')
    prev_button.on_click(lambda b: navigate_month('prev'))
    next_button.on_click(lambda b: navigate_month('next'))
    
    # Initial render
    update_calendar()
    
    # Create layout
    controls = widgets.HBox([
        symbol_dropdown,
        widgets.HBox([prev_button, next_button]),
        year_dropdown,
        month_dropdown
    ], layout=widgets.Layout(justify_content='space-between', margin='10px 0'))
    
    # Legend
    legend = widgets.HTML("""
    <div style="text-align: center; margin: 10px 0;">
        <span style="display: inline-block; margin: 0 15px;">
            <span style="display: inline-block; width: 20px; height: 20px; background: #4caf50; border-radius: 4px; vertical-align: middle; margin-right: 5px;"></span>
            Bullish
        </span>
        <span style="display: inline-block; margin: 0 15px;">
            <span style="display: inline-block; width: 20px; height: 20px; background: #f44336; border-radius: 4px; vertical-align: middle; margin-right: 5px;"></span>
            Bearish
        </span>
        <span style="display: inline-block; margin: 0 15px;">
            <span style="display: inline-block; width: 20px; height: 20px; background: #e0e0e0; border-radius: 4px; vertical-align: middle; margin-right: 5px;"></span>
            Neutral
        </span>
    </div>
    """)
    
    # Create a simpler click handler using JavaScript that updates an HTML element
    # We'll use a callback approach with ipywidgets
    
    
    return widgets.VBox([
        controls,
        legend,
        calendar_output
    ])

# Create and display the calendar widget
if IPYWIDGETS_AVAILABLE:
    print("Creating interactive calendar widget...")
    calendar_widget = create_calendar_widget(bias_results)
    display(calendar_widget)
else:
    print("Cannot create calendar widget - ipywidgets not available.")


Creating interactive calendar widget...


VBox(children=(HBox(children=(Dropdown(description='Symbol:', layout=Layout(width='200px'), options=('BTC', 'E‚Ä¶

In [25]:
# Old calendar code removed - using React calendar app in calendar-app/ directory instead


In [26]:
# Old calendar code removed - using React calendar app in calendar-app/ directory instead


In [27]:
# Old calendar display code removed


In [28]:
# Old calendar display code removed


In [29]:
# Old calendar display code removed


In [30]:
# Old calendar display code removed
