In [None]:
# Enable IPython's autoreload extension in mode 2, which re-imports modules before
# each cell executes, so edits to the project code imported from src/ take effect
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import sys
import os
import pandas as pd
import matplotlib.pyplot as plt

# Add project root to path
sys.path.append(os.path.abspath('..'))

from src.data_loader import DataLoader
from src.validators.consistency import ConsistencyValidator

%matplotlib inline

## 1. Load Data
Load the portfolio data from the CSV file.

In [None]:
# CSV location, given relative to the directory the notebook runs from.
data_path = '../data/Test.csv'
loader = DataLoader(data_path)

# load_data() returns a two-item result: the positions first, the trades second.
load_result = loader.load_data()
positions, trades = load_result

print(f"Loaded {len(positions)} positions.")

## 2. Run Consistency Validator
We run the validator to check for discrepancies between **Trade Price** and **Market Price** (`Price`).

In [None]:
# Run the consistency check over the loaded positions and trades;
# validate() hands back the collection of errors it detected.
validator = ConsistencyValidator(positions, trades)
errors = validator.validate()

print(f"Found {len(errors)} consistency errors.")

if not errors:
    print("No consistency errors found.")
else:
    # One row per error, one column per attribute stored on the error object.
    error_df = pd.DataFrame(list(map(vars, errors)))

    # Count of errors in each severity bucket.
    print("Severity Distribution:")
    severity_counts = error_df['severity'].value_counts()
    print(severity_counts)

    # Preview the first ten errors as a rendered table.
    display(error_df.iloc[:10])

## 3. Visualization: Trade Price vs Market Price
We visualize the relationship between the executed Trade Price and the reported Market Price.
*   **X-Axis**: Market Price
*   **Y-Axis**: Trade Price

Points far from the dashed diagonal (where Trade Price equals Market Price) indicate potential data quality issues.

In [None]:
# Plot tuning knobs for this cell.
MAX_ANNOTATED_TICKERS = 10   # label at most this many distinct high-severity tickers
LOG_RANGE_RATIO = 100        # switch to a logarithmic axis above this max/min ratio
PRICE_FLOOR = 0.01           # keeps the ratio finite; also the linear zone of the symlog axis

if errors:
    # The validator only returns the failing rows, so rebuild the full set of traded
    # rows from `positions` and tag each one with the severity of its error, if any.
    df = positions.copy()
    for col in ["Price", "Trade Price", "Traded Today"]:
        df[col] = pd.to_numeric(df[col], errors='coerce')

    # Keep only rows where a trade happened and both prices parsed as numbers.
    df = df[df["Traded Today"].abs() > 0].dropna(subset=["Price", "Trade Price"]).copy()

    # Errors are matched to rows on the raw (Date, P_Ticker) pair.
    # NOTE(review): assumes e.date / e.ticker hold the same values as these two
    # columns — confirm against the validator.
    severity_map = {(e.date, e.ticker): e.severity for e in errors}
    row_keys = list(zip(df['Date'], df['P_Ticker']))

    # Plain dict lookups instead of two row-wise DataFrame.apply passes: faster, and
    # they also work when the filtered frame is empty.
    df['Is_Error'] = [key in severity_map for key in row_keys]
    df['Severity'] = [severity_map.get(key, 'None') for key in row_keys]

    if df.empty:
        print("No traded rows with both prices available; nothing to plot.")
    else:
        fig, ax = plt.subplots(figsize=(12, 12))

        # 1. Rows without a reported error form the faint background cloud.
        valid_data = df[~df['Is_Error']]
        ax.scatter(valid_data["Price"], valid_data["Trade Price"],
                   alpha=0.1, c='blue', label='Valid', s=10)

        # 2. Error rows, drawn from least to most severe so the worst end up on top.
        # NOTE(review): only the severities 'Low' / 'Medium' / 'High' are drawn; an
        # error with any other severity value is left out of the figure — confirm
        # these are the only values the validator emits.
        low = df[df['Severity'] == 'Low']
        med = df[df['Severity'] == 'Medium']
        high = df[df['Severity'] == 'High']
        severity_layers = [
            (low, 'green', 'Low Severity', 20, 0.6),
            (med, 'orange', 'Medium Severity', 40, 0.8),
            (high, 'red', 'High Severity', 60, 1.0),
        ]
        for subset, colour, label, size, opacity in severity_layers:
            if not subset.empty:
                ax.scatter(subset["Price"], subset["Trade Price"],
                           c=colour, label=label, s=size, alpha=opacity)

        # Label the first occurrence of each high-severity ticker, capped so the
        # figure stays readable.
        labelled = high.drop_duplicates(subset='P_Ticker').head(MAX_ANNOTATED_TICKERS)
        for ticker, price, trade_price in zip(labelled['P_Ticker'],
                                              labelled['Price'],
                                              labelled['Trade Price']):
            ax.annotate(f"{ticker}",
                        (price, trade_price),
                        xytext=(5, 5), textcoords='offset points',
                        fontsize=9, color='darkred', fontweight='bold')

        # Reference diagonal: points on it have Trade Price equal to Market Price.
        max_val = max(df["Price"].max(), df["Trade Price"].max())
        min_val = min(df["Price"].min(), df["Trade Price"].min())
        ax.plot([min_val, max_val], [min_val, max_val], 'k--', alpha=0.5)

        ax.set_title("Consistency Check: Trade Price vs Market Price")
        ax.set_xlabel("Market Price")
        ax.set_ylabel("Trade Price")
        ax.legend()
        ax.grid(True)

        # Use a logarithmic axis when the prices span a huge range. The minimum is
        # clamped at zero before adding the floor so a zero or negative price cannot
        # make the denominator zero or negative.
        if max_val / (max(min_val, 0.0) + PRICE_FLOOR) > LOG_RANGE_RATIO:
            if min_val > 0:
                ax.set_xscale('log')
                ax.set_yscale('log')
            else:
                # A pure log axis cannot show zero or negative prices, which are
                # exactly the suspicious points; symlog keeps them visible.
                ax.set_xscale('symlog', linthresh=PRICE_FLOOR)
                ax.set_yscale('symlog', linthresh=PRICE_FLOOR)

        plt.show()
else:
    print("No errors to visualize.")

## 4. Time Series Analysis: Tickers with Consistency Errors
For every ticker with at least one consistency error (all severities, since there are only a few), we plot the Market Price and Trade Price over time to see whether the discrepancy is persistent.

In [None]:
def visualize_ticker_consistency(ticker, error_list, positions_df):
    """Plot one ticker's Market Price and Trade Price over time, marking its errors.

    Parameters
    ----------
    ticker
        Value compared with ``e.ticker`` of every error and with the ``P_Ticker``
        column of ``positions_df``.
    error_list
        Iterable of error objects exposing ``ticker`` and ``date`` attributes.
    positions_df : pandas.DataFrame
        Table with ``P_Ticker``, ``Date``, ``Price``, ``Trade Price`` and
        ``Traded Today`` columns. It is not modified; a filtered copy is used.

    Returns
    -------
    None
        Prints a summary line and shows one figure. Returns immediately, without
        printing or plotting, when ``error_list`` has no entry for ``ticker``.
    """
    # Keep only the errors that belong to this ticker.
    ticker_errors = [e for e in error_list if e.ticker == ticker]
    if not ticker_errors:
        return

    print(f"Visualizing consistency errors for: {ticker} (Count: {len(ticker_errors)})")

    # Chronological, typed view of this ticker's rows.
    ticker_df = positions_df[positions_df['P_Ticker'] == ticker].copy()
    ticker_df['Date'] = pd.to_datetime(ticker_df['Date'])  # type: ignore
    ticker_df = ticker_df.sort_values('Date')

    ticker_df['Price'] = pd.to_numeric(ticker_df['Price'], errors='coerce')
    ticker_df['Trade Price'] = pd.to_numeric(ticker_df['Trade Price'], errors='coerce')
    # An unparseable or missing traded quantity is treated as "no trade that day".
    ticker_df['Traded Today'] = pd.to_numeric(ticker_df['Traded Today'], errors='coerce').fillna(0)

    fig, ax = plt.subplots(figsize=(14, 5))

    ax.plot(ticker_df['Date'], ticker_df['Price'],
            label='Market Price', color='blue', linewidth=2, alpha=0.7)

    # Trade prices are only meaningful on days with a non-zero traded quantity.
    trades_only = ticker_df[ticker_df['Traded Today'].abs() > 0]
    ax.scatter(trades_only['Date'], trades_only['Trade Price'],
               label='Trade Price', color='green', marker='o', s=30, alpha=0.6)

    # Parse the error dates the same way as the Date column so the membership test
    # compares datetimes with datetimes, whatever type the validator stored.
    error_dates = pd.to_datetime([e.date for e in ticker_errors])
    error_points = ticker_df[ticker_df['Date'].isin(error_dates)]

    if not error_points.empty:
        ax.scatter(error_points['Date'], error_points['Trade Price'],
                   color='red', s=80, zorder=5, label='Consistency Error')

        # Dotted connector from each flagged Trade Price to that day's Market Price,
        # making the size of the gap visible.
        for _, row in error_points.iterrows():
            ax.plot([row['Date'], row['Date']], [row['Trade Price'], row['Price']],
                    color='red', linestyle=':', linewidth=1.5, alpha=0.6)

    ax.set_title(f"Consistency Check: {ticker}")
    ax.set_xlabel("Date")
    ax.set_ylabel("Price")
    ax.legend()
    ax.grid(True, alpha=0.3)
    plt.show()

# Plot every ticker that has at least one error (the error set is small enough to
# show in full).
if errors:
    # Sort the unique tickers so the figures appear in the same order on every run;
    # iterating a bare set of strings can change order between kernel restarts
    # because string hashing is randomised per process. key=str keeps the sort from
    # failing if a ticker is not a string.
    error_tickers = sorted({e.ticker for e in errors}, key=str)

    print(f"Visualizing {len(error_tickers)} tickers with consistency errors.")
    for ticker in error_tickers:
        visualize_ticker_consistency(ticker, errors, positions)
else:
    print("No errors found.")