# Live Polymarket Trading Data Streaming and Visualization

This notebook streams live data from the `polymarket_trades.csv` file while another service is writing to it, and displays the data as a stacked bar chart with a line overlay.

## Setup and Imports

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
from datetime import datetime, timedelta
import os
from collections import deque
import time
import asyncio
import nest_asyncio
from IPython.display import display, clear_output
import warnings
warnings.filterwarnings('ignore')

# Allow nested asyncio in Jupyter
nest_asyncio.apply()

# Enable interactive plots
%matplotlib widget

## Configuration

In [None]:
# File path to monitor
# This is an absolute path on one specific machine; point it at wherever the
# trade-writing service puts polymarket_trades.csv in your environment.
CSV_FILE_PATH = '/home/jonathanmines/Documents/code/signal_drift_project/SignalDrift/data/polymarket_trades.csv'

# Visualization settings
# NOTE(review): WINDOW_SIZE is not referenced by any code visible in this
# notebook; only UPDATE_INTERVAL and TIME_WINDOW are read below.
WINDOW_SIZE = 50  # Number of time points to display
UPDATE_INTERVAL = 1000  # Milliseconds between updates (passed to FuncAnimation as `interval`)
TIME_WINDOW = 60  # Seconds of data to show (look-back used when filtering time bins)

# Asset configuration (based on the two asset IDs in the data)
# Keys are the asset IDs written as strings; values are the display names
# that the plotting code looks up ('Asset A' / 'Asset B').
ASSET_NAMES = {
    '5899348323059455657630528606815138588415095483052205208567031442326527433126': 'Asset A',
    '76440473074388722537799893602404687230922238288182630419521127770642474448937': 'Asset B'
}

## CSV File Monitor Class

In [None]:
class CSVFileMonitor:
    """Poll a CSV file that another process is writing and return new rows.

    Two pieces of state are tracked between calls:

    * ``last_position`` - number of data rows already handed to the caller.
    * ``last_size`` - file size in bytes observed at the last successful read.

    The byte size is a cheap "did anything change?" check; the row count is
    what actually decides which rows are new.  If the file shrinks (it was
    truncated or recreated by the writer) the monitor starts over from the
    first row instead of waiting for the file to outgrow its old size.

    NOTE: the whole file is re-parsed on every change, and a row that is only
    partially written at read time is counted as delivered.
    """

    def __init__(self, file_path, check_interval=0.5):
        """Remember which file to watch.

        Args:
            file_path: Path of the CSV file to monitor.
            check_interval: Suggested polling period in seconds.  It is only
                stored; the monitor itself never sleeps.
        """
        self.file_path = file_path
        self.check_interval = check_interval
        self.last_position = 0
        self.last_size = 0

    def get_new_data(self):
        """Return rows appended since the previous call.

        Returns:
            A DataFrame holding only the rows not returned before, or ``None``
            when the file is missing, unchanged, empty, unreadable, or has no
            additional complete rows.
        """
        try:
            # Sample the size *before* parsing: if the writer appends while we
            # read, the next poll still sees a size change and re-checks.
            current_size = os.path.getsize(self.file_path)
        except FileNotFoundError:
            return None
        except Exception as e:
            print(f"Error reading file: {e}")
            return None

        if current_size == self.last_size:
            return None

        if current_size < self.last_size:
            # The file shrank, so it was truncated or replaced; everything in
            # it is new from this monitor's point of view.
            self.last_position = 0

        if current_size == 0:
            # Nothing to parse yet (pandas would raise on a zero-byte file).
            self.last_size = 0
            return None

        try:
            df = pd.read_csv(self.file_path)
        except Exception as e:
            print(f"Error reading file: {e}")
            return None

        # Record the size even when no rows are emitted; otherwise the same
        # unchanged file would be re-parsed on every poll.
        self.last_size = current_size

        if len(df) < self.last_position:
            # Fewer rows than already delivered: the content was rewritten.
            self.last_position = 0

        if self.last_position < len(df):
            new_data = df.iloc[self.last_position:].copy()
            self.last_position = len(df)
            return new_data

        return None

    def get_all_data(self):
        """Read the whole file and mark every row in it as already delivered.

        Returns:
            The full file as a DataFrame, or an empty DataFrame when the file
            is missing or cannot be parsed.
        """
        try:
            # Size first, then parse, so bytes appended mid-read are not
            # recorded as "seen" without their rows having been loaded.
            current_size = os.path.getsize(self.file_path)
            df = pd.read_csv(self.file_path)
        except FileNotFoundError:
            return pd.DataFrame()
        except Exception as e:
            print(f"Error reading file: {e}")
            return pd.DataFrame()

        self.last_position = len(df)
        self.last_size = current_size
        return df

## Data Processing Functions

In [None]:
def process_trade_data(df):
    """Aggregate raw trades into one row per (one-second bin, asset name).

    The input frame is modified in place: its column names are stripped of
    surrounding whitespace and the columns ``datetime``, ``asset_name`` and
    ``time_bin`` are added to it.

    Args:
        df: Raw trades with ``timestamp``, ``asset_id``, ``price`` and
            ``size`` columns.  ``timestamp`` is parsed with ``unit='ms'``.

    Returns:
        A DataFrame with columns ``time_bin``, ``asset_name``, ``price``
        (mean), ``size`` (sum) and ``trade_count``; an empty DataFrame when
        ``df`` is empty.
    """
    if df.empty:
        return pd.DataFrame()

    # Headers may carry stray spaces; normalise them before column lookups.
    df.columns = df.columns.str.strip()

    # Derived columns are written back onto the caller's frame.
    df['datetime'] = pd.to_datetime(df['timestamp'], unit='ms')
    # IDs missing from ASSET_NAMES map to NaN and are dropped by the groupby.
    df['asset_name'] = df['asset_id'].map(ASSET_NAMES)
    df['time_bin'] = df['datetime'].dt.floor('1s')

    aggregations = {
        'price': 'mean',
        'size': 'sum',
        'timestamp': 'count',  # number of trades in the bin
    }
    per_bin = df.groupby(['time_bin', 'asset_name']).agg(aggregations)

    return per_bin.reset_index().rename(columns={'timestamp': 'trade_count'})

def prepare_visualization_data(grouped_df, current_time, time_window=None):
    """Build the time-by-asset price table used for the stacked bar chart.

    Args:
        grouped_df: Output of ``process_trade_data`` (needs the columns
            ``time_bin``, ``asset_name`` and ``price``).
        current_time: Reference "now"; only bins no older than the look-back
            window before this instant are kept.  It must be comparable with
            the values in ``time_bin``.
        time_window: Look-back length in seconds.  Defaults to the
            module-level ``TIME_WINDOW`` when omitted.

    Returns:
        A DataFrame indexed by ``time_bin`` with one column per asset name,
        or ``None`` when there is no data inside the window.  Gaps are filled
        with the asset's previous value, and with 0 where no earlier value
        exists.
    """
    if grouped_df.empty:
        return None

    if time_window is None:
        time_window = TIME_WINDOW

    # Filter to recent time window
    time_threshold = current_time - timedelta(seconds=time_window)
    recent_data = grouped_df[grouped_df['time_bin'] >= time_threshold]

    if recent_data.empty:
        return None

    # Pivot to get assets as columns
    pivot_data = recent_data.pivot_table(
        index='time_bin',
        columns='asset_name',
        values='price',
        aggfunc='mean'
    )

    # ``fillna(method='ffill')`` is deprecated in pandas; ``ffill()`` is the
    # supported spelling and gives the same result.
    return pivot_data.ffill().fillna(0)

## Live Visualization Class

In [None]:
class LivePolymarketVisualizer:
    """Create and update a live stacked-bar view of Polymarket trade prices.

    Each animation frame pulls new rows from the CSV monitor, re-aggregates
    the buffered trades into one-second bins and redraws the axes from
    scratch: one stacked bar segment per asset plus a line showing the sum
    of both assets.

    Attributes:
        csv_monitor: Object providing ``get_new_data()`` and
            ``get_all_data()``.
        all_data: Rolling buffer of raw trade rows (bounded, see MAX_ROWS).
        total_trades: Count of every row received, including rows that have
            since been trimmed from ``all_data``.
    """

    # Once the buffer exceeds MAX_ROWS it is cut back to the newest KEEP_ROWS.
    MAX_ROWS = 10000
    KEEP_ROWS = 5000

    # Matplotlib date axes measure x in days, so a bar width must be given as
    # a fraction of a day.  Bins are one second wide; use 80% of that.
    BAR_WIDTH_DAYS = 0.8 / 86400.0

    def __init__(self, csv_monitor):
        """Create the figure and the static axis decoration.

        Args:
            csv_monitor: Source of trade rows (see class docstring).
        """
        self.csv_monitor = csv_monitor
        self.all_data = pd.DataFrame()
        self.total_trades = 0

        # Initialize plot
        plt.style.use('seaborn-v0_8-darkgrid')
        self.fig, self.ax = plt.subplots(figsize=(12, 8))

        # Colors for assets
        self.colors = ['#1f77b4', '#ff7f0e']  # Blue and orange

        # Initialize empty bars and line
        self.bars = None
        self.line = None

        # Configure axes
        self.ax.set_ylim(0, 1)
        self.ax.set_ylabel('Probability', fontsize=12)
        self.ax.set_xlabel('DateTime', fontsize=12)
        self.ax.set_title('Live Polymarket Trading Data - Asset Probabilities', fontsize=14, fontweight='bold')

        # Add grid
        self.ax.grid(True, alpha=0.3)

        # No legend here: nothing has been drawn yet, so there are no artists
        # to label.  The legend is built in animate() once bars exist.

        # Add value annotations
        self.value_text = self.ax.text(0.02, 0.95, '', transform=self.ax.transAxes,
                                       bbox=dict(boxstyle='round', facecolor='white', alpha=0.8),
                                       verticalalignment='top')

    def update_data(self):
        """Append newly written rows to the buffer and keep the buffer bounded."""
        new_data = self.csv_monitor.get_new_data()

        if new_data is None or new_data.empty:
            return

        self.total_trades += len(new_data)

        if self.all_data.empty:
            self.all_data = new_data
        else:
            self.all_data = pd.concat([self.all_data, new_data], ignore_index=True)

        # Keep only recent data to manage memory.  reset_index() returns a
        # fresh frame rather than a view of the old, larger one.
        if len(self.all_data) > self.MAX_ROWS:
            self.all_data = self.all_data.tail(self.KEEP_ROWS).reset_index(drop=True)

    def animate(self, frame):
        """Redraw the chart for one animation frame.

        Args:
            frame: Frame counter supplied by ``FuncAnimation``; unused.
        """
        self.update_data()

        if self.all_data.empty:
            return

        # process_trade_data adds columns to the frame it receives; hand it a
        # copy so the raw buffer stays untouched.
        processed = process_trade_data(self.all_data.copy())
        if processed.empty:
            return

        # Trade times come from epoch milliseconds and are therefore naive
        # UTC values, so "now" must also be naive UTC.  Local wall-clock time
        # would shift the window by the machine's UTC offset.
        current_time = pd.Timestamp.now(tz='UTC').tz_localize(None)
        viz_data = prepare_visualization_data(processed, current_time)

        if viz_data is None or viz_data.empty:
            return

        # Clear previous plot
        self.ax.clear()

        time_indices = viz_data.index

        # Running top of the stack for each time bin
        bottom = np.zeros(len(time_indices))

        # Plot bars for each asset
        for i, asset in enumerate(['Asset A', 'Asset B']):
            if asset in viz_data.columns:
                values = viz_data[asset].values
                self.ax.bar(time_indices, values, bottom=bottom,
                            label=asset, color=self.colors[i], alpha=0.7,
                            width=self.BAR_WIDTH_DAYS)
                bottom = bottom + values

        # Plot combined line (sum of probabilities)
        if 'Asset A' in viz_data.columns and 'Asset B' in viz_data.columns:
            combined = viz_data['Asset A'] + viz_data['Asset B']
            self.ax.plot(time_indices, combined, 'k-', linewidth=2,
                         marker='o', markersize=4, label='Combined')

        # ax.clear() wiped all decoration, so re-apply it.
        self.ax.set_ylim(0, 1.2)
        self.ax.set_ylabel('Probability', fontsize=12)
        self.ax.set_xlabel('DateTime', fontsize=12)
        self.ax.set_title('Live Polymarket Trading Data - Asset Probabilities',
                          fontsize=14, fontweight='bold')

        # Rotate x-axis labels
        plt.setp(self.ax.xaxis.get_majorticklabels(), rotation=45, ha='right')

        # Add legend
        self.ax.legend(loc='upper right')

        # Add grid
        self.ax.grid(True, alpha=0.3)

        # Update value display
        latest_time = viz_data.index[-1]
        value_text = f"Latest Values ({latest_time.strftime('%H:%M:%S')})\n"

        if 'Asset A' in viz_data.columns:
            value_text += f"Asset A: {viz_data['Asset A'].iloc[-1]:.3f}\n"
        if 'Asset B' in viz_data.columns:
            value_text += f"Asset B: {viz_data['Asset B'].iloc[-1]:.3f}\n"

        # Report every trade seen, not just the rows still held in the buffer.
        value_text += f"\nTotal Trades: {self.total_trades}"

        self.value_text = self.ax.text(0.02, 0.95, value_text,
                                       transform=self.ax.transAxes,
                                       bbox=dict(boxstyle='round', facecolor='white', alpha=0.8),
                                       verticalalignment='top')

        # Lay out this figure specifically rather than whichever figure
        # pyplot currently considers active.
        self.fig.tight_layout()

    def start(self):
        """Load the existing file contents and start the animation.

        Returns:
            The ``FuncAnimation`` object.  The caller must keep a reference
            to it, otherwise the animation may be garbage-collected.
        """
        # Load initial data
        self.all_data = self.csv_monitor.get_all_data()
        self.total_trades = len(self.all_data)

        # Create animation
        self.anim = FuncAnimation(self.fig, self.animate, interval=UPDATE_INTERVAL,
                                  blit=False, cache_frame_data=False)

        return self.anim

## Run Live Streaming Visualization

In [None]:
# The monitor is created unconditionally so that later cells can use it even
# when the file does not exist yet.
csv_monitor = CSVFileMonitor(CSV_FILE_PATH)

if os.path.exists(CSV_FILE_PATH):
    # File is present: report what is being watched, then start the chart.
    print(f"Monitoring file: {CSV_FILE_PATH}")
    print(f"File size: {os.path.getsize(CSV_FILE_PATH)} bytes")

    visualizer = LivePolymarketVisualizer(csv_monitor)

    print("Starting live visualization...")
    print("The chart will update automatically as new data is written to the CSV file.")

    # Keep the returned animation in a notebook-level name so it stays alive.
    animation = visualizer.start()
else:
    # Nothing to plot; tell the user how to fix it.
    print(f"Warning: CSV file not found at {CSV_FILE_PATH}")
    print("Please ensure the file path is correct and the data service is running.")

## Alternative: Simple Dashboard with Statistics

In [None]:
async def run_dashboard(duration_seconds=60):
    """Print a text dashboard of the trade file, refreshed every two seconds.

    On each refresh the whole CSV is re-read, aggregated with
    ``process_trade_data`` and summarised: latest per-asset prices, total
    trade count, covered time range and per-asset price statistics.  Nothing
    is printed for a refresh in which the file is missing or empty.

    Args:
        duration_seconds: How long to keep refreshing before returning.
    """
    monitor = CSVFileMonitor(CSV_FILE_PATH)
    start_time = time.time()

    print("Starting live dashboard...")
    print("=" * 60)

    while (time.time() - start_time) < duration_seconds:
        # Get all data
        all_data = monitor.get_all_data()

        if not all_data.empty:
            # Normalise headers here so the column lookups below do not
            # depend on process_trade_data having modified this frame.
            all_data.columns = all_data.columns.str.strip()

            # Process data
            processed = process_trade_data(all_data)

            # Compute trade times explicitly instead of reading a column
            # that process_trade_data adds to its argument as a side effect.
            trade_times = pd.to_datetime(all_data['timestamp'], unit='ms')

            clear_output(wait=True)

            print(f"LIVE POLYMARKET DATA DASHBOARD - {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
            print("=" * 60)

            # Latest values
            if not processed.empty:
                latest_time = processed['time_bin'].max()
                latest_data = processed[processed['time_bin'] == latest_time]

                print("\nLatest Prices:")
                for _, row in latest_data.iterrows():
                    print(f"  {row['asset_name']}: {row['price']:.4f} (Size: {row['size']:.2f})")

            # Statistics
            print("\nStatistics:")
            print(f"  Total trades: {len(all_data)}")
            print(f"  Time range: {trade_times.min()} to {trade_times.max()}")

            # Price ranges by asset
            print("\nPrice Ranges:")
            for asset_id, asset_name in ASSET_NAMES.items():
                asset_data = all_data[all_data['asset_id'] == asset_id]
                if not asset_data.empty:
                    print(f"  {asset_name}:")
                    print(f"    Min: {asset_data['price'].min():.4f}")
                    print(f"    Max: {asset_data['price'].max():.4f}")
                    print(f"    Mean: {asset_data['price'].mean():.4f}")
                    print(f"    Std: {asset_data['price'].std():.4f}")

            # Clamp at zero: a slow refresh can overrun the deadline slightly.
            remaining = max(0.0, duration_seconds - (time.time() - start_time))
            print("\n" + "=" * 60)
            print(f"Monitoring for {remaining:.0f} more seconds...")

        # Yield to the event loop between refreshes.
        await asyncio.sleep(2)

    print("\nDashboard monitoring complete.")

# Run the dashboard for 60 seconds
# await run_dashboard(60)

## Save Processed Data

In [None]:
# Function to save processed data for analysis
def save_processed_data(csv_monitor, output_path='processed_polymarket_data.csv'):
    """Read every trade via the monitor, aggregate it and write it to CSV.

    Args:
        csv_monitor: Object whose ``get_all_data()`` returns the raw trades
            as a DataFrame.
        output_path: Destination file for the aggregated rows.

    Returns:
        The aggregated DataFrame, or ``None`` when there were no trades.
    """
    raw_trades = csv_monitor.get_all_data()

    # Nothing loaded: report it and bail out before touching the disk.
    if raw_trades.empty:
        print("No data to process.")
        return None

    processed = process_trade_data(raw_trades)
    processed.to_csv(output_path, index=False)

    record_count = len(processed)
    print(f"Processed data saved to: {output_path}")
    print(f"Total records: {record_count}")
    return processed

# Example usage:
# processed_data = save_processed_data(csv_monitor)

## Summary

This notebook provides:

1. **Live CSV Monitoring**: Continuously monitors the polymarket_trades.csv file for new data
2. **Real-time Visualization**: Displays a stacked bar chart with:
   - Asset A and Asset B probabilities as stacked bars
   - Combined probability as a line overlay
   - Live updates as new trades come in
3. **Dashboard Alternative**: Text-based dashboard showing statistics and latest values
4. **Data Processing**: Aggregates trades by time bins and calculates average prices

The visualization matches the wireframe with:
- Y-axis showing probability (the live chart uses a 0 to 1.2 range so the combined line has headroom)
- X-axis showing datetime
- Stacked bars for both assets
- Line showing combined values
- Legend and value display