In [2]:
import os

import dash
import dash_bootstrap_components as dbc
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from dash import dcc, html, Input, Output, State

In [3]:
# Plotly template name handed to every figure built in this notebook
# (alternatives: "plotly_dark", "ggplot2", "seaborn", etc.).
PLOTLY_TEMPLATE = "plotly_white"
# Bootstrap theme stylesheet passed to the Dash app as an external stylesheet
# (other dash-bootstrap-components themes: PULSE, CYBORG, DARKLY, MINTY, etc.).
DBC_THEME = dbc.themes.LUX

In [4]:
def create_dummy_csv_if_not_exists(filename="model_findings_summary.csv", seed=None):
    """Write a synthetic model-findings CSV to ``filename`` unless that file already exists.

    The generated file has the columns ``stock_id``, ``time_id``, ``model_name``,
    ``mse``, ``qlike``, ``r^2``, ``pred_vol`` and ``true_vol`` and contains exactly
    1000 rows spread over 3 stocks x 5 time ids x 2 models.

    Args:
        filename: Local path of the CSV file. Missing parent directories are created.
        seed: Optional seed for the random generator; pass an int to get a
            reproducible file. ``None`` (the default) draws fresh random data.

    Returns:
        None. The function only prints a status message and, when needed, writes the file.
    """
    # A plain existence check avoids parsing the whole CSV just to see whether it is there.
    if os.path.isfile(filename):
        print(f"'{filename}' found. Using existing data.")
        return

    print(f"'{filename}' not found. Generating a dummy CSV for demonstration.")
    n_rows = 1000
    n_stock_ids = 3
    n_time_ids_per_stock = 5
    n_models = 2

    rng = np.random.default_rng(seed)
    stock_ids_list = [f"stock_{i}" for i in range(n_stock_ids)]
    model_names_list = [f"Transformer_v{i}" for i in range(1, n_models + 1)]

    # Spread n_rows over every (stock, time, model) group; the first `leftover`
    # groups receive one extra row so the total is exactly n_rows rather than
    # the floor-divided amount.
    n_groups = n_stock_ids * n_time_ids_per_stock * n_models
    rows_per_group, leftover = divmod(n_rows, n_groups)

    data = []
    group_idx = 0
    for stock_idx, stock_id_val in enumerate(stock_ids_list):
        time_id_base = stock_idx * 1000
        for time_idx in range(n_time_ids_per_stock):
            time_id_val = time_id_base + time_idx
            for model_name_val in model_names_list:
                # Base metrics for this stock/time/model group; each row jitters them by +/-10%.
                base_mse = rng.uniform(0.001, 0.01)
                base_qlike = rng.uniform(0.01, 0.1)
                base_r2 = rng.uniform(0.5, 0.9)

                group_rows = rows_per_group + (1 if group_idx < leftover else 0)
                group_idx += 1

                for _ in range(group_rows):
                    true_vol_val = rng.uniform(0.01, 0.2)
                    pred_error = rng.normal(0, 0.02)
                    # Clamp so the predicted volatility stays strictly positive.
                    pred_vol_val = max(0.001, true_vol_val + pred_error)

                    data.append({
                        'stock_id': stock_id_val,
                        'time_id': time_id_val,
                        'model_name': model_name_val,
                        'mse': base_mse * rng.uniform(0.9, 1.1),
                        'qlike': base_qlike * rng.uniform(0.9, 1.1),
                        'r^2': base_r2 * rng.uniform(0.9, 1.1),
                        'pred_vol': pred_vol_val,
                        'true_vol': true_vol_val
                    })

    df_dummy = pd.DataFrame(data)

    # to_csv refuses to write into a directory that does not exist, so create it first.
    parent_dir = os.path.dirname(filename)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    df_dummy.to_csv(filename, index=False)
    print(f"Dummy '{filename}' created with {len(df_dummy)} rows.")

In [5]:
# Location of the model-findings CSV. Set the MODEL_FINDINGS_CSV environment
# variable to point the dashboard at a different file without editing the
# notebook; when it is unset the original project path is used.
CSV_FILE = os.environ.get(
    "MODEL_FINDINGS_CSV",
    "/Users/ayush/Documents/University/Year 03/Sem 01/DATA3888/Optiver-07/Dash WebApp/Data/Transformer/1.csv",
)
create_dummy_csv_if_not_exists(CSV_FILE)  # Create dummy if not present

raw_df = pd.read_csv(CSV_FILE)

# Fail early with a readable message instead of an opaque KeyError inside a callback.
_missing_columns = [
    column
    for column in ('stock_id', 'time_id', 'model_name', 'mse', 'qlike', 'r^2', 'pred_vol', 'true_vol')
    if column not in raw_df.columns
]
if _missing_columns:
    raise KeyError(f"'{CSV_FILE}' is missing required column(s): {_missing_columns}")

# Derive the prediction error and cast the filter columns to str so dropdown
# values compare equal to the column contents.
df = raw_df.assign(
    error=lambda frame: frame['true_vol'] - frame['pred_vol'],
    stock_id=lambda frame: frame['stock_id'].astype(str),
    time_id=lambda frame: frame['time_id'].astype(str),
    model_name=lambda frame: frame['model_name'].astype(str),
)


'/Users/ayush/Documents/University/Year 03/Sem 01/DATA3888/Optiver-07/Dash WebApp/Data/Transformer/1.csv' found. Using existing data.


In [6]:
# Build the Dash application: Bootstrap theme stylesheet plus the browser-tab title.
app = dash.Dash(
    __name__,
    external_stylesheets=[DBC_THEME],
    title="Volatility Model Analyzer",
)

In [7]:
def _natural_sort_key(label):
    """Sort key that orders purely numeric ID strings by value, before any other strings."""
    return (0, int(label), "") if label.isdecimal() else (1, 0, label)


def _graph_card(title, graph_id):
    """Return a half-width column holding a titled card with one loading-wrapped graph."""
    return dbc.Col(md=6, className="mb-4", children=[
        dbc.Card(body=True, className="shadow-sm h-100", children=[
            html.H5(title, className="card-title text-info"),
            dcc.Loading(dcc.Graph(id=graph_id, config={'displayModeBar': False}))
        ])
    ])


# stock_id values are strings, so a plain sort would place "10" before "2".
_stock_id_choices = sorted(df['stock_id'].unique(), key=_natural_sort_key)

app.layout = dbc.Container(fluid=True, className="py-4", children=[
    # Header
    dbc.Row(dbc.Col(html.H1("Volatility Model Performance Analyzer", className="text-center text-primary mb-4"))),

    # Controls and Plots
    dbc.Row([
        # Control Panel Column
        dbc.Col(width=12, lg=3, className="mb-4", children=[
            dbc.Card(body=True, className="shadow-sm", children=[
                html.H4("Filters", className="card-title text-secondary mb-3"),
                dbc.Label("Select Stock ID:", html_for="stock-id-dropdown"),
                dcc.Dropdown(
                    id='stock-id-dropdown',
                    options=[{'label': i, 'value': i} for i in _stock_id_choices],
                    value=_stock_id_choices[0] if _stock_id_choices else None,
                    clearable=False,
                    className="mb-3"
                ),
                dbc.Label("Select Time ID:", html_for="time-id-dropdown"),
                dcc.Dropdown(
                    id='time-id-dropdown',
                    # Options populated by callback
                    clearable=False,
                    className="mb-3"
                ),
                dbc.Label("Select Model Name:", html_for="model-name-dropdown"),
                dcc.Dropdown(
                    id='model-name-dropdown',
                    # Options populated by callback
                    clearable=False,
                    className="mb-3"
                ),
                html.Div(id='metrics-summary-display', className="mt-3 small")
            ])
        ]),

        # Plots Column: two rows of two graph cards each
        dbc.Col(width=12, lg=9, children=[
            dbc.Row([
                _graph_card("Predicted vs. True Volatility", 'scatter-pred-true'),
                _graph_card("Volatility Over Time (Selected Session)", 'line-time-series'),
            ]),
            dbc.Row([
                _graph_card("Model Performance Metrics", 'bar-metrics'),
                _graph_card("Prediction Error Distribution", 'hist-error'),
            ]),
        ]),
    ]),

    # Footer or additional info
    dbc.Row(dbc.Col(html.P("Interactive dashboard to analyze model predictions.", className="text-center text-muted small mt-4"))),
])

In [8]:
# Update Time ID dropdown based on Stock ID
@app.callback(
    Output('time-id-dropdown', 'options'),
    Output('time-id-dropdown', 'value'),
    Input('stock-id-dropdown', 'value')
)
def set_time_id_options(selected_stock_id):
    """Populate the Time ID dropdown with the time ids recorded for the chosen stock.

    Args:
        selected_stock_id: Current value of the Stock ID dropdown (a string), or a
            falsy value when nothing is selected.

    Returns:
        A ``(options, value)`` pair: the dropdown option dicts and the default
        selection (the first time id), or ``([], None)`` when no stock is selected.
    """
    if not selected_stock_id:
        return [], None

    def natural_key(label):
        # time_id values are strings; order purely numeric ones by value so that
        # "5" precedes "1000", and keep any non-numeric ids alphabetically after them.
        return (0, int(label), "") if label.isdecimal() else (1, 0, label)

    stock_rows = df[df['stock_id'] == selected_stock_id]
    time_ids = sorted(stock_rows['time_id'].unique(), key=natural_key)
    options = [{'label': i, 'value': i} for i in time_ids]
    value = time_ids[0] if time_ids else None
    return options, value

In [9]:
@app.callback(
    Output('model-name-dropdown', 'options'),
    Output('model-name-dropdown', 'value'),
    Input('stock-id-dropdown', 'value'),
    Input('time-id-dropdown', 'value')
)
def set_model_name_options(selected_stock_id, selected_time_id):
    """Fill the Model Name dropdown for the chosen stock/time pair.

    Returns the option dicts together with the default selection. When more than
    one model is available an 'ALL_MODELS' entry is put first and selected by
    default; with exactly one model that model is selected; otherwise None.
    """
    if not (selected_stock_id and selected_time_id):
        return [], None

    in_session = (df['stock_id'] == selected_stock_id) & (df['time_id'] == selected_time_id)
    available_models = sorted(df[in_session]['model_name'].unique())
    model_options = [{'label': name, 'value': name} for name in available_models]

    if len(available_models) > 1:
        # Several models: offer a combined entry for plots that can overlay them.
        model_options.insert(0, {'label': 'All Selected Models', 'value': 'ALL_MODELS'})
        return model_options, 'ALL_MODELS'

    if available_models:
        return model_options, available_models[0]
    return model_options, None

In [10]:
def _empty_figure(message):
    """Return a transparent placeholder figure that shows only ``message``."""
    fig = go.Figure().update_layout(template=PLOTLY_TEMPLATE, paper_bgcolor="rgba(0,0,0,0)", plot_bgcolor="rgba(0,0,0,0)")
    fig.add_annotation(text=message, xref="paper", yref="paper", showarrow=False, font=dict(size=14))
    return fig


def _scatter_figure(plot_df, split_by_model):
    """Predicted-vs-true scatter with marginal histograms and a dashed y = x reference line."""
    fig = px.scatter(
        plot_df, x='true_vol', y='pred_vol',
        color='model_name' if split_by_model else None,
        labels={'true_vol': 'True Volatility', 'pred_vol': 'Predicted Volatility'},
        template=PLOTLY_TEMPLATE,
        marginal_y="histogram", marginal_x="histogram",
        hover_data=['stock_id', 'time_id', 'model_name']
    )
    # The diagonal spans the combined range of both axes; points on it are perfect predictions.
    low = min(plot_df['true_vol'].min(), plot_df['pred_vol'].min())
    high = max(plot_df['true_vol'].max(), plot_df['pred_vol'].max())
    fig.add_shape(type='line', x0=low, y0=low, x1=high, y1=high, line=dict(color='Gray', dash='dash'))
    fig.update_layout(showlegend=bool(split_by_model and plot_df['model_name'].nunique() > 1))
    return fig


def _line_figure(plot_df):
    """True (dotted) and predicted (solid) volatility per model against the in-session row number."""
    fig = go.Figure()
    model_names = plot_df['model_name'].unique()
    label_by_model = len(model_names) > 1
    palette = px.colors.qualitative.Plotly

    for position, model in enumerate(model_names):
        model_rows = plot_df[plot_df['model_name'] == model]
        # Number the rows 0..n-1 per model. Reusing the DataFrame's own index would
        # plot global CSV row numbers, so models would not share an x range and the
        # "within session" axis label would be wrong.
        session_index = list(range(len(model_rows)))
        colour = palette[position % len(palette)]
        fig.add_trace(go.Scatter(
            x=session_index, y=model_rows['true_vol'],
            mode='lines+markers', name=f'True Vol ({model})' if label_by_model else 'True Vol',
            line=dict(color=colour, dash='dot'), marker=dict(size=4)
        ))
        fig.add_trace(go.Scatter(
            x=session_index, y=model_rows['pred_vol'],
            mode='lines+markers', name=f'Pred Vol ({model})' if label_by_model else 'Pred Vol',
            line=dict(color=colour), marker=dict(size=4)
        ))

    fig.update_layout(
        template=PLOTLY_TEMPLATE,
        xaxis_title='Observation Index (within session)',
        yaxis_title='Volatility',
        legend_title_text='Legend',
        showlegend=True
    )
    return fig


def _metrics_bar_figure(selected_stock_id, selected_model_name):
    """Grouped bar chart of mse / qlike / r^2 per model for the selected stock."""
    metrics_df = df[df['stock_id'] == selected_stock_id]
    if selected_model_name != 'ALL_MODELS':
        metrics_df = metrics_df[metrics_df['model_name'] == selected_model_name]

    # One row per model: the first row found for the stock supplies the metric values.
    unique_metrics = metrics_df.drop_duplicates(subset=['model_name', 'stock_id'])
    if unique_metrics.empty:
        return _empty_figure("No metric data to display.")

    melted_metrics = unique_metrics.melt(
        id_vars=['model_name'],
        value_vars=['mse', 'qlike', 'r^2'],
        var_name='Metric',
        value_name='Value'
    )
    fig = px.bar(
        melted_metrics, x='model_name', y='Value', color='Metric',
        barmode='group', template=PLOTLY_TEMPLATE,
        labels={'Value': 'Metric Value', 'model_name': 'Model Name'}
    )
    fig.update_layout(showlegend=True)
    return fig


def _error_histogram(plot_df, split_by_model):
    """Histogram of prediction errors with a box marginal and a dashed line at zero error."""
    fig = px.histogram(
        plot_df, x='error',
        color='model_name' if split_by_model else None,
        marginal="box",
        template=PLOTLY_TEMPLATE,
        labels={'error': 'Prediction Error (True - Predicted)'}
    )
    fig.update_layout(showlegend=bool(split_by_model and plot_df['model_name'].nunique() > 1))
    fig.add_vline(x=0, line_width=2, line_dash="dash", line_color="gray", annotation_text="Ideal (No Error)")
    return fig


def _metrics_summary(plot_df, several_models, selected_stock_id, selected_time_id):
    """Children for the summary paragraph, built from the first row of ``plot_df``."""
    m_name = plot_df['model_name'].iloc[0]
    m_mse = plot_df['mse'].iloc[0]
    m_qlike = plot_df['qlike'].iloc[0]
    m_r2 = plot_df['r^2'].iloc[0]

    if several_models:
        heading = html.Strong("Displaying metrics for: All selected models (metrics shown are from first model in selection for brevity)")
    else:
        heading = html.Strong(f"Metrics for {m_name} (Stock: {selected_stock_id}, Time: {selected_time_id}):")

    return [
        heading,
        html.Br(),
        f"MSE: {m_mse:.6f}", html.Br(),
        f"QLIKE: {m_qlike:.6f}", html.Br(),
        f"R²: {m_r2:.6f}"
    ]


@app.callback(
    Output('scatter-pred-true', 'figure'),
    Output('line-time-series', 'figure'),
    Output('bar-metrics', 'figure'),
    Output('hist-error', 'figure'),
    Output('metrics-summary-display', 'children'),
    Input('stock-id-dropdown', 'value'),
    Input('time-id-dropdown', 'value'),
    Input('model-name-dropdown', 'value')
)
def update_graphs_and_summary(selected_stock_id, selected_time_id, selected_model_name):
    """Rebuild the four figures and the metrics summary for the current dropdown selection.

    Args:
        selected_stock_id: Value of the Stock ID dropdown.
        selected_time_id: Value of the Time ID dropdown.
        selected_model_name: Value of the Model Name dropdown; 'ALL_MODELS' keeps
            every model available for the stock/time pair.

    Returns:
        ``(scatter, line, bar, histogram, summary)``. When a selection is missing
        or matches no rows, the four figures are the same placeholder figure and
        the summary is a plain message string.
    """
    if not selected_stock_id or not selected_time_id or not selected_model_name:
        placeholder = _empty_figure("Please make selections in all dropdowns.")
        return placeholder, placeholder, placeholder, placeholder, "Select filters to see data."

    # Rows for the chosen stock/time pair, optionally narrowed to a single model.
    base_filtered_df = df[(df['stock_id'] == selected_stock_id) & (df['time_id'] == selected_time_id)]
    show_all_models = selected_model_name == 'ALL_MODELS'
    if show_all_models:
        plot_df = base_filtered_df.copy()
    else:
        plot_df = base_filtered_df[base_filtered_df['model_name'] == selected_model_name].copy()

    if plot_df.empty:
        placeholder = _empty_figure("No data for current selection.")
        return placeholder, placeholder, placeholder, placeholder, "No data for current selection."

    several_models = show_all_models and base_filtered_df['model_name'].nunique() > 1
    summary_children = _metrics_summary(plot_df, several_models, selected_stock_id, selected_time_id)

    return (
        _scatter_figure(plot_df, show_all_models),
        _line_figure(plot_df),
        _metrics_bar_figure(selected_stock_id, selected_model_name),
        _error_histogram(plot_df, show_all_models),
        html.P(summary_children),
    )


In [None]:
# Start the Dash server for the app defined above, with debug mode switched on.
app.run(debug=True)