In [None]:
{
 "cells": [
  {
   "cell_type": "markdown",
   "source": [
    "# Refined Monster Notebook: Interactive Per-Turn Drift Analysis (V2.1 CSV - Focused)\n",
    "\n",
    "Visualizing the dataset from `per_turn_drift_v2.csv`, focusing on core drift metrics\n",
    "and comparisons between models and system prompts."
   ],
   "metadata": {}
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1. Setup and Imports"
   ]
  },

  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import plotly.express as px\n",
    "import plotly.graph_objects as go\n",
    "from plotly.subplots import make_subplots\n",
    "import ipywidgets as widgets\n",
    "from IPython.display import display, HTML, clear_output\n",
    "import re\n",
    "from wordcloud import WordCloud\n",
    "import matplotlib.pyplot as plt\n",
    "import io\n",
    "import base64\n",
    "from collections import Counter\n",
    "import nltk\n",
    "from nltk.corpus import stopwords\n",
    "try:\n",
    "    stop_words = set(stopwords.words('english'))\n",
    "except LookupError:\n",
    "    nltk.download('stopwords')\n",
    "    stop_words = set(stopwords.words('english'))\n",
    "\n",
    "from sklearn.preprocessing import MultiLabelBinarizer\n",
    "\n",
    "plotly_template = \"plotly_white\"\n",
    "print(\"Libraries imported.\")"
   ]
  },
  {
   "cell_type": "markdown",
   "source": [
    "## 2. Data Loading and Enhanced Preprocessing"
   ],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# --- Data Loading ---\n",
    "# IMPORTANT: Replace 'YOUR_ABSOLUTE_PATH_HERE' with the actual absolute path to your CSV file\n",
    "# OR ensure the relative path is correct from where your Jupyter server is running.\n",
    "csv_file_path = 'logs/summary/per_turn_drift_v2.csv' # Default relative path\n",
    "# csv_file_path = '/Users/redhat/Documents/GitHub/LLM-RED-CORE/logs/summary/per_turn_drift_v2.csv' # Example absolute path\n",
    "\n",
    "try:\n",
    "    df_original = pd.read_csv(csv_file_path)\n",
    "    print(f\"Successfully loaded '{csv_file_path}'\")\n",
    "except FileNotFoundError:\n",
    "    print(f\"ERROR: '{csv_file_path}' not found. Please check the path and ensure the file exists.\")\n",
    "    print(\"Falling back to a minimal empty DataFrame structure. Most visualizations will be empty.\")\n",
    "    print(\"Ensure your export_per_turn_drift.py script has run successfully and created the CSV.\")\n",
    "    # Define columns expected by the rest of the notebook to avoid KeyErrors\n",
    "    expected_cols = ['filename', 'experiment_id', 'model', 'sys_prompt_tag', 'sys_prompt_version', \n",
    "                     'usr_prompt_tag', 'mode', 'system_prompt_hash', 'user_prompt_hash', \n",
    "                     'scenario_hash', 'turn_index', 'drift_score', 'drift_notes', \n",
    "                     'user_input', 'model_output', 'rendered_prompt', 'variant_id', \n",
    "                     'prompt_header', 'containment_flags']\n",
    "    df_original = pd.DataFrame(columns=expected_cols)\n",
    "    # You could re-integrate your more complex sample data generator here if you want better fallback behavior.\n",
    "\n",
    "df = df_original.copy()\n",
    "\n",
    "# --- Basic Cleaning & Type Conversion ---\n",
    "if not df.empty:\n",
    "    df['drift_score'] = pd.to_numeric(df['drift_score'], errors='coerce').fillna(0.0)\n",
    "    df['turn_index'] = pd.to_numeric(df['turn_index'], errors='coerce').fillna(0).astype(int)\n",
    "\n",
    "    # Fill NaN for critical categorical columns\n",
    "    categorical_cols_to_fill = ['model', 'sys_prompt_tag', 'sys_prompt_version', 'usr_prompt_tag', \n",
    "                                'experiment_id', 'mode', 'variant_id', 'prompt_header', 'filename',\n",
    "                                'system_prompt_hash', 'user_prompt_hash', 'scenario_hash']\n",
    "    for col in categorical_cols_to_fill:\n",
    "        if col in df.columns:\n",
    "            df[col] = df[col].fillna(f'{col.upper()}_UNKNOWN')\n",
    "        else: # If a column might be missing from the CSV (e.g. if generated by older script)\n",
    "            df[col] = f'{col.upper()}_MISSING'\n",
    "\n",
    "    df['user_input'] = df['user_input'].fillna('')\n",
    "    df['model_output'] = df['model_output'].fillna('')\n",
    "    df['rendered_prompt'] = df['rendered_prompt'].fillna('')\n",
    "    df['drift_notes'] = df['drift_notes'].fillna('')\n",
    "    df['containment_flags'] = df['containment_flags'].fillna('')\n",
    "else:\n",
    "    print(\"DataFrame is empty. Preprocessing skipped.\")\n",
    "\n",
    "# --- Parse `drift_notes` and `containment_flags` ---\n",
    "def parse_semi_colon_separated_string(notes_str):\n",
    "    if pd.isna(notes_str) or notes_str == '':\n",
    "        return []\n",
    "    return sorted(list(set([note.strip().lower() for note in str(notes_str).split(';') if note.strip()])))\n",
    "\n",
    "if 'drift_notes' in df.columns:\n",
    "    df['parsed_drift_notes'] = df['drift_notes'].apply(parse_semi_colon_separated_string)\n",
    "else:\n",
    "    df['parsed_drift_notes'] = pd.Series([[] for _ in range(len(df))], index=df.index)\n",
    "\n",
    "if 'containment_flags' in df.columns:\n",
    "    df['parsed_containment_flags'] = df['containment_flags'].apply(parse_semi_colon_separated_string)\n",
    "else:\n",
    "    df['parsed_containment_flags'] = pd.Series([[] for _ in range(len(df))], index=df.index)\n",
    "\n",
    "# --- One-Hot Encode `parsed_drift_notes` ---\n",
    "all_drift_note_tags = []\n",
    "if 'parsed_drift_notes' in df.columns:\n",
    "    all_drift_note_tags = sorted(list(set(tag for sublist in df['parsed_drift_notes'] for tag in sublist)))\n",
    "if all_drift_note_tags:\n",
    "    mlb_notes = MultiLabelBinarizer(classes=all_drift_note_tags)\n",
    "    df_notes_encoded = pd.DataFrame(mlb_notes.fit_transform(df['parsed_drift_notes']),\n",
    "                                    columns=[\"dnote_\" + c for c in mlb_notes.classes_], index=df.index)\n",
    "    df = pd.concat([df, df_notes_encoded], axis=1)\n",
    "    print(f\"One-hot encoded {len(mlb_notes.classes_)} drift note types.\")\n",
    "else:\n",
    "    mlb_notes = None\n",
    "    print(\"No drift notes found to one-hot encode.\")\n",
    "\n",
    "# --- One-Hot Encode `parsed_containment_flags` ---\n",
    "all_containment_flag_tags = []\n",
    "if 'parsed_containment_flags' in df.columns:\n",
    "    all_containment_flag_tags = sorted(list(set(tag for sublist in df['parsed_containment_flags'] for tag in sublist)))\n",
    "if all_containment_flag_tags:\n",
    "    mlb_flags = MultiLabelBinarizer(classes=all_containment_flag_tags)\n",
    "    df_flags_encoded = pd.DataFrame(mlb_flags.fit_transform(df['parsed_containment_flags']),\n",
    "                                     columns=[\"cflag_\" + c for c in mlb_flags.classes_], index=df.index)\n",
    "    df = pd.concat([df, df_flags_encoded], axis=1)\n",
    "    print(f\"One-hot encoded {len(mlb_flags.classes_)} containment flag types.\")\n",
    "else:\n",
    "    mlb_flags = None\n",
    "    print(\"No containment flags found to one-hot encode.\")\n",
    "\n",
    "print(\"\\nPreprocessing Complete.\")\n",
    "if not df.empty:\n",
    "    print(\"DataFrame info:\")\n",
    "    df.info()\n",
    "    print(\"\\nSample of processed data (first 3 rows):\")\n",
    "    display(df.head(3))\n",
    "else:\n",
    "    print(\"DataFrame remains empty.\")"
   ]
  },
  {
   "cell_type": "markdown",
   "source": [
    "## 3. Interactive Dashboard Setup (Focused)"
   ],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# --- Widgets ---\n",
    "dashboard_title_v2_1 = widgets.HTML(\"<h1>Interactive Model Drift Explorer (V2.1 Data)</h1>\")\n",
    "\n",
    "def create_dropdown_v2_1(column_name, description_text, default_value='All', width='auto'):\n",
    "    options = ['All']\n",
    "    if column_name in df.columns and not df.empty and df[column_name].notna().any():\n",
    "        options.extend(sorted(list(df[column_name].dropna().unique())))\n",
    "    else:\n",
    "        options.append(f'{column_name.upper()}_NODATA')\n",
    "\n",
    "    if default_value not in options and options: default_value = options[0]\n",
    "    elif not options: return widgets.HTML(f\"<i>No options for {description_text}</i>\")\n",
    "    \n",
    "    return widgets.Dropdown(options=options, value=default_value, description=description_text, \n",
    "                            style={'description_width': 'initial'}, layout=widgets.Layout(width=width))\n",
    "\n",
    "model_dropdown_v2_1 = create_dropdown_v2_1('model', 'Model:')\n",
    "sys_prompt_dropdown_v2_1 = create_dropdown_v2_1('sys_prompt_tag', 'Sys Prompt:')\n",
    "usr_prompt_dropdown_v2_1 = create_dropdown_v2_1('usr_prompt_tag', 'Usr Prompt:')\n",
    "experiment_dropdown_v2_1 = create_dropdown_v2_1('experiment_id', 'Experiment:', width='500px')\n",
    "mode_dropdown_v2_1 = create_dropdown_v2_1('mode', 'Mode:')\n",
    "variant_id_dropdown_v2_1 = create_dropdown_v2_1('variant_id', 'Variant ID:')\n",
    "\n",
    "\n",
    "min_drift_v2_1, max_drift_v2_1 = (df['drift_score'].min(), df['drift_score'].max()) if not df.empty and 'drift_score' in df.columns and df['drift_score'].notna().any() else (0.0, 1.0)\n",
    "drift_slider_v2_1 = widgets.FloatRangeSlider(\n",
    "    value=[min_drift_v2_1, max_drift_v2_1], min=min_drift_v2_1, max=max_drift_v2_1, step=0.01,\n",
    "    description='Drift Range:', continuous_update=False, style={'description_width': 'initial'}, layout=widgets.Layout(width='400px')\n",
    ")\n",
    "\n",
    "min_turn_v2_1, max_turn_v2_1 = (int(df['turn_index'].min()), int(df['turn_index'].max())) if not df.empty and 'turn_index' in df.columns and df['turn_index'].notna().any() else (1, 10)\n",
    "turn_slider_v2_1 = widgets.IntRangeSlider(\n",
    "    value=[min_turn_v2_1, max_turn_v2_1], min=min_turn_v2_1, max=max_turn_v2_1, step=1,\n",
    "    description='Turn Range:', continuous_update=False, style={'description_width': 'initial'}, layout=widgets.Layout(width='400px')\n",
    ")\n",
    "\n",
    "if all_drift_note_tags:\n",
    "    dnote_filter_v2_1 = widgets.SelectMultiple(options=['All'] + all_drift_note_tags, value=['All'], description='Drift Notes:', rows=min(5, len(all_drift_note_tags)+1))\n",
    "else:\n",
    "    dnote_filter_v2_1 = widgets.HTML(\"<i>No drift notes for filtering.</i>\")\n",
    "\n",
    "if all_containment_flag_tags:\n",
    "    cflag_filter_v2_1 = widgets.SelectMultiple(options=['All'] + all_containment_flag_tags, value=['All'], description='Contain. Flags:', rows=min(5, len(all_containment_flag_tags)+1))\n",
    "else:\n",
    "    cflag_filter_v2_1 = widgets.HTML(\"<i>No containment flags for filtering.</i>\")\n",
    "\n",
    "\n",
    "viz_options_v2_1 = ['Drift Trajectory (Model & SysPrompt vs Turn)', \n",
    "                  'Drift Distribution (Model vs SysPrompt)', \n",
    "                  'Drift Heatmap (Model & SysPrompt vs Turn)',\n",
    "                  'Drift Notes Frequency', 'Drift Notes vs Score',\n",
    "                  'Containment Flags Frequency', 'Containment Flags vs Score',\n",
    "                  'Text WordCloud (Model Output)',\n",
    "                  '3D Drift Scatter (Turn, Model/SysPrompt, Score)']\n",
    "viz_type_dropdown_v2_1 = widgets.Dropdown(options=viz_options_v2_1, value=viz_options_v2_1[0], description='View Type:')\n",
    "\n",
    "main_plot_output_v2_1 = go.FigureWidget()\n",
    "detail_text_output_v2_1 = widgets.HTML(value=\"<i>Click point on Trajectory/3D Scatter for details.</i>\", layout=widgets.Layout(height='250px', overflow_y='auto', border='1px solid #ccc', padding='10px'))\n",
    "stats_output_v2_1 = widgets.HTML(value=\"<i>Statistics summary.</i>\", layout=widgets.Layout(height='250px', overflow_y='auto', border='1px solid #ccc', padding='10px'))\n",
    "stats_button_v2_1 = widgets.Button(description=\"Refresh Filtered Stats\", button_style='info')"
   ]
  },
  {
   "cell_type": "markdown",
   "source": [
    "## 4. Focused Filtering and Plotting Functions"
   ],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# --- Focused Filtering Function ---\n",
    "def get_filtered_df_v2_1():\n",
    "    f_df = df.copy()\n",
    "    if f_df.empty: return f_df\n",
    "\n",
    "    # Apply filters only if the corresponding column exists in f_df\n",
    "    if model_dropdown_v2_1.value != 'All' and 'model' in f_df.columns: f_df = f_df[f_df['model'] == model_dropdown_v2_1.value]\n",
    "    if sys_prompt_dropdown_v2_1.value != 'All' and 'sys_prompt_tag' in f_df.columns: f_df = f_df[f_df['sys_prompt_tag'] == sys_prompt_dropdown_v2_1.value]\n",
    "    if usr_prompt_dropdown_v2_1.value != 'All' and 'usr_prompt_tag' in f_df.columns: f_df = f_df[f_df['usr_prompt_tag'] == usr_prompt_dropdown_v2_1.value]\n",
    "    if experiment_dropdown_v2_1.value != 'All' and 'experiment_id' in f_df.columns: f_df = f_df[f_df['experiment_id'] == experiment_dropdown_v2_1.value]\n",
    "    if mode_dropdown_v2_1.value != 'All' and 'mode' in f_df.columns: f_df = f_df[f_df['mode'] == mode_dropdown_v2_1.value]\n",
    "    if variant_id_dropdown_v2_1.value != 'All' and 'variant_id' in f_df.columns: f_df = f_df[f_df['variant_id'] == variant_id_dropdown_v2_1.value]\n",
    "    \n",
    "    if 'drift_score' in f_df.columns: \n",
    "        f_df = f_df[(f_df['drift_score'] >= drift_slider_v2_1.value[0]) & (f_df['drift_score'] <= drift_slider_v2_1.value[1])]\n",
    "    if 'turn_index' in f_df.columns:\n",
    "        f_df = f_df[(f_df['turn_index'] >= turn_slider_v2_1.value[0]) & (f_df['turn_index'] <= turn_slider_v2_1.value[1])]\n",
    "\n",
    "    if mlb_notes and isinstance(dnote_filter_v2_1, widgets.SelectMultiple) and 'All' not in dnote_filter_v2_1.value:\n",
    "        selected_notes = dnote_filter_v2_1.value\n",
    "        note_cols_to_check = [\"dnote_\" + note for note in selected_notes]\n",
    "        valid_note_cols = [col for col in note_cols_to_check if col in f_df.columns]\n",
    "        if valid_note_cols: f_df = f_df[f_df[valid_note_cols].sum(axis=1) > 0]\n",
    "        elif selected_notes and not f_df.empty: f_df = pd.DataFrame(columns=f_df.columns) # Return empty if specific notes selected but no such columns\n",
    "\n",
    "    if mlb_flags and isinstance(cflag_filter_v2_1, widgets.SelectMultiple) and 'All' not in cflag_filter_v2_1.value:\n",
    "        selected_flags = cflag_filter_v2_1.value\n",
    "        flag_cols_to_check = [\"cflag_\" + flag for flag in selected_flags]\n",
    "        valid_flag_cols = [col for col in flag_cols_to_check if col in f_df.columns]\n",
    "        if valid_flag_cols: f_df = f_df[f_df[valid_flag_cols].sum(axis=1) > 0]\n",
    "        elif selected_flags and not f_df.empty: f_df = pd.DataFrame(columns=f_df.columns)\n",
    "            \n",
    "    return f_df\n",
    "\n",
    "# --- Click Handler (More Robust) ---\n",
    "def handle_point_click_v2_1(trace, points, state):\n",
    "    if not points.point_inds:\n",
    "        detail_text_output_v2_1.value = \"<i>Click on a data point on Trajectory or 3D Scatter.</i>\"\n",
    "        return\n",
    "\n",
    "    clicked_custom_data_index_in_trace = points.point_inds[0]\n",
    "    selected_row_data = None\n",
    "\n",
    "    # Check if customdata (original DataFrame index) was passed and retrieve it\n",
    "    if hasattr(trace, 'customdata') and trace.customdata is not None and \\\n",
    "       len(trace.customdata) > clicked_custom_data_index_in_trace:\n",
    "        \n",
    "        original_df_idx = trace.customdata[clicked_custom_data_index_in_trace]\n",
    "        \n",
    "        # Ensure original_df_idx is a single value if it's wrapped in a list/tuple by Plotly\n",
    "        if isinstance(original_df_idx, (list, tuple)) and len(original_df_idx) == 1:\n",
    "            original_df_idx = original_df_idx[0]\n",
    "            \n",
    "        if original_df_idx in df_original.index:\n",
    "            selected_row_data = df_original.loc[original_df_idx]\n",
    "        else:\n",
    "            detail_text_output_v2_1.value = f\"<i>Error: Original index {original_df_idx} not found in df_original. Customdata might be misconfigured.</i>\"\n",
    "            return\n",
    "    else:\n",
    "        # Fallback if customdata is not setup - this is less reliable\n",
    "        f_df = get_filtered_df_v2_1()\n",
    "        if not f_df.empty and clicked_custom_data_index_in_trace < len(f_df):\n",
    "            # This assumes point_inds directly map to filtered_df, which might not always hold for px.line with groups\n",
    "             clicked_row_in_filtered_df = f_df.iloc[clicked_custom_data_index_in_trace] \n",
    "             # We need filename and turn to uniquely identify in df_original\n",
    "             fname = clicked_row_in_filtered_df.get('filename')\n",
    "             t_idx = clicked_row_in_filtered_df.get('turn_index')\n",
    "             if fname is not None and t_idx is not None:\n",
    "                possible_matches = df_original[(df_original['filename']==fname) & (df_original['turn_index']==t_idx)]\n",
    "                if not possible_matches.empty:\n",
    "                    selected_row_data = possible_matches.iloc[0]\n",
    "\n",
    "    if selected_row_data is not None and isinstance(selected_row_data, pd.Series):\n",
    "        details_html = f\"\"\"\n",
    "        <h3>Details:</h3>\n",
    "        <b>File:</b> {selected_row_data.get('filename', 'N/A')}<br>\n",
    "        <b>Experiment:</b> {selected_row_data.get('experiment_id', 'N/A')}<br>\n",
    "        <b>Model:</b> {selected_row_data.get('model', 'N/A')}<br>\n",
    "        <b>Sys Prompt:</b> {selected_row_data.get('sys_prompt_tag', 'N/A')} (v: {selected_row_data.get('sys_prompt_version', 'N/A')})<br>\n",
    "        <b>Usr Prompt:</b> {selected_row_data.get('usr_prompt_tag', 'N/A')}<br>\n",
    "        <b>Variant ID:</b> {selected_row_data.get('variant_id', 'N/A')}<br>\n",
    "        <b>Mode:</b> {selected_row_data.get('mode', 'N/A')}<br>\n",
    "        <b>Turn:</b> {selected_row_data.get('turn_index', 'N/A')}<br>\n",
    "        <b>Drift Score:</b> {selected_row_data.get('drift_score', np.nan):.3f}<br>\n",
    "        <b>Drift Notes:</b> {selected_row_data.get('drift_notes', 'N/A')}<br>\n",
    "        <b>Containment Flags:</b> {selected_row_data.get('containment_flags', 'N/A')}<br>\n",
    "        <h4>Rendered Prompt:</h4><div style='background-color:#f0f0f0; padding:5px; border:1px solid #ddd; max-height:100px; overflow-y:auto;'>{HTML(str(selected_row_data.get('rendered_prompt', ''))).value}</div>\n",
    "        <h4>Model Output:</h4><div style='background-color:#e6f3ff; padding:5px; border:1px solid #ddd; max-height:150px; overflow-y:auto;'>{HTML(str(selected_row_data.get('model_output', ''))).value}</div>\n",
    "        \"\"\"\n",
    "        detail_text_output_v2_1.value = details_html\n",
    "    else:\n",
    "        detail_text_output_v2_1.value = \"<i>Could not retrieve details for the clicked point. Ensure plots pass original index via custom_data.</i>\"\n",
    "\n",
    "\n",
    "# --- Plotting Functions (Focused) ---\n",
    "def plot_drift_trajectory_focused(f_df):\n",
    "    if f_df.empty: return go.Figure().update_layout(title_text=\"No data for Trajectory.\", template=plotly_template)\n",
    "    fig = px.line(f_df, x='turn_index', y='drift_score', color='model',\n",
    "                  line_dash='sys_prompt_tag', markers=True,\n",
    "                  hover_data=['filename', 'experiment_id', 'drift_notes'],\n",
    "                  custom_data=[f_df.index], # Pass original DataFrame index\n",
    "                  title=\"Drift Score per Turn (Model & System Prompt)\", template=plotly_template)\n",
    "    fig.update_layout(height=600, legend_title_text='Model | SysPrompt')\n",
    "    for trace in fig.data:\n",
    "        if isinstance(trace, (go.Scatter, go.Scattergl)):\n",
    "             trace.on_click(handle_point_click_v2_1)\n",
    "    return fig\n",
    "\n",
    "def plot_drift_distribution_focused(f_df):\n",
    "    if f_df.empty: return go.Figure().update_layout(title_text=\"No data for Distribution.\", template=plotly_template)\n",
    "    fig = px.box(f_df, x='model', y='drift_score', color='sys_prompt_tag',\n",
    "                 title='Drift Score Distribution (Model vs System Prompt)', template=plotly_template)\n",
    "    fig.update_layout(height=600, xaxis_title='Model', yaxis_title='Drift Score', legend_title_text='System Prompt')\n",
    "    return fig\n",
    "\n",
    "def plot_drift_heatmap_focused(f_df):\n",
    "    if f_df.empty: return go.Figure().update_layout(title_text=\"No data for Heatmap.\", template=plotly_template)\n",
    "    try:\n",
    "        # Try to pivot for a more detailed view if possible\n",
    "        if f_df['model'].nunique() > 1 and f_df['sys_prompt_tag'].nunique() > 1:\n",
    "             pivot_df = f_df.groupby(['turn_index', 'model', 'sys_prompt_tag'])['drift_score'].mean().unstack(['model', 'sys_prompt_tag'])\n",
    "             x_label = \"Model & System Prompt\"\n",
    "             y_label = \"Turn Index\"\n",
    "        elif f_df['model'].nunique() > 1:\n",
    "            pivot_df = f_df.pivot_table(values='drift_score', index='model', columns='turn_index', aggfunc=np.mean)\n",
    "            x_label = \"Turn Index\"\n",
    "            y_label = \"Model\"\n",
    "        elif f_df['sys_prompt_tag'].nunique() > 1:\n",
    "            pivot_df = f_df.pivot_table(values='drift_score', index='sys_prompt_tag', columns='turn_index', aggfunc=np.mean)\n",
    "            x_label = \"Turn Index\"\n",
    "            y_label = \"System Prompt\"\n",
    "        else: # Fallback if only one model and one sys_prompt selected (or not enough diversity)\n",
    "            pivot_df = f_df.pivot_table(values='drift_score', index='experiment_id', columns='turn_index', aggfunc=np.mean)\n",
    "            x_label = \"Turn Index\"\n",
    "            y_label = \"Experiment ID\"\n",
    "        \n",
    "        if pivot_df.empty or pivot_df.shape[1] == 0:\n",
    "            return go.Figure().update_layout(title_text=\"Not enough data diversity for this heatmap view.\", template=plotly_template)\n",
    "\n",
    "        fig = px.imshow(pivot_df, \n",
    "                        labels=dict(x=x_label, y=y_label, color=\"Avg Drift Score\"),\n",
    "                        title='Avg Drift Heatmap', \n",
    "                        color_continuous_scale='RdYlGn_r',\n",
    "                        template=plotly_template, aspect=\"auto\")\n",
    "        fig.update_layout(height=700, xaxis_tickangle=-45 if len(pivot_df.columns) > 10 else 0)\n",
    "    except Exception as e:\n",
    "        fig = go.Figure().update_layout(title_text=f\"Error creating heatmap: {e}\", template=plotly_template)\n",
    "    return fig\n",
    "\n",
    "def plot_notes_or_flags_freq_v2_1(f_df, column_prefix, parsed_column_name, title_suffix):\n",
    "    if f_df.empty or parsed_column_name not in f_df.columns:\n",
    "        return go.Figure().update_layout(title_text=f\"No data for {title_suffix} Frequency.\", template=plotly_template)\n",
    "    all_items_filtered = Counter(tag for sublist in f_df[parsed_column_name] for tag in sublist)\n",
    "    if not all_items_filtered: return go.Figure().update_layout(title_text=f\"No {title_suffix} in selected data.\", template=plotly_template)\n",
    "    item_freq_df = pd.DataFrame(all_items_filtered.most_common(20), columns=[title_suffix.split()[0], 'Frequency'])\n",
    "    fig = px.bar(item_freq_df, x='Frequency', y=title_suffix.split()[0], orientation='h', title=f'Top 20 {title_suffix} Frequencies', template=plotly_template)\n",
    "    fig.update_layout(height=max(600, len(item_freq_df) * 30), yaxis={'categoryorder':'total ascending'})\n",
    "    return fig\n",
    "\n",
    "def plot_notes_or_flags_vs_score_v2_1(f_df, column_prefix, title_suffix):\n",
    "    if f_df.empty or not any(col.startswith(column_prefix) for col in f_df.columns):\n",
    "        return go.Figure().update_layout(title_text=f\"No data/encoded {title_suffix} for Score Comparison.\", template=plotly_template)\n",
    "    item_cols = [col for col in f_df.columns if col.startswith(column_prefix)]\n",
    "    if not item_cols: return go.Figure().update_layout(title_text=f\"No one-hot encoded {title_suffix} columns found.\", template=plotly_template)\n",
    "    melted_df = f_df.melt(id_vars=['drift_score'], value_vars=item_cols, var_name=f'{title_suffix} Type', value_name='Is Present')\n",
    "    melted_df = melted_df[melted_df['Is Present'] == 1]\n",
    "    melted_df[f'{title_suffix} Type'] = melted_df[f'{title_suffix} Type'].str.replace(column_prefix, '')\n",
    "    if melted_df.empty: return go.Figure().update_layout(title_text=f\"No {title_suffix} present for score comparison.\", template=plotly_template)\n",
    "    fig = px.box(melted_df, x=f'{title_suffix} Type', y='drift_score', title=f'Drift Score Distribution by {title_suffix} Type', template=plotly_template)\n",
    "    fig.update_layout(height=600, xaxis_tickangle=-45)\n",
    "    return fig\n",
    "\n",
    "def plot_wordcloud_v2_1(f_df, text_column='model_output'):\n",
    "    if f_df.empty or text_column not in f_df.columns:\n",
    "        return go.Figure().update_layout(title_text=\"No data for WordCloud.\", template=plotly_template)\n",
    "    text_corpus = \" \".join(f_df[text_column].astype(str).dropna())\n",
    "    if not text_corpus.strip(): return go.Figure().update_layout(title_text=f\"No text in '{text_column}' for WordCloud.\", template=plotly_template)\n",
    "    processed_text = text_corpus.lower()\n",
    "    processed_text = re.sub(r'[\\\"“”‘’]', '', processed_text) # Remove more quote types\n",
    "    processed_text = re.sub(r'[^\\w\\s]', ' ', processed_text) # Replace punctuation with space\n",
    "    words = [word for word in processed_text.split() if word not in stop_words and len(word) > 2 and not word.isnumeric()]\n",
    "    if not words: return go.Figure().update_layout(title_text=\"No significant words for Word Cloud.\", template=plotly_template)\n",
    "    try:\n",
    "        wordcloud = WordCloud(width=800, height=500, background_color='white', colormap=\"viridis\").generate(\" \".join(words))\n",
    "        img_bytes = io.BytesIO()\n",
    "        fig_temp_wc, ax = plt.subplots(figsize=(10,6))\n",
    "        ax.imshow(wordcloud, interpolation='bilinear')\n",
    "        ax.axis('off')\n",
    "        fig_temp_wc.savefig(img_bytes, format='png', bbox_inches='tight', pad_inches=0)\n",
    "        plt.close(fig_temp_wc)\n",
    "        img_bytes.seek(0)\n",
    "        img_base64 = base64.b64encode(img_bytes.read()).decode('utf-8')\n",
    "        fig = go.Figure(go.Image(source=f'data:image/png;base64,{img_base64}'))\n",
    "        fig.update_layout(title_text=f'Word Cloud of {text_column} (Filtered)', height=600, template=plotly_template)\n",
    "        fig.update_xaxes(showticklabels=False, visible=False)\n",
    "        fig.update_yaxes(showticklabels=False, visible=False)\n",
    "    except Exception as e:\n",
    "        fig = go.Figure().update_layout(title_text=f\"Error generating wordcloud: {e}\", template=plotly_template)\n",
    "    return fig\n",
    "\n",
    "def plot_3d_drift_scatter_focused(f_df):\n",
    "    if f_df.empty: return go.Figure().update_layout(title_text=\"No data for 3D Scatter.\", template=plotly_template)\n",
    "    y_axis_col = 'model'\n",
    "    color_col = 'sys_prompt_tag'\n",
    "    y_title = 'Model'\n",
    "    if model_dropdown_v2_1.value != 'All' and 'sys_prompt_tag' in f_df.columns and f_df['sys_prompt_tag'].nunique() > 0:\n",
    "        y_axis_col = 'sys_prompt_tag'\n",
    "        color_col = 'usr_prompt_tag' if 'usr_prompt_tag' in f_df.columns and f_df['usr_prompt_tag'].nunique() > 0 else 'experiment_id'\n",
    "        y_title = 'System Prompt'\n",
    "    elif sys_prompt_dropdown_v2_1.value != 'All' and 'model' in f_df.columns and f_df['model'].nunique() > 0:\n",
    "        y_axis_col = 'model'\n",
    "        color_col = 'usr_prompt_tag' if 'usr_prompt_tag' in f_df.columns and f_df['usr_prompt_tag'].nunique() > 0 else 'experiment_id'\n",
    "        y_title = 'Model'\n",
    "    \n",
    "    if y_axis_col not in f_df.columns or f_df[y_axis_col].nunique() == 0:\n",
    "        return go.Figure().update_layout(title_text=f\"Not enough unique values for Y-axis '{y_title}' in 3D scatter.\", template=plotly_template)\n",
    "    if color_col not in f_df.columns:\n",
    "        color_col = 'model' # Fallback\n",
    "\n",
    "    fig = px.scatter_3d(f_df, x='turn_index', y=y_axis_col, z='drift_score',\n",
    "                        color=color_col, size='drift_score', opacity=0.7,\n",
    "                        hover_data=['filename', 'experiment_id', 'drift_notes', 'variant_id'],\n",
    "                        custom_data=[f_df.index], # Pass original DataFrame index\n",
    "                        color_continuous_scale=px.colors.sequential.Viridis if pd.api.types.is_numeric_dtype(f_df[color_col]) and f_df[color_col].nunique() > 1 else None,\n",
    "                        color_discrete_sequence=px.colors.qualitative.Plotly if not (pd.api.types.is_numeric_dtype(f_df[color_col]) and f_df[color_col].nunique() > 1) else None,\n",
    "                        title='3D Drift (Turn vs Y-Category vs Score)', template=plotly_template)\n",
    "    y_categories = sorted(f_df[y_axis_col].dropna().unique())\n",
    "    fig.update_layout(height=700, scene=dict(\n",
    "        xaxis_title='Turn Index',\n",
    "        yaxis_title=y_title,\n",
    "        zaxis_title='Drift Score',\n",
    "        yaxis=dict(type='category', categoryorder='array', categoryarray=y_categories)\n",
    "    ))\n",
    "    for trace in fig.data:\n",
    "        if isinstance(trace, go.Scatter3d):\n",
    "            trace.on_click(handle_point_click_v2_1)\n",
    "    return fig\n",
    "\n",
    "# --- Statistics Function (Focused) ---\n",
    "def update_stats_output_v2_1(b=None):\n",
    "    \"\"\"Render summary statistics for the filtered rows into ``stats_output_v2_1``.\n",
    "\n",
    "    Args:\n",
    "        b: Button instance supplied by ``on_click``; unused, so the function\n",
    "            can also be called directly without arguments.\n",
    "\n",
    "    Returns:\n",
    "        None. The summary is written as HTML to ``stats_output_v2_1.value``.\n",
    "    \"\"\"\n",
    "    f_df = get_filtered_df_v2_1()\n",
    "    if f_df.empty:\n",
    "        stats_output_v2_1.value = \"<i>No data for selected filters.</i>\"\n",
    "        return\n",
    "\n",
    "    drift = f_df['drift_score']\n",
    "    stats_html = f\"<h3>Filtered Data Statistics ({len(f_df)} rows):</h3>\"\n",
    "    stats_html += f\"<b>Avg Drift:</b> {drift.mean():.3f} | <b>Median:</b> {drift.median():.3f} | <b>StdDev:</b> {drift.std():.3f}<br>\"\n",
    "    stats_html += f\"<b>Min Drift:</b> {drift.min():.3f} | <b>Max Drift:</b> {drift.max():.3f}<br>\"\n",
    "\n",
    "    # The model x system-prompt table needs both columns and at least one row\n",
    "    # in which neither value is missing.\n",
    "    has_model_and_sys_prompt = (\n",
    "        'model' in f_df.columns\n",
    "        and 'sys_prompt_tag' in f_df.columns\n",
    "        and f_df[['model', 'sys_prompt_tag']].notna().all(axis=1).any()\n",
    "    )\n",
    "    if has_model_and_sys_prompt:\n",
    "        try:\n",
    "            avg_drift_model_sys = f_df.groupby(['model', 'sys_prompt_tag'])['drift_score'].mean().unstack()\n",
    "            if not avg_drift_model_sys.empty:\n",
    "                # Parenthesised so the concatenation spans several lines without\n",
    "                # a backslash line continuation.\n",
    "                stats_html += (\n",
    "                    \"<br><b>Avg Drift Score by Model and System Prompt:</b><br>\"\n",
    "                    + avg_drift_model_sys.to_html(classes='table table-sm table-striped',\n",
    "                                                  float_format='{:.3f}'.format)\n",
    "                )\n",
    "        except Exception as e:\n",
    "            # Best effort: a failure is reported inside the panel instead of being raised.\n",
    "            stats_html += f\"<br><i>Error generating model/sys_prompt stats: {e}</i>\"\n",
    "    else:\n",
    "        stats_html += \"<br><i>Not enough data for Avg Drift by Model and System Prompt.</i>\"\n",
    "\n",
    "    # Top-5 tag counts for each parsed tag column that is present.\n",
    "    # Assumes every cell in these columns is an iterable of tags - TODO confirm\n",
    "    # against the preprocessing cell.\n",
    "    tag_sections = (\n",
    "        ('parsed_drift_notes', \"<br><b>Top 5 Drift Notes in Selection:</b><br>\"),\n",
    "        ('parsed_containment_flags', \"<br><b>Top 5 Containment Flags:</b><br>\"),\n",
    "    )\n",
    "    for column, heading in tag_sections:\n",
    "        if column not in f_df.columns:\n",
    "            continue\n",
    "        tag_counts = Counter(tag for sublist in f_df[column] for tag in sublist)\n",
    "        if tag_counts:\n",
    "            stats_html += heading\n",
    "            for tag, count in tag_counts.most_common(5):\n",
    "                stats_html += f\"- {tag}: {count}<br>\"\n",
    "\n",
    "    stats_output_v2_1.value = stats_html\n",
    "\n",
    "stats_button_v2_1.on_click(update_stats_output_v2_1)"
   ]
  },
  {
   "cell_type": "markdown",
   "source": [
    "## 5. Dashboard Update Logic and Display (V2.1 - Focused)"
   ],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# --- Main Update Function (V2.1) ---\n",
    "def on_value_change_v2_1(*args):\n",
    "    \"\"\"Rebuild the main plot for the current filters and the selected view.\n",
    "\n",
    "    Used as the ``observe`` callback of the filter widgets and also called\n",
    "    directly for the initial render; any positional arguments are ignored.\n",
    "    Updates ``main_plot_output_v2_1`` in place and resets the hint shown in\n",
    "    ``detail_text_output_v2_1``.\n",
    "    \"\"\"\n",
    "    f_df = get_filtered_df_v2_1()\n",
    "    selected_view = viz_type_dropdown_v2_1.value\n",
    "\n",
    "    # Views whose points feed the click-for-details panel.\n",
    "    clickable_views = ('Drift Trajectory (Model & SysPrompt vs Turn)',\n",
    "                       '3D Drift Scatter (Turn, Model/SysPrompt, Score)')\n",
    "\n",
    "    # View name -> builder. Lambdas defer the call so only the selected plot is built.\n",
    "    view_builders = {\n",
    "        'Drift Trajectory (Model & SysPrompt vs Turn)': lambda: plot_drift_trajectory_focused(f_df),\n",
    "        'Drift Distribution (Model vs SysPrompt)': lambda: plot_drift_distribution_focused(f_df),\n",
    "        'Drift Heatmap (Model & SysPrompt vs Turn)': lambda: plot_drift_heatmap_focused(f_df),\n",
    "        'Drift Notes Frequency': lambda: plot_notes_or_flags_freq_v2_1(f_df, \"dnote_\", \"parsed_drift_notes\", \"Drift Note\"),\n",
    "        'Drift Notes vs Score': lambda: plot_notes_or_flags_vs_score_v2_1(f_df, \"dnote_\", \"Drift Note\"),\n",
    "        'Containment Flags Frequency': lambda: plot_notes_or_flags_freq_v2_1(f_df, \"cflag_\", \"parsed_containment_flags\", \"Containment Flag\"),\n",
    "        'Containment Flags vs Score': lambda: plot_notes_or_flags_vs_score_v2_1(f_df, \"cflag_\", \"Containment Flag\"),\n",
    "        'Text WordCloud (Model Output)': lambda: plot_wordcloud_v2_1(f_df),\n",
    "        '3D Drift Scatter (Turn, Model/SysPrompt, Score)': lambda: plot_3d_drift_scatter_focused(f_df),\n",
    "    }\n",
    "\n",
    "    with main_plot_output_v2_1.batch_update():\n",
    "        # Clear the previous view before drawing the new one.\n",
    "        main_plot_output_v2_1.data = []\n",
    "        main_plot_output_v2_1.layout = {}\n",
    "\n",
    "        build_view = view_builders.get(selected_view)\n",
    "        fig = build_view() if build_view is not None else None\n",
    "\n",
    "        if fig is not None:\n",
    "            main_plot_output_v2_1.add_traces(fig.data)\n",
    "            main_plot_output_v2_1.layout = fig.layout\n",
    "            main_plot_output_v2_1.layout.height = 700\n",
    "            main_plot_output_v2_1.layout.template = plotly_template\n",
    "\n",
    "            if selected_view in clickable_views:\n",
    "                # The widget holds its own copies of the traces added above, so the\n",
    "                # click handler is attached to those copies rather than relying on\n",
    "                # handlers set on the temporary source figure.\n",
    "                for trace in main_plot_output_v2_1.data:\n",
    "                    trace.on_click(handle_point_click_v2_1)\n",
    "\n",
    "    if selected_view not in clickable_views:\n",
    "        detail_text_output_v2_1.value = \"<i>Detail view on click available for 'Drift Trajectory' and '3D Drift Scatter'.</i>\"\n",
    "    else:\n",
    "        detail_text_output_v2_1.value = \"<i>Click on a point to see details.</i>\"\n",
    "    \n",
    "# --- Observe Changes ---\n",
    "# Filter widgets whose value changes trigger a redraw of the main plot.\n",
    "widgets_to_observe_v2_1 = [\n",
    "    model_dropdown_v2_1, sys_prompt_dropdown_v2_1, usr_prompt_dropdown_v2_1,\n",
    "    experiment_dropdown_v2_1, mode_dropdown_v2_1, variant_id_dropdown_v2_1,\n",
    "    drift_slider_v2_1, turn_slider_v2_1, viz_type_dropdown_v2_1,\n",
    "]\n",
    "# The note/flag filters are observed only when they are SelectMultiple widgets.\n",
    "widgets_to_observe_v2_1.extend(\n",
    "    tag_filter for tag_filter in (dnote_filter_v2_1, cflag_filter_v2_1)\n",
    "    if isinstance(tag_filter, widgets.SelectMultiple)\n",
    ")\n",
    "\n",
    "for w in widgets_to_observe_v2_1:\n",
    "    w.observe(on_value_change_v2_1, names='value')\n",
    "\n",
    "# --- Dashboard Layout (V2.1 - Focused) ---\n",
    "# Four filter columns laid out side by side.\n",
    "controls_col1_v2_1 = widgets.VBox(\n",
    "    [model_dropdown_v2_1, sys_prompt_dropdown_v2_1, usr_prompt_dropdown_v2_1]\n",
    ")\n",
    "controls_col2_v2_1 = widgets.VBox(\n",
    "    [experiment_dropdown_v2_1, mode_dropdown_v2_1, variant_id_dropdown_v2_1]\n",
    ")\n",
    "controls_col3_sliders_v2_1 = widgets.VBox([drift_slider_v2_1, turn_slider_v2_1])\n",
    "controls_col4_multiselect_v2_1 = widgets.VBox([dnote_filter_v2_1, cflag_filter_v2_1])\n",
    "\n",
    "filters_area_v2_1 = widgets.HBox([\n",
    "    controls_col1_v2_1,\n",
    "    controls_col2_v2_1,\n",
    "    controls_col3_sliders_v2_1,\n",
    "    controls_col4_multiselect_v2_1,\n",
    "])\n",
    "view_selector_area_v2_1 = widgets.HBox([viz_type_dropdown_v2_1])\n",
    "\n",
    "# Title, filters, view selector and plot stacked vertically; the last row puts\n",
    "# the click-detail panel next to the statistics button and its output.\n",
    "dashboard_layout_v2_1 = widgets.VBox([\n",
    "    dashboard_title_v2_1,\n",
    "    filters_area_v2_1,\n",
    "    view_selector_area_v2_1,\n",
    "    main_plot_output_v2_1,\n",
    "    widgets.HBox(\n",
    "        [\n",
    "            detail_text_output_v2_1,\n",
    "            widgets.VBox(\n",
    "                [stats_button_v2_1, stats_output_v2_1],\n",
    "                layout=widgets.Layout(width='48%'),\n",
    "            ),\n",
    "        ],\n",
    "        layout=widgets.Layout(justify_content='space-between', width='100%'),\n",
    "    ),\n",
    "])\n",
    "\n",
    "# --- Initial Display ---\n",
    "# The dashboard needs a non-empty frame that contains 'drift_score'.\n",
    "if df.empty or 'drift_score' not in df.columns:\n",
    "    display(widgets.HTML(\"<h1>Drift Data Not Loaded or Malformed</h1><p>Cannot display dashboard. Please ensure 'per_turn_drift_v2.csv' is loaded correctly and contains necessary data like 'drift_score'.</p>\"))\n",
    "else:\n",
    "    display(dashboard_layout_v2_1)\n",
    "    on_value_change_v2_1()\n",
    "    update_stats_output_v2_1()"
   ]
  },
  {
   "cell_type": "markdown",
   "source": [
    "## 6. Specific Sort: Drift Score per Turn between Models and System Prompts\n",
    "This comparison is already covered by three of the dashboard views:\n",
    "\n",
    "1.  **'Drift Trajectory (Model & SysPrompt vs Turn)'**:\n",
    "    *   Directly plots `drift_score` vs. `turn_index`, with lines colored by `model` and styled by `sys_prompt_tag`.\n",
    "\n",
    "2.  **'Drift Heatmap (Model & SysPrompt vs Turn)'**:\n",
    "    *   The `plot_drift_heatmap_focused` function groups by `['turn_index', 'model', 'sys_prompt_tag']` and can show this relationship.\n",
    "\n",
    "3.  **'Drift Distribution (Model vs SysPrompt)'**:\n",
    "    *   Shows the drift score distribution for each `model` / `sys_prompt_tag` combination. For a per-turn view, narrow the turn slider to a single turn.\n",
    "\n",
    "**Tabular data for this specific sort:**"
   ],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build the per-turn comparison tables only when the frame has data and all four columns.\n",
    "if df.empty or not all(col in df.columns for col in ('model', 'sys_prompt_tag', 'turn_index', 'drift_score')):\n",
    "    print(\"Required columns for specific sort not found or DataFrame is empty.\")\n",
    "else:\n",
    "    # Mean drift score for every (model, system prompt, turn) combination.\n",
    "    avg_drift_per_turn_model_sys = (\n",
    "        df.groupby(['model', 'sys_prompt_tag', 'turn_index'])['drift_score']\n",
    "        .mean()\n",
    "        .reset_index()\n",
    "    )\n",
    "    print(\"\\nAverage Drift Score per Turn, Model, and System Prompt:\")\n",
    "    if avg_drift_per_turn_model_sys.empty:\n",
    "        print(\"No data after grouping for this view.\")\n",
    "    else:\n",
    "        display(avg_drift_per_turn_model_sys.head(20))\n",
    "\n",
    "    # Wide layout for easier comparison: one row per (model, system prompt),\n",
    "    # one column per turn.\n",
    "    try:\n",
    "        pivot_avg_drift = avg_drift_per_turn_model_sys.pivot_table(\n",
    "            index=['model', 'sys_prompt_tag'],\n",
    "            columns='turn_index',\n",
    "            values='drift_score',\n",
    "        )\n",
    "        print(\"\\nPivoted Average Drift Score (Turns as Columns):\")\n",
    "        if pivot_avg_drift.empty:\n",
    "            print(\"Pivoted table is empty - likely not enough data diversity for this shape.\")\n",
    "        else:\n",
    "            display(pivot_avg_drift)\n",
    "    except Exception as e:\n",
    "        print(f\"Error creating pivoted table: {e}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "source": [
    "## 7. Conclusion\n",
    "This focused notebook removes `persona`, `scored`, and `temperature` filters and visualizations, \n",
    "and ensures that the core analysis of drift score per turn across models and system prompts\n",
    "is clearly achievable through the primary interactive views and tabular data."
   ],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(\"Refined Monster Notebook (V2.1 Focused) processing complete!\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}


SyntaxError: unexpected character after line continuation character (4258237819.py, line 510)