In [15]:
import json
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import dash
from dash import dcc, html
from dash.dependencies import Input, Output

# Load the JSON data
with open("./results/part_selection_results.json") as f:
    data = json.load(f)

# Function to flatten nested dictionary (dataset_params and model_params)
def flatten_dict(d, parent_key='', sep='_'):
    items = []
    for k, v in d.items():
        new_key = f"{parent_key}{sep}{k}" if parent_key else k
        if isinstance(v, dict):
            items.extend(flatten_dict(v, new_key, sep=sep).items())
        else:
            items.append((new_key, v))
    return dict(items)

# Function to process the JSON data into a DataFrame
def process_data(data):
    rows = []
    for entry in data:
        # Flatten model_params and dataset_params and combine them
        flattened_params = {**flatten_dict(entry["model_params"]), **flatten_dict(entry["dataset_params"])}
        
        # Add train and validation metrics
        train_metrics = entry["train_metrics"]
        val_metrics = entry["val_metrics"]

        for epoch in range(entry["epochs"]):
            row = {
                'epoch': epoch + 1,
                'train_loss': train_metrics['loss'][epoch],
                'train_accuracy': train_metrics['accuracy'][epoch],
                'train_f1': train_metrics['f1'][epoch],
                'val_loss': val_metrics['loss'][epoch],
                'val_accuracy': val_metrics['accuracy'][epoch],
                'val_f1': val_metrics['f1'][epoch],
            }
            row.update(flattened_params)  # Include all flattened parameters in the row
            rows.append(row)
    
    return pd.DataFrame(rows)

# Convert the JSON data to a DataFrame
df = process_data(data)

# Get all unique parameter names (excluding metric columns)
pivot_options = [col for col in df.columns if col not in ['epoch', 'train_loss', 'train_accuracy', 'train_f1', 'val_loss', 'val_accuracy', 'val_f1']]

# Create a Dash app for interactive plotting
app = dash.Dash(__name__)

app.layout = html.Div([
    html.H1("Model Training Metrics"),
    
    # Dropdown for selecting the metric to display
    html.Label("Choose a Metric:"),
    dcc.Dropdown(
        id="metric",
        options=[
            {'label': 'Loss', 'value': 'loss'},
            {'label': 'Accuracy', 'value': 'accuracy'},
            {'label': 'F1 Score', 'value': 'f1'},
        ],
        value='loss'
    ),
    
    # Dropdown for selecting the hyperparameter to pivot over (dynamic based on dataset)
    html.Label("Choose a Hyperparameter to Pivot Over:"),
    dcc.Dropdown(
        id="pivot_by",
        options=[{'label': col.replace('_', ' ').capitalize(), 'value': col} for col in pivot_options],
        value='learning_rate'
    ),

    # Toggle to switch between individual performance and summary view
    dcc.RadioItems(
        id='view_mode',
        options=[
            {'label': 'Individual', 'value': 'individual'},
            {'label': 'Summary', 'value': 'summary'}
        ],
        value='individual',
        labelStyle={'display': 'inline-block'}
    ),
    
    # Plot for training metrics
    dcc.Graph(id='train_metric_graph'),
    
    # Plot for validation metrics
    dcc.Graph(id='val_metric_graph')
])

@app.callback(
    [Output('train_metric_graph', 'figure'),
     Output('val_metric_graph', 'figure')],
    [Input('metric', 'value'),
     Input('pivot_by', 'value'),
     Input('view_mode', 'value')]
)
def update_graph(selected_metric, pivot_by, view_mode):
    train_metric_col = f"train_{selected_metric}"
    val_metric_col = f"val_{selected_metric}"

    if view_mode == 'summary':
        summary_df = df.groupby(['epoch', pivot_by]).agg(
            min_train_metric=(train_metric_col, 'min'),
            min_val_metric=(val_metric_col, 'min'),
        ).reset_index()

        # Prepare figure for training metrics
        train_fig = go.Figure()

        unique_pivot_vals = summary_df[pivot_by].unique()
        color_map = px.colors.qualitative.Plotly[:len(unique_pivot_vals)]

        for i, val in enumerate(unique_pivot_vals):
            pivot_group = summary_df[summary_df[pivot_by] == val]

            train_fig.add_trace(go.Scatter(
                x=pivot_group['epoch'],
                y=pivot_group['min_train_metric'],
                mode='lines',
                name=f'Min Train ({val})',
                line=dict(color=color_map[i], dash='dash')
            ))

        train_fig.update_layout(
            title=f"Train {selected_metric.capitalize()} Summary Over Epochs",
            xaxis_title="Epoch",
            yaxis_title=f"Train {selected_metric.capitalize()}"
        )

        # Prepare figure for validation metrics
        val_fig = go.Figure()

        for i, val in enumerate(unique_pivot_vals):
            pivot_group = summary_df[summary_df[pivot_by] == val]

            val_fig.add_trace(go.Scatter(
                x=pivot_group['epoch'],
                y=pivot_group['min_val_metric'],
                mode='lines',
                name=f'Min Val ({val})',
                line=dict(color=color_map[i], dash='dash')
            ))

        val_fig.update_layout(
            title=f"Validation {selected_metric.capitalize()} Summary Over Epochs",
            xaxis_title="Epoch",
            yaxis_title=f"Validation {selected_metric.capitalize()}"
        )

    else:  # Individual mode
        # Plot for training metrics (individual performance)
        train_fig = px.line(df, x="epoch", y=train_metric_col, color=pivot_by, markers=True,
                            labels={
                                "epoch": "Epoch",
                                train_metric_col: f"Train {selected_metric.capitalize()}",
                                pivot_by: pivot_by.replace("_", " ").capitalize()
                            },
                            title=f"Train {selected_metric.capitalize()} Over Epochs")
        
        # Plot for validation metrics (individual performance)
        val_fig = px.line(df, x="epoch", y=val_metric_col, color=pivot_by, markers=True,
                          labels={
                              "epoch": "Epoch",
                              val_metric_col: f"Validation {selected_metric.capitalize()}",
                              pivot_by: pivot_by.replace("_", " ").capitalize()
                          },
                          title=f"Validation {selected_metric.capitalize()} Over Epochs")

    return train_fig, val_fig

if __name__ == '__main__':
    app.run_server(debug=True)


---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[15], line 162, in update_graph(
    selected_metric='loss',
    pivot_by='text_selection_method',
    view_mode='individual'
)
    154     val_fig.update_layout(
    155         title=f"Validation {selected_metric.capitalize()} Summary Over Epochs",
    156         xaxis_title="Epoch",
    157         yaxis_title=f"Validation {selected_metric.capitalize()}"
    158     )
    160 else:  # Individual mode
    161     # Plot for training metrics (individual performance)
--> 162     train_fig = px.line(df, x="epoch", y=train_metric_col, color=pivot_by, markers=True,
        train_metric_col = 'train_loss'
        df =       epoch  train_loss  train_accuracy  train_f1  val_loss  val_accuracy  \
0         1    0.062234        0.976605  0.976593  0.080573      0.486755   
1         2    0.058572        0.957632  0.957597  0.088242     

In [5]:
print(1 * 1e-5)

1e-05


In [9]:
1e-5 == 0.00001

True

In [16]:
import json
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import dash
from dash import dcc, html
from dash.dependencies import Input, Output

# Load the JSON data
with open("./output_final_plotting.json") as f:
    data = json.load(f)

# Function to flatten nested dictionary (dataset_params and model_params)
def flatten_dict(d, parent_key='', sep='_'):
    items = []
    for k, v in d.items():
        new_key = f"{parent_key}{sep}{k}" if parent_key else k
        if isinstance(v, dict):
            items.extend(flatten_dict(v, new_key, sep=sep).items())
        else:
            items.append((new_key, v))
    return dict(items)

# Function to process the JSON data into a DataFrame
def process_data(data):
    rows = []
    for entry in data:
        # Flatten model_params and dataset_params and combine them
        flattened_params = {**flatten_dict(entry["model_params"]), **flatten_dict(entry["dataset_params"])}
        
        # Add train and validation metrics
        train_metrics = entry["train_metrics"]
        val_metrics = entry["val_metrics"]

        for epoch in range(entry["epochs"]):
            row = {
                'epoch': epoch + 1,
                'train_loss': train_metrics['loss'][epoch],
                'train_accuracy': train_metrics['accuracy'][epoch],
                'train_f1': train_metrics['f1'][epoch],
                'val_loss': val_metrics['loss'][epoch],
                'val_accuracy': val_metrics['accuracy'][epoch],
                'val_f1': val_metrics['f1'][epoch],
            }
            row.update(flattened_params)  # Include all flattened parameters in the row
            rows.append(row)
    
    df = pd.DataFrame(rows)
    # Convert unhashable types (lists, dictionaries, etc.) into strings for pivoting
    for col in df.columns:
        if df[col].apply(lambda x: isinstance(x, (list, dict, tuple))).any():
            df[col] = df[col].apply(lambda x: str(x))  # Convert to string
    
    return df

# Convert the JSON data to a DataFrame
df = process_data(data)

# Get all unique parameter names (excluding metric columns)
pivot_options = [col for col in df.columns if col not in ['epoch', 'train_loss', 'train_accuracy', 'train_f1', 'val_loss', 'val_accuracy', 'val_f1']]

# Create a Dash app for interactive plotting
app = dash.Dash(__name__)

app.layout = html.Div([
    html.H1("Model Training Metrics"),
    
    # Dropdown for selecting the metric to display
    html.Label("Choose a Metric:"),
    dcc.Dropdown(
        id="metric",
        options=[
            {'label': 'Loss', 'value': 'loss'},
            {'label': 'Accuracy', 'value': 'accuracy'},
            {'label': 'F1 Score', 'value': 'f1'},
        ],
        value='loss'
    ),
    
    # Dropdown for selecting the hyperparameter to pivot over (dynamic based on dataset)
    html.Label("Choose a Hyperparameter to Pivot Over:"),
    dcc.Dropdown(
        id="pivot_by",
        options=[{'label': str(col).replace('_', ' ').capitalize(), 'value': str(col)} for col in pivot_options],
        value='learning_rate'
    ),

    # Toggle to switch between individual, summary, and comparison view
    dcc.RadioItems(
        id='view_mode',
        options=[
            {'label': 'Individual', 'value': 'individual'},
            {'label': 'Summary', 'value': 'summary'},
            {'label': 'Comparison', 'value': 'comparison'}
        ],
        value='individual',
        labelStyle={'display': 'inline-block'}
    ),
    
    # Plot for training metrics
    dcc.Graph(id='train_metric_graph'),
    
    # Plot for validation metrics
    dcc.Graph(id='val_metric_graph')
])

@app.callback(
    [Output('train_metric_graph', 'figure'),
     Output('val_metric_graph', 'figure')],
    [Input('metric', 'value'),
     Input('pivot_by', 'value'),
     Input('view_mode', 'value')]
)
def update_graph(selected_metric, pivot_by, view_mode):
    train_metric_col = f"train_{selected_metric}"
    val_metric_col = f"val_{selected_metric}"

    if view_mode == 'summary':
        summary_df = df.groupby(['epoch', pivot_by]).agg(
            min_train_metric=(train_metric_col, 'min'),
            min_val_metric=(val_metric_col, 'min'),
        ).reset_index()

        # Prepare figure for training metrics
        train_fig = go.Figure()

        unique_pivot_vals = summary_df[pivot_by].unique()
        color_map = px.colors.qualitative.Plotly[:len(unique_pivot_vals)]

        for i, val in enumerate(unique_pivot_vals):
            pivot_group = summary_df[summary_df[pivot_by] == val]

            train_fig.add_trace(go.Scatter(
                x=pivot_group['epoch'],
                y=pivot_group['min_train_metric'],
                mode='lines',
                name=f'Min Train ({val})',
                line=dict(color=color_map[i], dash='dash')
            ))

        train_fig.update_layout(
            title=f"Train {selected_metric.capitalize()} Summary Over Epochs",
            xaxis_title="Epoch",
            yaxis_title=f"Train {selected_metric.capitalize()}"
        )

        # Prepare figure for validation metrics
        val_fig = go.Figure()

        for i, val in enumerate(unique_pivot_vals):
            pivot_group = summary_df[summary_df[pivot_by] == val]

            val_fig.add_trace(go.Scatter(
                x=pivot_group['epoch'],
                y=pivot_group['min_val_metric'],
                mode='lines',
                name=f'Min Val ({val})',
                line=dict(color=color_map[i], dash='dash')
            ))

        val_fig.update_layout(
            title=f"Validation {selected_metric.capitalize()} Summary Over Epochs",
            xaxis_title="Epoch",
            yaxis_title=f"Validation {selected_metric.capitalize()}"
        )

    elif view_mode == 'comparison':
        # Exclude columns that contain lists or unhashable types
        comparison_cols = [col for col in df.columns if col not in ['epoch', train_metric_col, val_metric_col, pivot_by] and not isinstance(df[col].iloc[0], list)]

        # Filter out cases where only the pivot_by value differs
        comparison_df = df.groupby(comparison_cols).filter(lambda x: x[pivot_by].nunique() > 1)

        train_fig = px.line(comparison_df, x="epoch", y=train_metric_col, color=pivot_by, markers=True,
                            labels={
                                "epoch": "Epoch",
                                train_metric_col: f"Train {selected_metric.capitalize()}",
                                pivot_by: pivot_by.replace("_", " ").capitalize()
                            },
                            title=f"Train {selected_metric.capitalize()} Comparison Over Epochs")
        
        val_fig = px.line(comparison_df, x="epoch", y=val_metric_col, color=pivot_by, markers=True,
                        labels={
                            "epoch": "Epoch",
                            val_metric_col: f"Validation {selected_metric.capitalize()}",
                            pivot_by: pivot_by.replace("_", " ").capitalize()
                        },
                        title=f"Validation {selected_metric.capitalize()} Comparison Over Epochs")


    else:  # Individual mode
        # Plot for training metrics (individual performance)
        train_fig = px.line(df, x="epoch", y=train_metric_col, color=pivot_by, markers=True,
                            labels={
                                "epoch": "Epoch",
                                train_metric_col: f"Train {selected_metric.capitalize()}",
                                pivot_by: pivot_by.replace("_", " ").capitalize()
                            },
                            title=f"Train {selected_metric.capitalize()} Over Epochs")
        
        # Plot for validation metrics (individual performance)
        val_fig = px.line(df, x="epoch", y=val_metric_col, color=pivot_by, markers=True,
                          labels={
                              "epoch": "Epoch",
                              val_metric_col: f"Validation {selected_metric.capitalize()}",
                              pivot_by: pivot_by.replace("_", " ").capitalize()
                          },
                          title=f"Validation {selected_metric.capitalize()} Over Epochs")

    return train_fig, val_fig

if __name__ == '__main__':
    app.run_server(debug=True)
