In [2]:
import plotly.graph_objs as go
import plotly.io as pio
import numpy as np
import json

In [3]:
def generate_from_json(fig, json_file_path, trace_name, key, smooth=False, window_size=10):
    """
    Add a trace for one logged metric, read from a JSON Lines file, to a Plotly figure.

    Every non-blank line of the file is parsed as one JSON object. Only records that
    contain ``key`` are plotted: their ``"epoch"`` value is the x coordinate and their
    ``key`` value is the y coordinate.

    Args:
        fig (go.Figure): A Plotly figure object to which the trace will be added.
        json_file_path (str): Path to the JSON Lines file containing the logged records.
        trace_name (str): Name of the trace (e.g., "Train Loss", "Eval Loss").
        key (str): Record key to plot (e.g., "loss", "eval_loss"). If the key contains
            "eval", every epoch value is shifted down by one before plotting.
        smooth (bool): Whether to smooth the data using a moving average. Default is False.
        window_size (int): Number of consecutive points averaged when smoothing.
            Clamped to the number of available points. Default is 10.

    Returns:
        go.Figure: The same figure, with the new 'lines+markers' trace added. An empty
        trace is added when no record contains ``key``.

    Raises:
        ValueError: If ``smooth`` is True and ``window_size`` is smaller than 1.
        KeyError: If a record contains ``key`` but has no "epoch" field.
    """
    # Read JSON lines from the file; blank lines (e.g. a trailing empty line) are skipped
    with open(json_file_path, 'r') as file:
        data = [json.loads(line) for line in file if line.strip()]

    # Extract epochs and metric values from the records that carry the key
    records = [entry for entry in data if key in entry]
    epochs = [entry["epoch"] for entry in records]
    losses = [entry[key] for entry in records]

    # NOTE(review): presumably eval records are logged with the 1-based count of
    # completed epochs, hence the shift by one - confirm against the training logger.
    if "eval" in key:
        epochs = [epoch - 1 for epoch in epochs]

    # Apply smoothing if requested
    if smooth:
        if window_size < 1:
            raise ValueError(f"window_size must be at least 1, got {window_size}")
        if losses:
            # A window longer than the series would make 'valid' convolution return
            # more values than there are epochs, so clamp it to the series length.
            window = min(window_size, len(losses))
            losses = np.convolve(losses, np.ones(window) / window, mode='valid')
            # Value i averages points i .. i + window - 1; label it with the last
            # epoch of that window so the curve never shows data from "the future".
            epochs = epochs[window - 1:]

    # Add the trace to the figure
    fig.add_trace(go.Scatter(x=epochs, y=losses, mode='lines+markers', name=trace_name))
    return fig


In [5]:
import ast
import os

def plot_f1_from_folder(fig, folder_path, trace_name, smooth=False, window_size=10):
    """
    Add an F1 score trace (markers only, no lines) built from the metric files in a folder.

    Every file named ``eval_metrics_epoch_*.txt`` in ``folder_path`` is parsed as a
    Python literal. Files whose content has both an "eval_f1" and an "epoch" entry
    contribute one point. Points are ordered by epoch, and every epoch value is
    shifted down by one before plotting.

    Args:
        fig (go.Figure): A Plotly figure object to which the trace will be added.
        folder_path (str): Path to the folder containing evaluation metric files.
        trace_name (str): Name of the trace (e.g., "Eval F1").
        smooth (bool): Whether to smooth the F1 data using a moving average. Default is False.
        window_size (int): Number of consecutive points averaged when smoothing.
            Clamped to the number of available points. Default is 10.

    Returns:
        go.Figure: The same figure, with the new F1 trace added. An empty trace is
        added when the folder holds no usable metric file.

    Raises:
        ValueError: If ``smooth`` is True and ``window_size`` is smaller than 1.
    """
    # Collect (epoch, F1) pairs from every matching metrics file
    epochs = []
    f1_scores = []
    for filename in sorted(os.listdir(folder_path)):
        if not (filename.startswith("eval_metrics_epoch_") and filename.endswith(".txt")):
            continue
        with open(os.path.join(folder_path, filename), 'r') as file:
            data = ast.literal_eval(file.read().strip())
        if "eval_f1" in data and "epoch" in data:
            epochs.append(data["epoch"])
            f1_scores.append(data["eval_f1"])

    # Order the points by epoch. Unlike unpacking zip(*pairs), this also works
    # when no file matched and the lists are empty.
    points = sorted(zip(epochs, f1_scores))
    epochs = [epoch for epoch, _score in points]
    f1_scores = [score for _epoch, score in points]

    # Apply smoothing if requested
    if smooth:
        if window_size < 1:
            raise ValueError(f"window_size must be at least 1, got {window_size}")
        if f1_scores:
            # A window longer than the series would make 'valid' convolution return
            # more values than there are epochs, so clamp it to the series length.
            window = min(window_size, len(f1_scores))
            f1_scores = np.convolve(f1_scores, np.ones(window) / window, mode='valid')
            # Value i averages points i .. i + window - 1; label it with the last
            # epoch of that window.
            epochs = epochs[window - 1:]

    # NOTE(review): presumably the files store the 1-based count of completed
    # epochs, hence the shift by one - confirm against the code that writes them.
    epochs = [epoch - 1 for epoch in epochs]

    # Add the trace with points only (no lines)
    fig.add_trace(go.Scatter(x=epochs, y=f1_scores, mode='markers', name=trace_name))
    return fig

In [6]:
def generate_sum_from_json(json_file_path, key):
    """
    Add up every value stored under ``key`` in a JSON Lines file.

    Each line of the file is parsed as one JSON object; records that do not
    contain ``key`` are ignored.

    Args:
        json_file_path (str): Path to the file holding one JSON object per line.
        key (str): Record key whose values are accumulated (e.g., "loss", "grad_norm").

    Returns:
        float: The accumulated total, or the integer 0 when no record contains ``key``.
    """
    import json

    # Parse the whole file first, one JSON document per line
    with open(json_file_path, 'r') as file:
        records = [json.loads(raw_line) for raw_line in file]

    # Accumulate the values of the records that carry the key
    total_sum = 0
    for record in records:
        if key in record:
            total_sum += record[key]

    return total_sum

In [353]:
fig = go.Figure()

# BENCHMARKS = ["glue", "super_glue"]
benchmark = "glue"

# "glue" datasets: ["cola", "mnli", "mrpc", "qnli", "qqp", "rte", "sst2", "stsb", "wnli"]
# "super_glue" datasets: ["boolq", "cb", "copa", "rte", "wic", "wsc"]
dataset_name = "cola"

# experiments choose from ["Baseline", "Fair_baseline", "Gegenbauer", "Chebyshev_1", "Chebyshev_2", "Legendre"]

experiment_1 = "Fair_baseline"
experiment_2 = "Gegenbauer"

# Each run keeps its losses.json and its eval_metrics_epoch_*.txt files in the same
# folder, so the losses path is derived from the metrics folder.
file_path_1_f1 = f"/usrvol/final_results_no_checkpoint/{experiment_1}_BERT_FINAL_KAN_linear_no_layer/{benchmark}/{dataset_name}"
file_path_2_f1 = f"/usrvol/final_results_no_checkpoint/{experiment_2}_BERT_FINAL_KAN_linear_no_layer/{benchmark}/{dataset_name}"
file_path_1 = f"{file_path_1_f1}/losses.json"
file_path_2 = f"{file_path_2_f1}/losses.json"

# (experiment name, losses file, metrics folder) for every run shown in the figure
runs = [
    (experiment_1, file_path_1, file_path_1_f1),
    (experiment_2, file_path_2, file_path_2_f1),
]

# Loss
for run_name, losses_path, metrics_dir in runs:
    fig = generate_from_json(fig=fig, json_file_path=losses_path, trace_name=f"{run_name}_{dataset_name}", key="loss", smooth=True, window_size=10)

# Eval Loss
for run_name, losses_path, metrics_dir in runs:
    fig = generate_from_json(fig=fig, json_file_path=losses_path, trace_name=f"{run_name}_{dataset_name}_eval", key="eval_loss", smooth=False)

# Gradient Norm (optional, uncomment to plot)
#for run_name, losses_path, metrics_dir in runs:
#    fig = generate_from_json(fig=fig, json_file_path=losses_path, trace_name=f"{run_name}_{dataset_name}_grad_norm", key="grad_norm", smooth=True, window_size=10)

# F1: trace names come from the experiment variables so the legend stays correct
# when experiment_1 / experiment_2 are changed.
for run_name, losses_path, metrics_dir in runs:
    fig = plot_f1_from_folder(fig=fig, folder_path=metrics_dir, trace_name=f"{run_name}_{dataset_name}_F1", smooth=False)

# Update layout (the y axis carries both the loss curves and the F1 points)
fig.update_layout(
    title=f"Evaluation of {benchmark}: {dataset_name}, for experiments {experiment_1} and {experiment_2}",
    xaxis_title="Epoch",
    yaxis_title="Loss / F1",
    template="plotly",
    showlegend=True
)

# Show the figure
fig.show()

In [247]:
# Totals of the logged gradient norm and training loss for the two compared runs.
# experiment_1/2 and file_path_1/2 hold whatever the most recently executed plotting
# cell assigned, so re-run this cell after changing the experiments there.
for run_name, losses_path in ((experiment_1, file_path_1), (experiment_2, file_path_2)):
    print(f"{run_name} sum of normalized gradient: ", generate_sum_from_json(losses_path, "grad_norm"))
    print(f"{run_name} sum of Losses: ", generate_sum_from_json(losses_path, "loss"))


Fair_baseline sum of normalized gradient:  461.5681892633438
Fair_baseline sum of Losses:  64.44160000000001
Chebyshev_2 sum of normalized gradient:  553.6044828891754
Chebyshev_2 sum of Losses:  64.0833


In [370]:
fig = go.Figure()

# BENCHMARKS = ["glue", "super_glue"]
benchmark = "super_glue"

# "glue" datasets: ["cola", "mnli", "mrpc", "qnli", "qqp", "rte", "sst2", "stsb", "wnli"]
# "super_glue" datasets: ["boolq", "cb", "copa", "rte", "wic", "wsc"]
dataset_name = "wsc"

# experiments choose from ["Baseline", "Fair_baseline", "Gegenbauer", "Chebyshev_1", "Chebyshev_2", "Legendre"]

experiment_1 = "Chebyshev_1"
experiment_2 = "Chebyshev_2"
experiment_3 = "Legendre"
experiment_4 = "Gegenbauer"

# Add training and evaluation traces
file_path_1 = f"/usrvol/final_results_no_checkpoint/{experiment_1}_BERT_FINAL_KAN_linear_no_layer/{benchmark}/{dataset_name}/losses.json"
file_path_2 = f"/usrvol/final_results_no_checkpoint/{experiment_2}_BERT_FINAL_KAN_linear_no_layer/{benchmark}/{dataset_name}/losses.json"
file_path_3 = f"/usrvol/final_results_no_checkpoint/{experiment_3}_BERT_FINAL_KAN_linear_no_layer/{benchmark}/{dataset_name}/losses.json"
file_path_4 = f"/usrvol/final_results_no_checkpoint/{experiment_4}_BERT_FINAL_KAN_linear_no_layer/{benchmark}/{dataset_name}/losses.json"

# (experiment name, losses file) for every run shown in the figure
runs = [
    (experiment_1, file_path_1),
    (experiment_2, file_path_2),
    (experiment_3, file_path_3),
    (experiment_4, file_path_4),
]

# One smoothing window for every run so the loss curves are directly comparable.
# A window of 1 leaves the logged values unchanged; raise it to smooth all curves alike.
loss_window_size = 1

# Loss
for run_name, losses_path in runs:
    fig = generate_from_json(fig=fig, json_file_path=losses_path, trace_name=f"{run_name}_{dataset_name}", key="loss", smooth=True, window_size=loss_window_size)

# Eval Loss
for run_name, losses_path in runs:
    fig = generate_from_json(fig=fig, json_file_path=losses_path, trace_name=f"{run_name}_{dataset_name}_eval", key="eval_loss", smooth=False)

# Gradient Norm (optional, uncomment to plot)
#for run_name, losses_path in runs:
#    fig = generate_from_json(fig=fig, json_file_path=losses_path, trace_name=f"{run_name}_{dataset_name}_grad_norm", key="grad_norm", smooth=True, window_size=10)

# F1 (optional, uncomment to plot; the metrics folder is the folder holding losses.json)
#for run_name, losses_path in runs:
#    fig = plot_f1_from_folder(fig=fig, folder_path=os.path.dirname(losses_path), trace_name=f"{run_name}_{dataset_name}_F1", smooth=False)

# Update layout; the title names every plotted experiment
fig.update_layout(
    title=f"Evaluation of {benchmark}: {dataset_name}, for experiments {', '.join(run_name for run_name, losses_path in runs)}",
    xaxis_title="Epoch",
    yaxis_title="Loss",
    template="plotly",
    showlegend=True
)

# Show the figure
fig.show()