In [1]:
%pip install plotly pandas

# 📚 Imports
import pandas as pd
import plotly.express as px
import os

# 📂 Ensure output folder exists
os.makedirs("visuals", exist_ok=True)

# Metric columns that every chart treats as numeric.
METRIC_COLUMNS = ["BLEU Score", "Latency (ms)"]


def load_precision_results(precision):
    """Load the evaluation CSV for one precision and prepare it for plotting.

    Reads ``data/evaluation_results_<precision>.csv``, converts the metric
    columns to numbers (entries that cannot be parsed become NaN) and tags
    every row with the precision label.

    Coercion happens here, per file, so that the per-precision frames used by
    the individual charts hold the same numeric values as ``combined``.

    Args:
        precision: Precision label, e.g. "FP16"; also the CSV name suffix.

    Returns:
        DataFrame with the CSV's columns plus a trailing "Precision" column.
    """
    results = pd.read_csv(f"data/evaluation_results_{precision}.csv")
    for column in METRIC_COLUMNS:
        results[column] = pd.to_numeric(results[column], errors="coerce")
    results["Precision"] = precision
    return results


# 📥 Load CSVs (numeric metrics, tagged with their precision)
fp16 = load_precision_results("FP16")
int8 = load_precision_results("INT8")
int4 = load_precision_results("INT4")

# 🔗 Combine into one DataFrame
combined = pd.concat([fp16, int8, int4], ignore_index=True)

# 📌 Create a unique variant ID ("<model>_<precision>")
combined["variant_id"] = combined["Model"] + "_" + combined["Precision"]

# ========== 1️⃣–3️⃣ BLEU Score Comparison Within Each Precision ==========
def make_bleu_bar_chart(results, title, output_path):
    """Draw one bar per model for a single precision and save it as HTML.

    Args:
        results: DataFrame with "Model" and "BLEU Score" columns.
        title: Chart title.
        output_path: Path of the HTML file to write.

    Returns:
        The plotly figure that was written.
    """
    fig = px.bar(results, x="Model", y="BLEU Score", color="Model",
                 title=title,
                 labels={"BLEU Score": "BLEU Score", "Model": "Model"})
    # Slanted tick labels keep the model names readable.
    fig.update_layout(xaxis_tickangle=-45)
    fig.write_html(output_path)
    return fig


# 1️⃣ FP16
fig1 = make_bleu_bar_chart(fp16, "1️⃣ BLEU Score Comparison Within FP16",
                           "visuals/interactive_FP16_bleu.html")

# 2️⃣ INT8
fig2 = make_bleu_bar_chart(int8, "2️⃣ BLEU Score Comparison Within INT8",
                           "visuals/interactive_INT8_bleu.html")

# 3️⃣ INT4
fig3 = make_bleu_bar_chart(int4, "3️⃣ BLEU Score Comparison Within INT4",
                           "visuals/interactive_INT4_bleu.html")

# ========== 4️⃣ Average BLEU & Latency Across Precisions ==========
# Mean of each metric per precision, one row per precision.
avg_metrics = combined.groupby("Precision", as_index=False)[["BLEU Score", "Latency (ms)"]].mean()

# Long format (Precision, Metric, Value) so both metrics share one grouped chart.
avg_melted = pd.melt(
    avg_metrics,
    id_vars="Precision",
    var_name="Metric",
    value_name="Value",
)

fig4 = px.bar(
    data_frame=avg_melted,
    x="Metric",
    y="Value",
    color="Precision",
    barmode="group",
    labels={"Value": "Average Value", "Metric": "Metric"},
    title="4️⃣ Average BLEU and Latency Across Precisions",
)

fig4.write_html("visuals/interactive_avg_metrics.html")

# ========== 5️⃣ BLEU Comparison Across All Model Variants ==========
# One bar per model/precision combination, coloured by precision.
variant_axis_labels = {"BLEU Score": "BLEU Score", "variant_id": "Model + Precision"}

fig5 = px.bar(
    data_frame=combined,
    x="variant_id",
    y="BLEU Score",
    color="Precision",
    labels=variant_axis_labels,
    title="5️⃣ BLEU Score Comparison Across All Model Variants",
)

# Variant ids are long, so the tick labels are turned fully vertical.
fig5.update_layout(xaxis={"tickangle": -90})
fig5.write_html("visuals/interactive_all_variants_bleu.html")


Collecting plotly
Note: you may need to restart the kernel to use updated packages.

  Downloading plotly-6.0.1-py3-none-any.whl.metadata (6.7 kB)
Collecting pandas
  Downloading pandas-2.2.3-cp310-cp310-win_amd64.whl.metadata (19 kB)
Collecting narwhals>=1.15.1 (from plotly)
  Downloading narwhals-1.34.0-py3-none-any.whl.metadata (9.2 kB)
Collecting tzdata>=2022.7 (from pandas)
  Downloading tzdata-2025.2-py2.py3-none-any.whl.metadata (1.4 kB)
Downloading plotly-6.0.1-py3-none-any.whl (14.8 MB)
   ---------------------------------------- 0.0/14.8 MB ? eta -:--:--
    --------------------------------------- 0.3/14.8 MB ? eta -:--:--
   - -------------------------------------- 0.5/14.8 MB 2.1 MB/s eta 0:00:07
   -- ------------------------------------- 0.8/14.8 MB 2.2 MB/s eta 0:00:07
   ---- ----------------------------------- 1.6/14.8 MB 2.2 MB/s eta 0:00:07
   ---- ----------------------------------- 1.8/14.8 MB 2.1 MB/s eta 0:00:07
   ------ --------------------------------- 2.4/14.8

In [2]:
# Add these new visualizations to your existing notebook

# ========== 6️⃣ Performance vs. Efficiency Scatter Plot ==========
# This shows the trade-off between BLEU score and latency.
# Marker size is driven by BLEU Score, and plotly rejects NaN marker sizes.
# The earlier numeric coercion can introduce NaN, so rows without a BLEU
# score are dropped before plotting (they could not be drawn anyway).
tradeoff_data = combined.dropna(subset=["BLEU Score"])

fig6 = px.scatter(tradeoff_data, x="Latency (ms)", y="BLEU Score",
                  color="Precision", symbol="Model", size="BLEU Score",
                  hover_data=["Model", "Precision", "BLEU Score", "Latency (ms)"],
                  title="6️⃣ Performance vs. Efficiency Trade-off",
                  labels={"BLEU Score": "BLEU Score (higher is better)",
                          "Latency (ms)": "Latency in ms (lower is better)"})

# Outline each marker so overlapping points stay distinguishable
# (this does not add a trend line).
fig6.update_traces(marker=dict(line=dict(width=1, color='DarkSlateGrey')))
fig6.update_layout(legend_title_text='Precision')
fig6.write_html("visuals/interactive_performance_efficiency.html")

# ========== 7️⃣ Precision Degradation Analysis ==========
# Wide table: one row per model, one BLEU column per precision
# (the first value is kept if a model/precision pair occurs more than once).
pivot_df = pd.pivot_table(
    combined,
    index="Model",
    columns="Precision",
    values="BLEU Score",
    aggfunc="first",
).reset_index()

# Percentage change of each quantized precision relative to the FP16 score.
fp16_baseline = pivot_df["FP16"]
for quantized in ("INT8", "INT4"):
    percent_change = (pivot_df[quantized] - fp16_baseline) / fp16_baseline * 100
    pivot_df[f"{quantized} vs FP16 (%)"] = percent_change.round(2)

# Long format (Model, Comparison, Degradation) for the grouped bar chart.
degradation_df = pd.melt(
    pivot_df,
    id_vars="Model",
    value_vars=["INT8 vs FP16 (%)", "INT4 vs FP16 (%)"],
    var_name="Comparison",
    value_name="Degradation (%)",
)

fig7 = px.bar(
    data_frame=degradation_df,
    x="Model",
    y="Degradation (%)",
    color="Comparison",
    barmode="group",
    labels={"Degradation (%)": "% Change from FP16 (negative = worse)"},
    title="7️⃣ BLEU Score Degradation Relative to FP16",
)

fig7.update_layout(xaxis={"tickangle": -45})
fig7.write_html("visuals/interactive_precision_degradation.html")

# ========== 8️⃣ Model Size vs. Performance ==========
# Placeholder mapping of model name -> size in MB. Replace the keys with the
# names that actually appear in the "Model" column; models without an entry
# get NaN as their size and are not drawn on the chart.
model_sizes = {
    "MODEL_A": 350,
    "MODEL_B": 420, 
    "MODEL_C": 500,
    "MODEL_D": 650,
    # Add all your models here
}

# Add model size to the combined dataframe
combined["Model Size (MB)"] = combined["Model"].map(model_sizes)

# Bubble size is driven by latency, and plotly rejects NaN marker sizes.
# The earlier numeric coercion can introduce NaN, so rows without a latency
# value are dropped before plotting.
size_plot_data = combined.dropna(subset=["Latency (ms)"])

# Create a bubble chart
fig8 = px.scatter(size_plot_data, x="Model Size (MB)", y="BLEU Score",
                  size="Latency (ms)", color="Precision", symbol="Model",
                  hover_data=["Model", "Precision", "BLEU Score"],
                  title="8️⃣ Model Size vs. Performance Trade-off",
                  labels={"BLEU Score": "BLEU Score", "Model Size (MB)": "Model Size (MB)"})

fig8.update_layout(xaxis_title="Model Size (MB)")
fig8.write_html("visuals/interactive_size_performance.html")

# ========== 9️⃣ Performance Distribution Boxplots ==========
# One box per precision, with every individual score drawn next to it.
box_axis_labels = {"BLEU Score": "BLEU Score", "Precision": "Precision"}

fig9 = px.box(
    data_frame=combined,
    x="Precision",
    y="BLEU Score",
    color="Precision",
    points="all",
    labels=box_axis_labels,
    title="9️⃣ BLEU Score Distribution by Precision",
)

fig9.write_html("visuals/interactive_performance_distribution.html")

# ========== 🔟 Performance Radar Charts ==========
# Radar chart comparing several metrics for each precision.
# BLEU and speed come from the measured averages; "Memory Efficiency" and
# "Inference Throughput" are hand-entered values relative to FP16 — replace
# them with measured numbers if you have them.

# avg_metrics comes from a groupby, which sorts its keys (FP16, INT4, INT8).
# The labels and hand-entered values below are listed as FP16, INT8, INT4, so
# the averages are looked up by label instead of being taken positionally;
# otherwise the INT8 and INT4 averages would be swapped.
precision_order = ["FP16", "INT8", "INT4"]
avg_by_precision = avg_metrics.set_index("Precision").reindex(precision_order)

metrics_df = pd.DataFrame({
    "Precision": precision_order,
    "BLEU Score": avg_by_precision["BLEU Score"].tolist(),
    "Speed (1/Latency)": (1000 / avg_by_precision["Latency (ms)"]).tolist(),
    "Memory Efficiency": [1.0, 2.0, 4.0],  # Relative to FP16
    "Inference Throughput": [1.0, 1.8, 3.5]  # Relative to FP16
})

# Scale every metric by its maximum so all axes share the 0-1 range.
for col in metrics_df.columns:
    if col != "Precision":
        max_val = metrics_df[col].max()
        metrics_df[col] = metrics_df[col] / max_val

# Create the radar chart
metrics_melted = metrics_df.melt(id_vars="Precision", var_name="Metric", value_name="Value")

fig10 = px.line_polar(metrics_melted, r="Value", theta="Metric", color="Precision", line_close=True,
                      title="🔟 Multi-metric Performance Comparison",
                      range_r=[0, 1])

fig10.update_layout(polar=dict(radialaxis=dict(visible=True, range=[0, 1])))
fig10.write_html("visuals/interactive_radar_chart.html")

# ========== 1️⃣1️⃣ Interactive Model Selector Dashboard ==========
# Create a combined dashboard with model selector
from plotly.subplots import make_subplots
import plotly.graph_objects as go

# Function to create a model comparison dashboard
def create_model_comparison(model_name):
    """Build a 2x2 dashboard figure for a single model.

    Panels:
        1. BLEU score per precision (bar)
        2. Latency per precision (bar)
        3. BLEU score against latency, one labelled point per precision
        4. BLEU score per precision relative to the model's best precision,
           with each bar labelled by its raw BLEU score

    Reads the notebook-level ``combined`` and ``pivot_df`` frames.

    Args:
        model_name: Value of the "Model" column to select.

    Returns:
        The assembled plotly figure (800x1000, legend hidden).
    """
    # Colours are keyed by precision rather than by row position, so a model
    # with a missing or reordered precision still gets consistent colours.
    precision_colors = {"FP16": '#1f77b4', "INT8": '#ff7f0e', "INT4": '#2ca02c'}
    fallback_color = '#7f7f7f'

    model_data = combined[combined["Model"] == model_name]
    row_colors = [precision_colors.get(label, fallback_color)
                  for label in model_data["Precision"]]

    fig = make_subplots(
        rows=2, cols=2,
        subplot_titles=("BLEU Score by Precision", "Latency by Precision",
                        "BLEU vs Latency", "Precision Comparison"),
        specs=[[{"type": "bar"}, {"type": "bar"}],
               [{"type": "scatter"}, {"type": "bar"}]]
    )

    # BLEU Score by Precision
    fig.add_trace(
        go.Bar(x=model_data["Precision"], y=model_data["BLEU Score"], name="BLEU Score",
               marker_color=row_colors),
        row=1, col=1
    )

    # Latency by Precision
    fig.add_trace(
        go.Bar(x=model_data["Precision"], y=model_data["Latency (ms)"], name="Latency",
               marker_color=row_colors),
        row=1, col=2
    )

    # BLEU vs Latency Scatter
    fig.add_trace(
        go.Scatter(x=model_data["Latency (ms)"], y=model_data["BLEU Score"], mode="markers+text",
                   marker=dict(size=12, color=row_colors),
                   text=model_data["Precision"], textposition="top center"),
        row=2, col=1
    )

    # Relative BLEU per precision, taken from the per-model pivot table.
    # This panel used to be gated on a placeholder key being present in
    # model_sizes, which made it vanish once real model names were filled in;
    # it now depends only on the pivot data it actually uses.
    model_pivot = pivot_df[pivot_df["Model"] == model_name]
    available = [label for label in precision_colors if label in model_pivot.columns]

    if not model_pivot.empty and available:
        pivot_row = model_pivot.iloc[0]
        relative_df = pd.DataFrame({
            "Precision": available,
            "BLEU Score": [pivot_row[label] for label in available],
        })
        # Scale by the model's best score so the best precision reads 1.0.
        relative_df["Relative"] = relative_df["BLEU Score"] / relative_df["BLEU Score"].max()

        fig.add_trace(
            go.Bar(x=relative_df["Precision"], y=relative_df["Relative"],
                   name="Relative Score",
                   marker_color=[precision_colors[label] for label in available],
                   text=[f"{x:.2f}" for x in relative_df["BLEU Score"]], textposition="auto"),
            row=2, col=2
        )

    # Update layout
    fig.update_layout(height=800, width=1000,
                      title_text=f"1️⃣1️⃣ Detailed Analysis for {model_name}",
                      showlegend=False)

    return fig

# Create individual dashboard for each model
for model in combined["Model"].unique():
    fig = create_model_comparison(model)
    # Model names can contain path separators or characters that are not
    # valid in file names (e.g. "org/model"); replace them so the dashboard
    # is always written directly inside visuals/.
    safe_name = "".join("_" if ch in '\\/:*?"<>|' else ch for ch in str(model))
    fig.write_html(f"visuals/interactive_dashboard_{safe_name}.html")

# Names of all models that received a dashboard (no index page is generated
# here; this list is the starting point for one).
models_list = combined["Model"].unique().tolist()

In [3]:
# avg_metrics comes from a groupby, which sorts its keys (FP16, INT4, INT8).
# The labels and hand-entered values below are listed as FP16, INT8, INT4, so
# the averages are looked up by label instead of being taken positionally;
# otherwise the INT8 and INT4 averages would be swapped.
radar_precisions = ["FP16", "INT8", "INT4"]
radar_averages = avg_metrics.set_index("Precision").reindex(radar_precisions)

metrics_df = pd.DataFrame({
    "Precision": radar_precisions,
    "BLEU Score": radar_averages["BLEU Score"].tolist(),
    "Speed (1/Latency)": (1000 / radar_averages["Latency (ms)"]).tolist(),
    "Memory Efficiency": [1.0, 2.0, 4.0],  # Customize if needed
    "Inference Throughput": [1.0, 1.8, 3.5]
})

# Normalize metrics: scale every metric column by its maximum (0-1 range).
for col in metrics_df.columns[1:]:
    metrics_df[col] = metrics_df[col] / metrics_df[col].max()

# Long format (Precision, Metric, Value) as expected by line_polar.
metrics_melted = metrics_df.melt(id_vars="Precision", var_name="Metric", value_name="Value")

fig10 = px.line_polar(metrics_melted, r="Value", theta="Metric", color="Precision", line_close=True,
                      title="🔟 Multi-metric Performance Comparison",
                      range_r=[0, 1])

fig10.update_layout(polar=dict(radialaxis=dict(visible=True, range=[0, 1])))
fig10.write_html("visuals/interactive_radar_chart.html")
