In [1]:
# All imports
import pandas as pd
import matplotlib.pyplot as plt
import os

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
# Directory containing the CSV files
csv_dir = "/home/hice1/kkang68/scratch/meadow/results/"
# List of provided filenames
resnet_results = [
    "resnet-152_20241103-174623_lr0.0001_bs64.csv",
    "resnet-50_20241107-111821_lr0.0001_bs64.csv",
    "resnet-34_20241114-123531_lr1.00e-04_bs128[e40estop7].csv",
]

snapshot_ensemble_results = [
    "resnet-50_20241114-232226_lr2.00e-01_bs128_snapshot-pl5[e40].csv",
    "resnet-50_20241118-130313_lr2.00e-01_bs128_snapshot-pl3[e60].csv",
    "resnet-34_20241113-212612_lr2.00e-01_bs128_snapshot-pl5[e40].csv",
    "resnet-34_20241116-174004_lr2.00e-01_bs128_snapshot-pl3[e60].csv",
]

moe_results = [
    "MoE3_resnet-50_resnet-domain-50_20241118-081710_lr1.00e-04_bs128[snap-5_lexp-3-learnMapper].csv",
    "MoE5_resnet-34_resnet-domain-50_20241116-130542_lr1.00e-03_bs128[learnAllSteep-reworkForward-smaxRouter].csv",
    "MoE5_resnet-50_resnet-domain-50_20241118-152917_lr1.00e-04_bs128[snap-5_lexp-3_learnMapper_uniformInit].csv",
]

vit_results = [
    "vit-16_20241113-204439_lr1.00e-04_bs64[nofreeze].csv",
    "vit-16_20241116-165014_lr1.00e-04_bs64[freeze].csv",
]

In [3]:
def rank_files(file_list):
    results = []
    for file in file_list:
        file_path = os.path.join(csv_dir, file)
        try:
            # Read the CSV
            df = pd.read_csv(file_path)
            # Strip whitespace from the 'Unnamed: 0' column
            df["Unnamed: 0"] = df["Unnamed: 0"].str.strip()
            # Extract accuracy for 'test' and 'id_test' columns
            acc_test = df.loc[df["Unnamed: 0"] == "acc_avg", "test"].values[0]
            acc_id_test = df.loc[df["Unnamed: 0"] == "acc_avg", "id_test"].values[0]
            results.append((file, acc_test, acc_id_test))
        except Exception as e:
            print(f"Error processing {file}: {e}")
            continue
    
    # Sort files first by 'test' accuracy, then by 'id_test' accuracy
    results = sorted(results, key=lambda x: (x[1], x[2]), reverse=True)
    return results

# Run the updated function
ranked_resnet_results = rank_files(resnet_results)
ranked_snapshot_ensemble_results = rank_files(snapshot_ensemble_results)
ranked_moe_results = rank_files(moe_results)
ranked_vit_results = rank_files(vit_results)

# Print results
results_summary = {
    "ResNet Results": ranked_resnet_results,
    "Snapshot Ensemble Results": ranked_snapshot_ensemble_results,
    "MoE Results": ranked_moe_results,
    "ViT Results": ranked_vit_results,
}

# Print the ranked results
for category, results in results_summary.items():
    print(f"\n{category}:")
    for rank, result in enumerate(results, start=1):
        print(f"{rank}. File: {result[0]}, Test Accuracy: {result[1]}, Test ID Accuracy: {result[2]}")


ResNet Results:
1. File: resnet-50_20241107-111821_lr0.0001_bs64.csv, Test Accuracy: 0.738192617893219, Test ID Accuracy: 0.7257787585258484
2. File: resnet-152_20241103-174623_lr0.0001_bs64.csv, Test Accuracy: 0.7211329340934753, Test ID Accuracy: 0.7267598509788513
3. File: resnet-34_20241114-123531_lr1.00e-04_bs128[e40estop7].csv, Test Accuracy: 0.6820592880249023, Test ID Accuracy: 0.6802796125411987

Snapshot Ensemble Results:
1. File: resnet-50_20241114-232226_lr2.00e-01_bs128_snapshot-pl5[e40].csv, Test Accuracy: 0.6775256395339966, Test ID Accuracy: 0.7010056376457214
2. File: resnet-50_20241118-130313_lr2.00e-01_bs128_snapshot-pl3[e60].csv, Test Accuracy: 0.6773152947425842, Test ID Accuracy: 0.7011283040046692
3. File: resnet-34_20241113-212612_lr2.00e-01_bs128_snapshot-pl5[e40].csv, Test Accuracy: 0.6424949169158936, Test ID Accuracy: 0.6566102504730225
4. File: resnet-34_20241116-174004_lr2.00e-01_bs128_snapshot-pl3[e60].csv, Test Accuracy: 0.6406720876693726, Test ID Accu

In [4]:
import pandas as pd
import os
import numpy as np

# Directory containing the CSV files
csv_dir = "/home/hice1/kkang68/scratch/meadow/results/"

# Lists of results
resnet_results = [
    "resnet-152_20241103-174623_lr0.0001_bs64.csv",
    "resnet-50_20241107-111821_lr0.0001_bs64.csv",
    "resnet-34_20241114-123531_lr1.00e-04_bs128[e40estop7].csv",
]

snapshot_ensemble_results = [
    "resnet-50_20241114-232226_lr2.00e-01_bs128_snapshot-pl5[e40].csv",
    "resnet-50_20241118-130313_lr2.00e-01_bs128_snapshot-pl3[e60].csv",
    "resnet-34_20241113-212612_lr2.00e-01_bs128_snapshot-pl5[e40].csv",
    "resnet-34_20241116-174004_lr2.00e-01_bs128_snapshot-pl3[e60].csv",
]

moe_results = [
    "MoE3_resnet-50_resnet-domain-50_20241118-081710_lr1.00e-04_bs128[snap-5_lexp-3-learnMapper].csv",
    "MoE5_resnet-34_resnet-domain-50_20241116-130542_lr1.00e-03_bs128[learnAllSteep-reworkForward-smaxRouter].csv",
    "MoE5_resnet-50_resnet-domain-50_20241118-152917_lr1.00e-04_bs128[snap-5_lexp-3_learnMapper_uniformInit].csv",
]

vit_results = [
    "vit-16_20241113-204439_lr1.00e-04_bs64[nofreeze].csv",
    "vit-16_20241116-165014_lr1.00e-04_bs64[freeze].csv",
]

# Helper function to extract metrics
def extract_metrics(file_list, csv_dir):
    results = []
    for file in file_list:
        file_path = os.path.join(csv_dir, file)
        try:
            # Read the CSV file
            df = pd.read_csv(file_path)
            # Strip whitespace from the 'Unnamed: 0' column
            df["Unnamed: 0"] = df["Unnamed: 0"].str.strip()
            # Extract accuracy, recall, and F1 for 'test'
            acc_test = df.loc[df["Unnamed: 0"] == "acc_avg", "test"].values[0]
            recall_test = df.loc[df["Unnamed: 0"] == "recall-macro_all", "test"].values[0]
            f1_test = df.loc[df["Unnamed: 0"] == "F1-macro_all", "test"].values[0]
            results.append({"Model": file, "Test Accuracy": acc_test, "Test Recall": recall_test, "Test F1": f1_test})
        except Exception as e:
            print(f"Error processing {file}: {e}")
    return results

# Extract metrics for each category
resnet_metrics = extract_metrics(resnet_results, csv_dir)
snapshot_ensemble_metrics = extract_metrics(snapshot_ensemble_results, csv_dir)
moe_metrics = extract_metrics(moe_results, csv_dir)
vit_metrics = extract_metrics(vit_results, csv_dir)

# Combine all metrics into a structured DataFrame
rows = []

# Add ResNet results
rows.extend(resnet_metrics)

# Add Snapshot Ensemble results
rows.append({"Model": "--- Snapshot Ensemble ---", "Test Accuracy": None, "Test Recall": None, "Test F1": None})
rows.extend(snapshot_ensemble_metrics)

# Add MoE results
rows.append({"Model": "--- Mixture of Experts (MoE) ---", "Test Accuracy": None, "Test Recall": None, "Test F1": None})
rows.extend(moe_metrics)

# Add ViT results
rows.append({"Model": "--- Vision Transformer (ViT) ---", "Test Accuracy": None, "Test Recall": None, "Test F1": None})
rows.extend(vit_metrics)

# Create DataFrame and format
table_df = pd.DataFrame(rows)
table_df["Model"] = table_df["Model"].str.replace(r".*\.csv", "", regex=True)  # Clean up file names for clarity
table_df = table_df.reset_index(drop=True)  # Remove the index column

# Format sub-headers to stand out
table_df.loc[table_df["Model"].str.contains("---"), ["Test Accuracy", "Test Recall", "Test F1"]] = None

# Display the table
print(table_df.to_string(index=False))

                           Model  Test Accuracy  Test Recall  Test F1
                                       0.721133     0.272167 0.275299
                                       0.738193     0.260434 0.266368
                                       0.682059     0.223976 0.233204
       --- Snapshot Ensemble ---            NaN          NaN      NaN
                                       0.677526     0.255944 0.251981
                                       0.677315     0.262979 0.257199
                                       0.642495     0.227946 0.223529
                                       0.640672     0.226629 0.221488
--- Mixture of Experts (MoE) ---            NaN          NaN      NaN
                                       0.681311     0.266920 0.260069
                                       0.647613     0.222860 0.218446
                                       0.678951     0.248945 0.245326
--- Vision Transformer (ViT) ---            NaN          NaN      NaN
                    

In [5]:
import pandas as pd
import plotly.graph_objects as go

# Create the DataFrame
data = {
    "Model": [
        "ResNet 152", "ResNet 50", "ResNet 34",
        "Snapshot Ensemble", 
        "SE 50, 3 experts", "SE 50, 3 experts", "SE 34, 5 experts", "SE 34, 3 experts",
        "Mixture of Experts (MoE)", 
        "MoE 50, 3 experts", "MoE 34, 5 experts", "MoE 34, 3 experts",
        "Vision Transformer (ViT)", 
        "ViT 16 No Freeze", "ViT 16 Freeze"
    ],
    "Test Accuracy": [
        0.721133, 0.738193, 0.682059, None,
        0.677526, 0.677315, 0.642495, 0.640672,
        None,
        0.681311, 0.647613, 0.678951,
        None,
        0.561871, 0.448202
    ],
    "Test Recall": [
        0.272167, 0.260434, 0.223976, None,
        0.255944, 0.262979, 0.227946, 0.226629,
        None,
        0.266920, 0.222860, 0.248945,
        None,
        0.196435, 0.098078
    ],
    "Test F1": [
        0.275299, 0.266368, 0.233204, None,
        0.251981, 0.257199, 0.223529, 0.221488,
        None,
        0.260069, 0.218446, 0.245326,
        None,
        0.169430, 0.088369
    ]
}

df = pd.DataFrame(data)

# Define colors
header_color = 'lightblue'
subheader_color = 'lightgrey'
cell_color = 'white'
nan_color = 'white'

# Prepare the table for Plotly
fig = go.Figure(data=[go.Table(
    header=dict(
        values=["<b>Model</b>", "<b>Test Accuracy</b>", "<b>Test Recall</b>", "<b>Test F1</b>"],
        fill_color=header_color,
        align='center',
        font=dict(size=12, color='black')
    ),
    cells=dict(
        values=[
            df["Model"], 
            df["Test Accuracy"].fillna(''),  # Replace NaN with an empty string
            df["Test Recall"].fillna(''),
            df["Test F1"].fillna('')
        ],
        fill_color=[
            [subheader_color if "Snapshot" in model or "Mixture" in model or "Vision" in model else cell_color for model in df["Model"]],
            cell_color, cell_color, cell_color
        ],
        align='center',
        font=dict(size=11, color='black')
    )
)])

# Show table
fig.update_layout(
    title=dict(
        text="Experimental Results Table",
        x=0.5,
        font=dict(size=16)
    ),
    margin=dict(l=10, r=10, t=30, b=10)
)

# Export table as an image or HTML
fig.write_image("results_table_colored.png")
fig.write_html("results_table_colored.html")
fig.show()

ModuleNotFoundError: No module named 'plotly'