In [1]:
import os
import json
import pandas as pd
import sys
import altair as alt

def load_json_files(directory):
    """Recursively load every ``*.json`` file under ``directory`` into one DataFrame.

    Each file is expected to hold a single JSON object, which becomes one row.
    Files that cannot be read or parsed, or whose top-level value is not a
    JSON object, are reported on stderr and skipped. Rows containing any
    missing value are listed on stdout and dropped. Rows whose ``success``
    column equals ``False`` are listed on stdout but kept.

    Args:
        directory: Root directory to walk.

    Returns:
        A DataFrame sorted by ``m``, ``k``, ``n`` and ``world_size`` (row
        labels are the positions in load order), or ``None`` when no usable
        JSON file was found.

    Raises:
        KeyError: If one of the sort columns is absent from the loaded data.
    """
    records = []
    sources = []  # sources[i] is the path that produced records[i]

    for root, dirs, files in os.walk(directory):
        # Sorting in place steers os.walk, so traversal (and therefore row
        # labels and report order) no longer depends on the filesystem.
        dirs.sort()
        for filename in sorted(files):
            if not filename.endswith(".json"):
                continue
            file_path = os.path.join(root, filename)
            try:
                # Decode explicitly as UTF-8 so parsing does not depend on
                # the platform's locale encoding.
                with open(file_path, "r", encoding="utf-8") as f:
                    data = json.load(f)
            except Exception as e:
                # Deliberately best-effort: one bad file must not abort the load.
                print(f"Error reading {file_path}: {e}", file=sys.stderr)
                continue
            if not isinstance(data, dict):
                # A list or scalar payload cannot be turned into a row and
                # would break DataFrame construction below.
                print(
                    f"Error reading {file_path}: expected a JSON object, "
                    f"got {type(data).__name__}",
                    file=sys.stderr,
                )
                continue
            records.append(data)
            sources.append(file_path)

    if not records:
        print("No valid JSON files found.", file=sys.stderr)
        return None

    df = pd.DataFrame(records)

    nan_rows = df.isna().any(axis=1)
    if nan_rows.any():
        print("Files with NaN values (dropped):")
        for idx in df[nan_rows].index:
            print(sources[idx])
        # dropna keeps the original row labels, so `sources` stays aligned.
        df = df.dropna()

    if "success" in df.columns:
        failed_rows = df["success"].eq(False)
        if failed_rows.any():
            print("Files where 'success' is False:")
            for idx in df[failed_rows].index:
                print(sources[idx])

    return df.sort_values(by=["m", "k", "n", "world_size"])


In [2]:
# Directory (relative to this notebook) that is scanned for result JSON files.
LOG_DIR = "../../../slurm_logs/latest"

runs = load_json_files(LOG_DIR)
if runs is None:
    # load_json_files returns None when nothing could be loaded; fail with a
    # clear message instead of an opaque "NoneType is not subscriptable".
    raise RuntimeError(f"No benchmark results could be loaded from {LOG_DIR!r}")

# Keep only rows whose m, k and n are all non-zero. Taking an explicit copy
# means the column assignments below write to an independent frame rather
# than to a filtered slice (which triggers SettingWithCopyWarning).
nonzero_dims = (runs['m'] != 0) & (runs['k'] != 0) & (runs['n'] != 0)
df = runs[nonzero_dims].copy()

# Algorithm name is taken from the output file path; rows whose path contains
# neither token get NaN. expand=False yields a Series for the single group.
df['algo'] = df['output_file'].str.extract(r'(all_gather|all_reduce)', expand=False)
df['MKN'] = df['M'] * df['N'] * df['K']

# Single-rank timings serve as the reference for each (M, N, K, algo).
# NOTE(review): if several world_size == 1 rows share the same key, the merge
# below duplicates the matching rows — confirm baselines are unique.
baseline = df[df['world_size'] == 1][['M', 'N', 'K', 'algo', 'ms']].rename(columns={'ms': 'baseline_ms'})
# Inner join: configurations without a single-rank baseline are dropped.
df = df.merge(baseline, on=['M', 'N', 'K', 'algo'], how='inner')
df['speedup'] = df['baseline_ms'] / df['ms']

# Speedup divided by the number of ranks.
df['efficiency'] = df['speedup'] / df['world_size']


In [17]:
# Legend-bound point selections: clicking a legend entry keeps the matching
# points opaque and fades the rest (see the `opacity` condition below).
# Vega-Lite asks for parameter names that are valid JavaScript identifiers,
# so the name carries an underscore instead of a space.
algo_selection = alt.selection_point(fields=['algo'], bind='legend', name='Algorithm')
rank_selection = alt.selection_point(fields=['world_size'], bind='legend', name='World_Size')

# FLOPs against problem size on log-log axes; shape encodes the algorithm and
# colour encodes the world size.
alt.Chart(df).mark_point(size=60).encode(
    x=alt.X('MKN:Q', scale=alt.Scale(type='log'), title='MxKxN (log scale)'),
    y=alt.Y('flops:Q', scale=alt.Scale(type='log'), title='FLOPs (log scale)'),
    shape=alt.Shape('algo:N', legend=alt.Legend(title="Algorithm")),
    color=alt.Color('world_size:O', legend=alt.Legend(title="World Size"), scale=alt.Scale(scheme='viridis')),
    # A point is fully opaque only when it satisfies both selections.
    opacity=alt.condition(algo_selection & rank_selection, alt.value(1), alt.value(0.1)),
    tooltip=['algo', 'world_size', 'm', 'n', 'k', 'MKN', 'flops', 'ms', 'speedup']
).add_params(
    algo_selection,
    rank_selection
).properties(
    title='FLOPs vs MKN'
).interactive()


In [22]:

# FLOPs against problem size (log-log) restricted to a single algorithm;
# colour encodes the world size.
algo = 'all_gather'
alt.Chart(
    df.loc[df['algo'] == algo],
    title=f'FLOPs vs MKN ({algo})',
).mark_circle(
    size=30
).encode(
    alt.X('MKN:Q', scale=alt.Scale(type='log'), title='MxKxN (log scale)'),
    alt.Y('flops:Q', scale=alt.Scale(type='log'), title='FLOPs (log scale)'),
    alt.Color('world_size:O', scale=alt.Scale(scheme='viridis')),
    tooltip=['world_size', 'm', 'n', 'k', 'MKN', 'flops', 'ms', 'speedup'],
).interactive()


In [23]:
# FLOPs against problem size (log-log) restricted to a single algorithm;
# colour encodes the world size.
algo = 'all_reduce'
alt.Chart(
    df.loc[df['algo'] == algo],
    title=f'FLOPs vs MKN ({algo})',
).mark_circle(
    size=30
).encode(
    alt.X('MKN:Q', scale=alt.Scale(type='log'), title='MxKxN (log scale)'),
    alt.Y('flops:Q', scale=alt.Scale(type='log'), title='FLOPs (log scale)'),
    alt.Color('world_size:O', scale=alt.Scale(scheme='viridis')),
    tooltip=['world_size', 'm', 'n', 'k', 'MKN', 'flops', 'ms', 'speedup'],
).interactive()


In [25]:
# Speedup relative to the single-rank run against problem size (log-log),
# restricted to a single algorithm; colour encodes the world size.
algo = 'all_reduce'
alt.Chart(df[df['algo'] == algo]).mark_circle(size=30).encode(
    x=alt.X('MKN:Q', scale=alt.Scale(type='log'), title='MxKxN (log scale)'),
    y=alt.Y('speedup:Q', scale=alt.Scale(type='log'), title='Speedup relative to 1 rank (log scale)'),
    color=alt.Color('world_size:O', scale=alt.Scale(scheme='viridis')),
    tooltip=['world_size', 'm', 'n', 'k', 'MKN', 'flops', 'ms', 'speedup']
).properties(
    # The title names what is actually plotted: speedup (y) against MKN (x).
    title=f'Speedup vs MKN ({algo})'
).interactive()

In [26]:
# Speedup relative to the single-rank run against problem size (log-log),
# restricted to a single algorithm; colour encodes the world size.
algo = 'all_gather'
alt.Chart(df[df['algo'] == algo]).mark_circle(size=30).encode(
    x=alt.X('MKN:Q', scale=alt.Scale(type='log'), title='MxKxN (log scale)'),
    y=alt.Y('speedup:Q', scale=alt.Scale(type='log'), title='Speedup relative to 1 rank (log scale)'),
    color=alt.Color('world_size:O', scale=alt.Scale(scheme='viridis')),
    tooltip=['world_size', 'm', 'n', 'k', 'MKN', 'flops', 'ms', 'speedup']
).properties(
    # The title names what is actually plotted: speedup (y) against MKN (x).
    title=f'Speedup vs MKN ({algo})'
).interactive()

In [9]:
# Legend-bound point selections: clicking a legend entry keeps the matching
# points opaque and fades the rest (see the `opacity` condition below).
# Vega-Lite asks for parameter names that are valid JavaScript identifiers,
# so the name carries an underscore instead of a space.
algo_selection = alt.selection_point(fields=['algo'], bind='legend', name='Algorithm')
rank_selection = alt.selection_point(fields=['world_size'], bind='legend', name='World_Size')

# Speedup against problem size on log-log axes for all algorithms at once;
# shape encodes the algorithm and colour encodes the world size.
alt.Chart(df).mark_circle(size=20).encode(
    x=alt.X('MKN:Q', scale=alt.Scale(type='log'), title='MxKxN (log scale)'),
    y=alt.Y('speedup:Q', scale=alt.Scale(type='log'), title='Speedup (log scale)'),
    color=alt.Color('world_size:O', scale=alt.Scale(scheme='viridis')),
    shape=alt.Shape('algo:N'),
    # A point is fully opaque only when it satisfies both selections.
    opacity=alt.condition(algo_selection & rank_selection, alt.value(1), alt.value(0.1)),
    tooltip=['world_size', 'algo', 'M', 'N', 'K', 'MKN', 'ms', 'speedup']
).add_params(
    algo_selection,
    rank_selection
).properties(
    title='Speedup vs MKN'
).interactive()


In [5]:
# Legend-bound point selections: clicking a legend entry keeps the matching
# points opaque and fades the rest (see the `opacity` condition below).
# Vega-Lite asks for parameter names that are valid JavaScript identifiers,
# so the name carries an underscore instead of a space.
algo_selection = alt.selection_point(fields=['algo'], bind='legend', name='Algorithm')
rank_selection = alt.selection_point(fields=['world_size'], bind='legend', name='World_Size')

# Efficiency (speedup divided by world size) against problem size on log-log
# axes; shape encodes the algorithm and colour encodes the world size.
alt.Chart(df).mark_circle(size=60).encode(
    x=alt.X('MKN:Q', scale=alt.Scale(type='log'), title='Global MxKxN (log scale)'),
    y=alt.Y('efficiency:Q', scale=alt.Scale(type='log'), title='Efficiency (log scale)'),
    color=alt.Color('world_size:O', scale=alt.Scale(scheme='viridis')),
    shape=alt.Shape('algo:N'),
    # A point is fully opaque only when it satisfies both selections.
    opacity=alt.condition(algo_selection & rank_selection, alt.value(1), alt.value(0.1)),
    tooltip=['algo', 'world_size', 'M', 'N', 'K', 'MKN', 'speedup', 'efficiency']
).add_params(
    algo_selection,
    rank_selection
).properties(
    # En dash restored; the previous text was a mis-decoded UTF-8 sequence.
    title='Efficiency vs Global MxKxN (Log–Log Scale)'
).interactive()


In [6]:
# (rows, columns) of the distinct (M, N, K) combinations present in df.
df.loc[:, ['M', 'N', 'K']].drop_duplicates().shape


(153, 3)

In [7]:
# Same count, restricted to the single-rank (world_size == 1) rows.
df.loc[df['world_size'] == 1, ['M', 'N', 'K']].drop_duplicates().shape


(153, 3)

In [8]:
# Distinct algorithm labels that were extracted into the `algo` column.
pd.unique(df['algo'])

array(['all_reduce', 'all_gather'], dtype=object)