In [None]:
import torch
from pathlib import Path

# Print every tensor element: the text dump written below embeds full tensor
# reprs and must not be abbreviated with "...".
torch.set_printoptions(threshold=float('inf'))

# --- Configuration: which run / checkpoint / layer / shard to export ---
model = "flame-moe-290m"
runid, epoch, layer = 31066, 1080, 2
shard = "0-0.pt"
num_tokens = 512  # how many leading entries of each loaded object to keep

samples = torch.load(Path(f"samples/{model}/{runid}", shard), map_location="cpu")
actives = torch.load(Path(f"actives/{model}/{runid}/{epoch}/{layer}", shard), map_location="cpu")
# The actives file unpacks into exactly two objects, used below as the
# per-token scores and the matching indices.
scores, indices = actives


def _leading_copy(values, count):
    """Return the first `count` entries of `values` as an independent copy.

    Slicing a tensor yields a view that shares storage with the full tensor,
    and `torch.save` serializes that whole storage. Cloning the view first
    keeps the saved file proportional to the slice rather than to the shard.
    Non-tensor inputs are returned as a plain slice.
    """
    head = values[:count]
    return head.clone() if torch.is_tensor(head) else head


# Keep only the first `num_tokens` entries of each object.
samples_sliced = _leading_copy(samples, num_tokens)
scores_sliced = _leading_copy(scores, num_tokens)
indices_sliced = _leading_copy(indices, num_tokens)

# Build an output name that records where the data came from.
shard_name = Path(shard).stem  # strip only the file extension
output_filename = (
    f"{model}_runid{runid}_epoch{epoch}_layer{layer}"
    f"_shard{shard_name}_{num_tokens}tokens"
)

# Save the sliced tensors together with their provenance metadata.
output_data = {
    'model': model,
    'runid': runid,
    'epoch': epoch,
    'layer': layer,
    'shard': shard,
    'samples': samples_sliced,
    'scores': scores_sliced,
    'indices': indices_sliced
}
torch.save(output_data, f'{output_filename}.pt')

# Human-readable dump: a metadata header followed by one section per tensor.
rule = "=" * 60 + "\n"
sections = [
    ("samples", samples_sliced),
    ("scores", scores_sliced),
    ("indices", indices_sliced),
]
with open(f'{output_filename}.txt', 'w') as f:
    # Metadata header
    f.write(rule)
    f.write("METADATA\n")
    f.write(rule)
    f.write(f"Model: {model}\n")
    f.write(f"Run ID: {runid}\n")
    f.write(f"Epoch: {epoch}\n")
    f.write(f"Layer: {layer}\n")
    f.write(f"Shard: {shard}\n")
    f.write(rule + "\n")

    # Each section is: centered title, shape, full contents. Sections are
    # separated by one blank line and the file ends with a single newline.
    f.write(
        "\n\n".join(
            f"{title.center(40, '-')}\n{values.shape}\n{values}"
            for title, values in sections
        )
        + "\n"
    )

print(f"Data saved to '{output_filename}.pt' and '{output_filename}.txt'")


Data saved to 'flame-moe-290m_runid31066_epoch1080_layer2_shard0-0_512tokens.pt' and 'flame-moe-290m_runid31066_epoch1080_layer2_shard0-0_512tokens.txt'


In [10]:
import torch
import csv

# Read back the bundle written by the export step (metadata + sliced tensors).
input_filename = "flame-moe-290m_runid31066_epoch1080_layer2_shard0-0_512tokens"
data = torch.load(f'{input_filename}.pt')

samples, scores, indices = data['samples'], data['scores'], data['indices']

num_experts = 64  # Total number of experts (0-63)
csv_filename = f'{input_filename}.csv'

# One CSV row per token: layer_id, token_id, then a dense score per expert.
with open(csv_filename, 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(['layer_id', 'token_id'] + [f'expert_{i}' for i in range(num_experts)])

    for token_idx in range(len(samples)):
        # Experts that are not listed for this token keep a score of zero.
        dense_row = [0.0] * num_experts

        active_ids = indices[token_idx]
        active_scores = scores[token_idx]
        for slot in range(len(active_ids)):
            expert_id = int(active_ids[slot].item())
            dense_row[expert_id] = float(active_scores[slot].item())

        # NOTE(review): layer_id is written as a constant 0 even though the
        # loaded bundle also carries a 'layer' entry - confirm this is intended.
        writer.writerow([0, token_idx, *(f'{value:.6f}' for value in dense_row)])

print(f"CSV saved to '{csv_filename}'")
print(f"Format: layer_id=0, token_id (0-511), expert_0, expert_1, ..., expert_63 (6 decimal places)")
print(f"Total rows: {len(samples) + 1} (including header)")


CSV saved to 'flame-moe-290m_runid31066_epoch1080_layer2_shard0-0_512tokens.csv'
Format: layer_id=0, token_id (0-511), expert_0, expert_1, ..., expert_63 (6 decimal places)
Total rows: 513 (including header)


In [3]:
import pandas as pd
import numpy as np
import csv

# Load the dense per-token expert score table (one row per token).
csv_filename = "flame-moe-290m_runid31066_epoch1080_layer2_shard0-0_512tokens.csv"
df = pd.read_csv(csv_filename)

NUM_EXPERTS = 64  # the CSV has columns expert_0 .. expert_63
TOP_K = 2         # number of highest-scoring experts counted per token

expert_columns = [f'expert_{i}' for i in range(NUM_EXPERTS)]
total_tokens = len(df)

# For every token, take the TOP_K highest-scoring expert columns and tally how
# often each expert is selected. Done on the whole score matrix at once
# instead of row by row with DataFrame.iterrows().
score_matrix = df[expert_columns].to_numpy()
top_k_indices = np.argsort(score_matrix, axis=1)[:, -TOP_K:]
per_expert_totals = np.bincount(top_k_indices.ravel(), minlength=NUM_EXPERTS)
expert_counts = {expert_id: int(count) for expert_id, count in enumerate(per_expert_totals)}

# Sort by count (descending); the sort is stable, so ties keep expert-id order.
sorted_experts = sorted(expert_counts.items(), key=lambda item: item[1], reverse=True)
total_assignments = sum(expert_counts.values())

# Expert_Activ: number of experts that received at least one token.
expert_activ = sum(1 for count in expert_counts.values() if count > 0)

# Gating Mechanism Parameters
BW_PCIe = 32  # Gbps
BW_MD = 512   # Gbps
alpha = 1.0   # Scaling factor (default = 1)

# Split the active experts between GPU and MD in proportion to BW_PCIe's
# share of the combined bandwidth; MD gets the remainder.
expert_gpu = round((BW_PCIe / (BW_MD + BW_PCIe)) * expert_activ)
expert_md = expert_activ - expert_gpu

# Gating Mechanism Output (H), truncated to an integer.
H = int(alpha * expert_gpu)

# Ranked report rows shared by both output files. Percentage is relative to
# the number of tokens, so the column sums to TOP_K * 100. An empty table
# yields 0.0 instead of dividing by zero.
ranked_rows = [
    (rank, expert_id, count, (count / total_tokens) * 100 if total_tokens else 0.0)
    for rank, (expert_id, count) in enumerate(sorted_experts, 1)
]

# Create output filenames
base_filename = csv_filename.replace('.csv', '')
stats_txt_file = f"{base_filename}_top{TOP_K}_stats.txt"
stats_csv_file = f"{base_filename}_top{TOP_K}_stats.csv"

heavy_rule = "=" * 60 + "\n"
light_rule = "-" * 60 + "\n"

# Save to text file. The report contains non-ASCII characters, so the encoding
# is pinned to UTF-8: the platform default encoding may be unable to encode
# them and would raise UnicodeEncodeError.
with open(stats_txt_file, 'w', encoding='utf-8') as f:
    f.write(heavy_rule)
    f.write(f"TOP {TOP_K} EXPERT TOKEN DISTRIBUTION\n")
    f.write(heavy_rule)
    f.write(f"Total tokens: {total_tokens}\n")
    f.write(f"Expected total (tokens × {TOP_K}): {total_tokens * TOP_K}\n")
    f.write(heavy_rule)
    f.write(f"{'Expert ID':<12} {'Token Count':<15} {'Percentage':<10}\n")
    f.write(light_rule)

    for _rank, expert_id, count, percentage in ranked_rows:
        f.write(f"Expert {expert_id:<5} {count:<15} {percentage:>6.2f}%\n")

    f.write(light_rule)
    f.write(f"Total assignments: {total_assignments}\n")

    # Write Gating Mechanism Output
    f.write("\n" + heavy_rule)
    f.write("GATING MECHANISM OUTPUT\n")
    f.write(heavy_rule)
    f.write(f"Expert_Activ (experts with ≥1 token): {expert_activ}\n")
    f.write("\nBandwidth Parameters:\n")
    f.write(f"  BW_PCIe: {BW_PCIe} Gbps\n")
    f.write(f"  BW_MD:   {BW_MD} Gbps\n")
    f.write(f"  Alpha (α): {alpha}\n")
    f.write("\nCalculated Values:\n")
    f.write(f"  Expert_GPU: {expert_gpu}\n")
    f.write(f"  Expert_MD:  {expert_md}\n")
    f.write(f"  H (Gating Output): {H}\n")
    f.write("\nFormulas Used:\n")
    f.write("  Expert_GPU = (BW_PCIe / (BW_MD + BW_PCIe)) × Expert_Activ\n")
    f.write(f"  Expert_GPU = ({BW_PCIe} / ({BW_MD} + {BW_PCIe})) × {expert_activ}\n")
    f.write("  Expert_MD = Expert_Activ - Expert_GPU\n")
    f.write(f"  H = α × Expert_GPU = {alpha} × {expert_gpu}\n")

# Save to CSV file (same ranking, machine-readable).
with open(stats_csv_file, 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(['Expert_ID', 'Token_Count', 'Percentage', 'Rank'])

    for rank, expert_id, count, percentage in ranked_rows:
        writer.writerow([expert_id, count, f"{percentage:.2f}", rank])

print("Statistics saved to:")
print(f"  - {stats_txt_file} (formatted text)")
print(f"  - {stats_csv_file} (CSV format)")
print(f"\nTotal tokens analyzed: {total_tokens}")
print(f"Total expert assignments (top {TOP_K}): {total_assignments}")
print("\nGating Mechanism Output:")
print(f"  Expert_Activ: {expert_activ}")
print(f"  Expert_GPU:   {expert_gpu}")
print(f"  Expert_MD:    {expert_md}")
print(f"  H (Gating Output): {H}")


Statistics saved to:
  - flame-moe-290m_runid31066_epoch1080_layer2_shard0-0_512tokens_top2_stats.txt (formatted text)
  - flame-moe-290m_runid31066_epoch1080_layer2_shard0-0_512tokens_top2_stats.csv (CSV format)

Total tokens analyzed: 512
Total expert assignments (top 2): 1024

Gating Mechanism Output:
  Expert_Activ: 64
  Expert_GPU:   4
  Expert_MD:    60
  H (Gating Output): 4
