In [16]:
import os
import re
from datetime import datetime

# Base directory - adjust this to match your actual folder structure
base_dir = 'Encoder-BS32-SL512'

# Process both NPU and PIM folders
for folder_type in ['npu', 'pim']:
    folder_path = os.path.join(base_dir, folder_type)
    
    if not os.path.exists(folder_path):
        print(f"{folder_path} does not exist, skipping...")
        continue
    
    print(f"\n{'='*80}")
    print(f"Processing {folder_path}")
    print(f"{'='*80}\n")
    
    # First, identify existing numbered folders (like L6, L8, 6, 8, etc.)
    taken_numbers = set()
    subdirs = []
    
    for item in os.listdir(folder_path):
        item_path = os.path.join(folder_path, item)
        if os.path.isdir(item_path):
            # Check if folder name contains a number (like L6, L8, or just 6, 8)
            num_match = re.search(r'L?(\d+)', item)
            if num_match and not re.match(r'\d{4}-\d{2}-\d{2}', item):
                # This is a numbered folder (not a date-time folder)
                num = int(num_match.group(1))
                taken_numbers.add(num)
                print(f"Found existing numbered folder: {item} (position {num})")
            
            # Try to parse date-time format: YYYY-MM-DD_HH:MM:SS
            match = re.match(r'(\d{4}-\d{2}-\d{2}_\d{2}:\d{2}:\d{2})', item)
            if match:
                datetime_str = match.group(1)
                try:
                    dt = datetime.strptime(datetime_str, '%Y-%m-%d_%H:%M:%S')
                    subdirs.append((item, dt, item_path))
                except ValueError:
                    print(f"Could not parse datetime from: {item}")
    
    if not subdirs:
        print(f"No date-time formatted folders found in {folder_path}")
        continue
    
    # Sort by datetime (oldest to newest)
    subdirs.sort(key=lambda x: x[1])
    
    print(f"\nFound {len(subdirs)} date-time folders to rename.")
    if taken_numbers:
        print(f"Numbers already taken: {sorted(taken_numbers)}")
    print()
    
    # Rename folders from 2 onwards, skipping taken numbers
    current_number = 2
    for old_name, dt, old_path in subdirs:
        # Skip numbers that are already taken
        while current_number in taken_numbers:
            print(f"  Skipping {current_number} (already exists)")
            current_number += 1
        
        new_name = str(current_number)
        new_path = os.path.join(folder_path, new_name)
        
        print(f"  {current_number}. {old_name} ({dt.strftime('%Y-%m-%d %H:%M:%S')}) -> {new_name}")
        
        # Rename the folder
        try:
            os.rename(old_path, new_path)
            current_number += 1
        except Exception as e:
            print(f"     ERROR: Could not rename: {e}")
            current_number += 1

print(f"\n{'='*80}")
print("✓ Folder renaming complete!")
print(f"{'='*80}")



Processing Encoder-BS32-SL512/npu


Found 8 date-time folders to rename.

  2. 2025-11-10_22:28:25 (2025-11-10 22:28:25) -> 2
  3. 2025-11-10_22:28:52 (2025-11-10 22:28:52) -> 3
  4. 2025-11-10_22:29:07 (2025-11-10 22:29:07) -> 4
  5. 2025-11-10_22:29:23 (2025-11-10 22:29:23) -> 5
  6. 2025-11-10_22:29:37 (2025-11-10 22:29:37) -> 6
  7. 2025-11-10_22:29:52 (2025-11-10 22:29:52) -> 7
  8. 2025-11-10_22:30:06 (2025-11-10 22:30:06) -> 8
  9. 2025-11-10_22:30:23 (2025-11-10 22:30:23) -> 9
Encoder-BS32-SL512/pim does not exist, skipping...

✓ Folder renaming complete!


In [17]:
import os
import shutil

# Base directory
base_dir = 'Encoder-BS32-SL512'

# Files to keep
files_to_keep = ['terminal_output.log', 'SA_stage_E.tsv']

# Process NPU and PIM folders
for folder_name in ['npu', 'pim']:
    folder_path = os.path.join(base_dir, folder_name)
    
    if os.path.exists(folder_path):
        print(f"\nProcessing {folder_path}...")
        
        # Walk through all subdirectories
        for root, dirs, files in os.walk(folder_path):
            for file in files:
                file_path = os.path.join(root, file)
                
                # Check if this file should be kept
                if file not in files_to_keep:
                    try:
                        os.remove(file_path)
                        print(f"  Deleted: {file_path}")
                    except Exception as e:
                        print(f"  Error deleting {file_path}: {e}")
                else:
                    print(f"  Kept: {file_path}")
    else:
        print(f"\n{folder_path} does not exist")

print("\n✓ Cleanup complete!")



Processing Encoder-BS32-SL512/npu...
  Deleted: Encoder-BS32-SL512/npu/6/mem_io_E_ch_19.tsv:Zone.Identifier
  Kept: Encoder-BS32-SL512/npu/6/SA_stage_E.tsv
  Deleted: Encoder-BS32-SL512/npu/6/mem_io_E_ch_6.tsv:Zone.Identifier
  Kept: Encoder-BS32-SL512/npu/6/terminal_output.log
  Deleted: Encoder-BS32-SL512/npu/6/mem_io_E_ch_26.tsv
  Deleted: Encoder-BS32-SL512/npu/6/_summary.tsv:Zone.Identifier
  Deleted: Encoder-BS32-SL512/npu/6/mem_io_E_ch_9.tsv
  Deleted: Encoder-BS32-SL512/npu/6/mem_io_E_ch_12.tsv
  Deleted: Encoder-BS32-SL512/npu/6/mem_io_E_ch_2.tsv
  Deleted: Encoder-BS32-SL512/npu/6/mem_io_E_ch_27.tsv:Zone.Identifier
  Deleted: Encoder-BS32-SL512/npu/6/npu_utilization.tsv
  Deleted: Encoder-BS32-SL512/npu/6/_summary.tsv
  Deleted: Encoder-BS32-SL512/npu/6/mem_io_E_ch_10.tsv:Zone.Identifier
  Deleted: Encoder-BS32-SL512/npu/6/dramsim3.json
  Deleted: Encoder-BS32-SL512/npu/6/mem_io_E_ch_12.tsv:Zone.Identifier
  Deleted: Encoder-BS32-SL512/npu/6/mem_io_E_ch_23.tsv:Zone.Identifie

In [18]:
import pandas as pd
import re
import os
import glob
from collections import defaultdict

# Find all SA_stage_E.tsv files in the NPU folder
npu_folder = 'Encoder-BS32-SL512/npu'
tsv_files = glob.glob(os.path.join(npu_folder, '**/SA_stage_E.tsv'), recursive=True)

print(f"Found {len(tsv_files)} SA_stage_E.tsv file(s) in {npu_folder}\n")

# Process each TSV file
all_results = {}

for input_file in tsv_files:
    print(f"\n{'='*80}")
    print(f"Processing: {input_file}")
    print(f"{'='*80}\n")
    
    # Read the TSV file
    df = pd.read_csv(input_file, sep='\t')
    print(f"Data loaded successfully! Total rows: {len(df)}")
    
    # Extract expert operations - only TotalCycle
    expert_data = defaultdict(lambda: {'param_load': None, 'fc1': None, 'gelu': None, 'fc2': None})
    
    for idx, row in df.iterrows():
        op_name = row['OpName']
        
        # Check if it's an expert operation
        match = re.match(r'layer0\.ffn\.moe_expert\.(\d+)\.(param_load|fc1|fc2|gelu)', op_name)
        
        if match:
            expert_num = int(match.group(1))
            operation = match.group(2)
            expert_data[expert_num][operation] = row['TotalCycle']
    
    print(f"Processed data for {len(expert_data)} experts\n")
    
    # Create DataFrame with only total cycles
    summary_data = []
    
    for expert_num in sorted(expert_data.keys()):
        param_load_raw = expert_data[expert_num]['param_load'] if expert_data[expert_num]['param_load'] else 0
        fc1 = expert_data[expert_num]['fc1'] if expert_data[expert_num]['fc1'] else 0
        gelu = expert_data[expert_num]['gelu'] if expert_data[expert_num]['gelu'] else 0
        fc2 = expert_data[expert_num]['fc2'] if expert_data[expert_num]['fc2'] else 0
        
        # Multiply param_load by 64
        param_load = param_load_raw * 64 if param_load_raw != 0 else 'N/A'
        
        # Calculate total compute (fc1 + gelu + fc2)
        total_compute = fc1 + gelu + fc2
        
        row_data = {
            'Expert Number': expert_num,
            'param_load': param_load,
            'fc1': fc1,
            'gelu': gelu,
            'fc2': fc2,
            'total compute': total_compute
        }
        summary_data.append(row_data)
    
    summary_df = pd.DataFrame(summary_data)
    
    # Print table
    print("Expert Operations - Total Cycles")
    print("="*80)
    print(summary_df.to_string(index=False))
    
    # Save to text file with same name as input but .txt extension
    output_file = os.path.splitext(input_file)[0] + '.txt'
    with open(output_file, 'w') as f:
        f.write("Expert Operations - Total Cycles\n")
        f.write("="*80 + "\n")
        f.write(summary_df.to_string(index=False))
    
    print(f"\nOutput saved to: {output_file}\n")
    
    # Store result for this file
    all_results[input_file] = summary_df

print(f"\n{'='*80}")
print(f"✓ Processing complete! Processed {len(all_results)} file(s)")
print(f"{'='*80}")

# Display the last DataFrame (or first if you prefer)
if all_results:
    list(all_results.values())[-1]


Found 8 SA_stage_E.tsv file(s) in Encoder-BS32-SL512/npu


Processing: Encoder-BS32-SL512/npu/6/SA_stage_E.tsv

Data loaded successfully! Total rows: 261
Processed data for 64 experts

Expert Operations - Total Cycles
 Expert Number  param_load   fc1  gelu   fc2  total compute
             0      353536 17162  1438 17162          35762
             1      353600 20089  1680 20088          41857
             2      353536 15049  1260 15049          31358
             3      353536 18169  1520 18170          37859
             4      353536 20904  1749 20906          43559
             5      353600 18697  1565 18697          38959
             6      353600 22153  1852 22154          46159
             7      353536 21673  1812 21673          45158
             8      353600 14377  1207 14376          29960
             9      353536 20137  1687 20138          41962
            10      353536 20234  1692 20234          42160
            11      353536 16297  1365 16298          33960
  

In [14]:
import pandas as pd
import re
import os
import glob
from collections import defaultdict

# Find all SA_stage_E.tsv files in the PIM folder
pim_folder = 'Encoder-BS32-SL256/pim'
tsv_files = glob.glob(os.path.join(pim_folder, '**/SA_stage_E.tsv'), recursive=True)

print(f"Found {len(tsv_files)} SA_stage_E.tsv file(s) in {pim_folder}\n")

# Process each TSV file
all_results = {}

for input_file in tsv_files:
    print(f"\n{'='*80}")
    print(f"Processing: {input_file}")
    print(f"{'='*80}\n")
    
    # Read the TSV file
    df = pd.read_csv(input_file, sep='\t')
    print(f"Data loaded successfully! Total rows: {len(df)}")
    
    # Extract SL (sequence length) from folder path to determine activation multiplier
    sl_match = re.search(r'SL(\d+)', input_file)
    if sl_match:
        sl_value = int(sl_match.group(1))
        activation_multiplier = sl_value / 64
        print(f"Detected SL{sl_value} - Activation multiplier: {activation_multiplier}x")
    else:
        activation_multiplier = 1
        print(f"No SL detected - Using default activation multiplier: 1x")
    
    # Extract activation movement operations
    activation_movements = {}
    for idx, row in df.iterrows():
        op_name = row['OpName']
        if 'activation_movement' in op_name:
            activation_movements[op_name] = row['TotalCycle']
    
    # Display activation movements with multiplier applied
    if activation_movements:
        print("\n--- Activation Movements ---")
        total_activation_cycles = 0
        for op_name, cycles in activation_movements.items():
            scaled_cycles = int(cycles * activation_multiplier)
            print(f"{op_name}: {scaled_cycles} cycles")
            total_activation_cycles += scaled_cycles
        print(f"Total activation movement cycles: {total_activation_cycles}")
        print("----------------------------\n")
    
    # Extract expert operations - only TotalCycle (no param_load for PIM)
    expert_data = defaultdict(lambda: {'fc1': None, 'gelu': None, 'fc2': None})
    
    for idx, row in df.iterrows():
        op_name = row['OpName']
        
        # Check if it's an expert operation
        match = re.match(r'layer0\.ffn\.moe_expert\.(\d+)\.(fc1|fc2|gelu)', op_name)
        
        if match:
            expert_num = int(match.group(1))
            operation = match.group(2)
            expert_data[expert_num][operation] = row['TotalCycle']
    
    print(f"Processed data for {len(expert_data)} experts\n")
    
    # Create DataFrame with only total cycles
    summary_data = []
    
    for expert_num in sorted(expert_data.keys()):
        fc1 = expert_data[expert_num]['fc1'] if expert_data[expert_num]['fc1'] else 0
        gelu = expert_data[expert_num]['gelu'] if expert_data[expert_num]['gelu'] else 0
        fc2 = expert_data[expert_num]['fc2'] if expert_data[expert_num]['fc2'] else 0
        
        # Calculate total compute (fc1 + gelu + fc2)
        total_compute = fc1 + gelu + fc2
        
        row_data = {
            'Expert Number': expert_num,
            'fc1': fc1,
            'gelu': gelu,
            'fc2': fc2,
            'total compute': total_compute
        }
        summary_data.append(row_data)
    
    summary_df = pd.DataFrame(summary_data)
    
    # Print table with note about activation movements
    print("PIM Expert Operations - Total Cycles")
    print("Note: 2 activation movements (fc1 and fc2)")
    print("="*80)
    print(summary_df.to_string(index=False))
    
    # Save to text file with same name as input but .txt extension
    output_file = os.path.splitext(input_file)[0] + '.txt'
    with open(output_file, 'w') as f:
        f.write("PIM Expert Operations - Total Cycles\n")
        f.write("Note: 2 activation movements (fc1 and fc2)\n\n")
        
        # Write activation movements info with scaling
        if activation_movements:
            f.write("--- Activation Movements ---\n")
            total_activation_cycles = 0
            for op_name, cycles in activation_movements.items():
                scaled_cycles = int(cycles * activation_multiplier)
                f.write(f"{op_name}: {scaled_cycles} cycles\n")
                total_activation_cycles += scaled_cycles
            f.write(f"Total activation movement cycles: {total_activation_cycles}\n")
            f.write("----------------------------\n\n")
        
        f.write("="*80 + "\n")
        f.write(summary_df.to_string(index=False))
    
    print(f"\nOutput saved to: {output_file}\n")
    
    # Store result for this file
    all_results[input_file] = summary_df

print(f"\n{'='*80}")
print(f"✓ Processing complete! Processed {len(all_results)} file(s)")
print(f"{'='*80}")

# Display the last DataFrame (or first if you prefer)
if all_results:
    list(all_results.values())[-1]


Found 8 SA_stage_E.tsv file(s) in Encoder-BS32-SL256/pim


Processing: Encoder-BS32-SL256/pim/6/SA_stage_E.tsv

Data loaded successfully! Total rows: 263
Detected SL256 - Activation multiplier: 4.0x

--- Activation Movements ---
layer0.ffn.activation_movement_2: 572596 cycles
layer0.ffn.activation_movement_1: 575268 cycles
Total activation movement cycles: 1147864
----------------------------

Processed data for 64 experts

PIM Expert Operations - Total Cycles
Note: 2 activation movements (fc1 and fc2)
 Expert Number    fc1  gelu    fc2  total compute
             0 433722   663 433710         868095
             1 597048   887 597112        1195047
             2 428089   648 428075         856812
             3 492841   740 492843         986424
             4 549177   821 549177        1099175
             5 521016   789 521001        1042806
             6 707082  1042 706858        1414982
             7 616745  1085 616745        1234575
             8 498475   749 498475        