# GPU Performance & Accuracy Benchmarking

This script systematically tests how different MACE model parameters affect training time, GPU utilization, and model accuracy. We vary parameters like `batch_size`, `num_channels`, `num_interactions`, `max_L`, and `correlation`.

Each configuration uses:
- `data/solvent_xtb_train_200.xyz` for training (10% of it is held out for validation)
- `data/solvent_xtb_test.xyz` for testing

**Metrics collected:**
- Training duration
- Final validation error (RMSE_E and RMSE_F)
- Average GPU memory usage (sampled via `nvidia-smi` during training)
- Average GPU utilization (sampled via `nvidia-smi` during training)


In [None]:
# db = read('data/solvent_xtb.xyz', ':')
# write('data/solvent_xtb_train_200.xyz', db[:203]) #first 200 configs plus the 3 E0s

# write('data/solvent_xtb_test.xyz', db[-1000:]) #last 1000 configs

In [1]:
import time
import subprocess
import yaml
import re
from pathlib import Path
from collections import defaultdict
import pandas as pd

# Output folders: generated YAML configs / result tables, and MACE artifacts.
for _out_dir in ("benchmark_results", "MACE_models"):
    Path(_out_dir).mkdir(exist_ok=True)

# Defaults shared by every benchmark run; each entry of `configs_to_test`
# overrides a subset of these keys.
base_config = dict(
    # Model architecture
    model='MACE',
    num_interactions=2,
    num_channels=32,
    max_L=0,
    correlation=2,
    r_max=4.0,
    max_ell=2,
    # All MACE outputs go to the same folder
    model_dir='MACE_models',
    log_dir='MACE_models',
    checkpoints_dir='MACE_models',
    results_dir='MACE_models',
    # Data
    train_file='data/solvent_xtb_train_200.xyz',
    valid_fraction=0.10,
    test_file='data/solvent_xtb_test.xyz',
    E0s='average',
    energy_key='energy_xtb',
    forces_key='forces_xtb',
    # Training
    device='cuda',
    max_num_epochs=50,
    swa=True,
    seed=123,
)


def _variant(name, batch_size=10, **overrides):
    """Build one benchmark entry: a name, a batch size and parameter overrides."""
    return {'name': name, 'batch_size': batch_size, **overrides}


configs_to_test = [
    # Hand-picked "best guess" combinations
    _variant('best_guess_v1', num_channels=64, num_interactions=3, correlation=3),
    _variant('best_guess_v2', num_channels=64, num_interactions=3, correlation=3, max_L=1),
    _variant('best_guess_v3', num_channels=64, num_interactions=3, correlation=3, max_L=2),

    # Batch-size scan on the default model
    _variant('baseline'),
    _variant('batch32', batch_size=32),
    _variant('batch64', batch_size=64),

    # One parameter changed at a time
    _variant('channels64', num_channels=64),
    _variant('channels128', num_channels=128),
    _variant('interactions3', num_interactions=3),
    _variant('interactions4', num_interactions=4),
    _variant('maxL1', max_L=1),
    _variant('maxL2', max_L=2),
    _variant('correlation3', correlation=3),
    _variant('correlation4', correlation=4),

    # Two parameters changed together
    _variant('chan64_corr3', num_channels=64, correlation=3),
    _variant('chan64_inter3', num_channels=64, num_interactions=3),
    _variant('inter3_corr3', num_interactions=3, correlation=3),
    _variant('maxL1_corr3', max_L=1, correlation=3),
]


In [2]:
def cleanup_old_files(model_prefix, seed=123, model_dir="MACE_models"):
    """Delete artifacts left behind by an earlier run of the same model.

    Removes the fixed-name log/model files and every per-epoch checkpoint
    whose file name starts with ``model_prefix``. Files that do not exist are
    silently skipped, so the function is safe to call before a first run.

    Args:
        model_prefix: Name prefix of the run whose files should be removed.
        seed: Number embedded in the ``_run-<seed>`` part of the file names.
            The default (123) reproduces the previously hard-coded names.
        model_dir: Directory that holds the artifacts.
    """
    run_tag = f"_run-{seed}"
    fixed_suffixes = [
        ".log", ".txt", ".model", "_compiled.model",
        f"{run_tag}.log", f"{run_tag}.model", f"{run_tag}_train.txt",
        "_swa.model", "_swa_compiled.model",
    ]
    directory = Path(model_dir)

    for suffix in fixed_suffixes:
        try:
            (directory / f"{model_prefix}{suffix}").unlink()
        except FileNotFoundError:
            # Nothing to clean up for this suffix.
            pass

    # Checkpoints carry the epoch number (and optionally a "_swa" marker) in
    # their name, so they are matched with a wildcard instead of a fixed list.
    for checkpoint in directory.glob(f"{model_prefix}{run_tag}_epoch-*.pt"):
        checkpoint.unlink()


In [3]:
def get_gpu_stats():
    """Take one ``nvidia-smi`` sample and average it over all reported GPUs.

    Returns:
        tuple: ``(mean_utilization, mean_memory_used)``, where each mean is
        taken over the rows printed by ``nvidia-smi`` (one row per GPU).
        Units are whatever ``nvidia-smi`` reports with ``nounits``
        (presumably percent and MiB -- confirm against the nvidia-smi docs).
    """
    command = [
        'nvidia-smi',
        '--query-gpu=utilization.gpu,memory.used',
        '--format=csv,noheader,nounits',
    ]
    report = subprocess.run(command, stdout=subprocess.PIPE, text=True)

    # One "utilization, memory" pair per output row.
    gpu_rows = [
        tuple(int(field) for field in row.split(','))
        for row in report.stdout.strip().split('\n')
    ]
    # Transpose rows into the two columns we asked for.
    utilization_column, memory_column = zip(*gpu_rows)
    n_gpus = len(gpu_rows)
    return sum(utilization_column) / n_gpus, sum(memory_column) / n_gpus


In [4]:
def parse_log_file(log_path):
    """Extract the validation RMSEs from the last results table of a log.

    The log is scanned for the four-column table border. Starting from the
    last border and moving backwards, the following lines (at most 10 per
    border) are searched for the ``valid`` row; the second and third cells of
    the first such row found are returned.

    Args:
        log_path: Path of the training log to read.

    Returns:
        tuple: ``(rmse_e, rmse_f)`` as floats, or ``(None, None)`` when the
        log is missing, contains no table, has no ``valid`` row, or the row
        cannot be parsed. A message is printed in every case.
    """
    table_border = "+-------------+---------------------+------------------+-------------------+"
    valid_marker = '|    valid    |'

    try:
        with open(log_path, 'r') as f:
            lines = f.readlines()
    except FileNotFoundError:
        # A failed training run leaves no log; report it like any other
        # parsing failure instead of aborting the caller.
        print(f"❌ Log file not found: {log_path}")
        return None, None

    rmse_table_lines = [i for i, line in enumerate(lines) if table_border in line]
    if not rmse_table_lines:
        print("❌ No RMSE table found.")
        return None, None

    # Later tables supersede earlier ones, so search from the end of the log.
    for idx in reversed(rmse_table_lines):
        for row in lines[idx:idx + 10]:
            if valid_marker not in row:
                continue
            parts = row.strip().split('|')
            try:
                rmse_e = float(parts[2].strip())
                rmse_f = float(parts[3].strip())
            except (ValueError, IndexError) as e:
                print(f"❌ Error during parsing: {e}")
                return None, None
            print(f"✅ Found: RMSE_E = {rmse_e} meV, RMSE_F = {rmse_f} meV/Å")
            return rmse_e, rmse_f

    print("❌ Found no 'valid' line.")
    return None, None


In [None]:
import threading

# Train every configuration in turn and collect one result row per run:
# wall-clock time, averaged GPU samples and the validation RMSEs parsed from
# the training log.
results = []

for conf in configs_to_test:
    # Per-run overrides win over the shared defaults.
    cfg = {**base_config, **conf}
    cfg['name'] = f"mace_benchmark_{conf['name']}"
    # batch_size has no default in base_config; fail before training if a
    # configuration forgot to set it.
    cfg['batch_size'] = conf['batch_size']
    cleanup_old_files(cfg['name'])

    cfg_file = f"benchmark_results/{cfg['name']}.yaml"
    with open(cfg_file, 'w') as f:
        yaml.dump(cfg, f)

    print(f"\n🚀 Training started: {cfg['name']}")

    monitor_data = []
    stop_monitoring = threading.Event()

    def monitor_gpu(interval=1.0, samples=monitor_data, stop=stop_monitoring):
        """Append (timestamp, utilization, memory) samples until stopped."""
        while not stop.is_set():
            util, mem = get_gpu_stats()
            samples.append((time.time(), util, mem))
            # Sleeps for `interval`, but wakes immediately once stop is set.
            stop.wait(interval)

    monitor_thread = threading.Thread(target=monitor_gpu)
    monitor_thread.start()

    start_time = time.time()
    try:
        completed = subprocess.run(['mace_run_train', '--config', cfg_file])
        duration = time.time() - start_time
    finally:
        # Always stop the sampler, even if launching the training failed;
        # otherwise the thread would keep polling nvidia-smi forever.
        stop_monitoring.set()
        monitor_thread.join()

    if completed.returncode != 0:
        print(f"❌ Training failed (exit code {completed.returncode}): {cfg['name']}")

    if monitor_data:
        gpu_util_avg = sum(sample[1] for sample in monitor_data) / len(monitor_data)
        gpu_mem_avg = sum(sample[2] for sample in monitor_data) / len(monitor_data)
    else:
        gpu_util_avg = gpu_mem_avg = None

    # The log name carries a "_run-<number>" tag; the number is taken from
    # cfg['seed'] (123 by default, which matches the previously hard-coded
    # name -- presumably this is how the trainer tags its outputs).
    log_file = Path(cfg['log_dir']) / f"{cfg['name']}_run-{cfg['seed']}.log"
    if log_file.exists():
        rmse_e, rmse_f = parse_log_file(log_file)
    else:
        # A failed run leaves no log; keep the row so the failure is visible
        # in the results table instead of aborting the whole sweep.
        print(f"❌ Log file not found: {log_file}")
        rmse_e = rmse_f = None

    results.append({
        'config': cfg['name'],
        'batch_size': cfg['batch_size'],
        'num_channels': cfg['num_channels'],
        'num_interactions': cfg['num_interactions'],
        'max_L': cfg['max_L'],
        'correlation': cfg['correlation'],
        'train_time_s': round(duration, 2),
        # Compare against None explicitly: an average of 0.0 is a real value.
        'gpu_util_avg': round(gpu_util_avg, 1) if gpu_util_avg is not None else None,
        'gpu_mem_avg_MB': round(gpu_mem_avg, 1) if gpu_mem_avg is not None else None,
        'rmse_e_meV': rmse_e,
        'rmse_f_meV': rmse_f
    })

df_results = pd.DataFrame(results)
# sort_values returns a new frame; keep it so the CSV is actually ordered by
# force error (rows without a value end up last).
df_results = df_results.sort_values("rmse_f_meV")
df_results.to_csv("benchmark_results/parameters_results.csv", index=False)