<a href="https://colab.research.google.com/github/CalvinXKY/mfu_calculation/blob/main/mfu_calculation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# MFU快速计算器

## 硬件配置

In [None]:
"""
Create a mfu calculation for LLM.
"""
# Hardware settings
GPU_NUMS = 1024  # number of GPUs the peak throughput is multiplied by
GPU_FLOPS = 280  # theoretical peak throughput of a single GPU, in TFLOP/s

## 模型参数配置

In [None]:
# Example model / run parameters.

# Workload
STEP_TIME = 1.5  # time of one training step (presumably seconds, measured)
GBS = 1024       # batch size used for one step (presumably the global batch size)
SEQ_LEN = 4096   # sequence length in tokens
CP = 1           # context-parallel size

# Transformer shape
LAYER_NUMS = 100
HIDDEN_SIZE = 1024
FFN_HIDDEN_SIZE = 2048
VOCAB_SIZE = 32768

# Attention shape (NUM_HEADS * D_MODEL equals HIDDEN_SIZE in this example)
NUM_HEADS = 8
D_MODEL = 128         # used as the per-head dimension in the FLOPs formulas
NUM_QUERY_GROUPS = 4  # number of key/value groups (grouped-query attention)

# Mixture-of-experts.
# For a non-MoE (dense) model set TOP_K = 0 and SHARE_EXPERTS = 1.
TOP_K = 8
SHARE_EXPERTS = 1

## 计算函数定义：

In [None]:
import numpy as np

# 简化版：
def mfu_calculation(step_time=STEP_TIME,
                    gbs=GBS,
                    seq_len=SEQ_LEN,
                    hidden_size=HIDDEN_SIZE,
                    vocab_size=VOCAB_SIZE,
                    num_heads=NUM_HEADS,
                    d_model=D_MODEL,
                    num_query_groups=NUM_QUERY_GROUPS,
                    ffn_hidden_size=FFN_HIDDEN_SIZE,
                    share=SHARE_EXPERTS,
                    top_k=TOP_K,
                    layer_nums=LAYER_NUMS,
                    context_parallel=CP,
                    mlp_with_gate=True,
                    gpu_nums=None,
                    gpu_flops=None):
    """Estimate the Model FLOPs Utilization (MFU) of one training step.

    Simplified version: every term below is a multiply-accumulate (MAC)
    count for one forward pass over ``gbs * seq_len`` tokens. The total is
    scaled by ``3 * 2`` (the usual convention: 2 FLOPs per MAC, and
    forward + backward costing about 3x the forward pass) and divided by
    what the hardware could deliver at peak during ``step_time``.

    Args:
        step_time: Duration of one training step, in seconds.
        gbs: Batch size processed in one step.
        seq_len: Sequence length in tokens.
        hidden_size: Model hidden size. The Q and output projections are
            counted as ``hidden_size ** 2``, which matches the per-head
            shapes only when ``num_heads * d_model == hidden_size``.
        vocab_size: Vocabulary size (hidden -> vocab projection).
        num_heads: Number of attention heads.
        d_model: Per-head dimension.
        num_query_groups: Number of key/value groups.
        ffn_hidden_size: Hidden size of one MLP / expert.
        share: Number of shared experts (1 for a dense model).
        top_k: Number of routed experts per token (0 for a dense model).
        layer_nums: Number of transformer layers.
        context_parallel: Context-parallel size; scales the two
            sequence-quadratic attention matmuls by
            ``(cp + 1) / (2 * cp)``, which is 1 when ``cp == 1``.
        mlp_with_gate: True for a gated (LLaMA-style, 3-matrix) MLP,
            False for a plain 2-matrix MLP.
        gpu_nums: Number of GPUs. ``None`` (default) reads the module-level
            ``GPU_NUMS`` at call time.
        gpu_flops: Peak throughput per GPU in TFLOP/s. ``None`` (default)
            reads the module-level ``GPU_FLOPS`` at call time.

    Returns:
        The MFU as a fraction (e.g. 0.4 means 40% of peak).
    """
    # Resolve hardware settings at call time so that editing GPU_NUMS /
    # GPU_FLOPS in another cell still takes effect without redefining
    # this function.
    if gpu_nums is None:
        gpu_nums = GPU_NUMS
    if gpu_flops is None:
        gpu_flops = GPU_FLOPS

    tokens = gbs * seq_len

    # hidden -> vocab projection
    embedding_flops = tokens * hidden_size * vocab_size

    # attention flops
    q_linear = tokens * hidden_size ** 2
    # K and V are two separate projections, each of shape
    # hidden_size x (num_query_groups * d_model), hence the factor 2.
    kv_linear = 2 * tokens * hidden_size * num_query_groups * d_model
    # Q @ K^T and scores @ V have the same cost.
    # NOTE(review): the (cp + 1) / (2 * cp) factor presumably models the
    # work saved by causal masking under context parallelism - confirm.
    seq_quadratic = (gbs * seq_len**2 * num_heads * d_model
                     * (context_parallel + 1) / (2 * context_parallel))
    kv_scores = seq_quadratic
    v_projection = seq_quadratic
    out_linear = tokens * hidden_size ** 2
    attention_layer_flops = q_linear + kv_linear + kv_scores + v_projection + out_linear

    # consider layer norm. (It can be ignored)
    layer_norm = tokens * hidden_size

    # Gated MLP (LLaMA structure) has three weight matrices, plain MLP two.
    mlp_matrices = 3 if mlp_with_gate else 2
    mlp_layer_flops = mlp_matrices * tokens * hidden_size * ffn_hidden_size

    # Each token runs through `share` shared experts plus `top_k` routed ones.
    moe_layer_flops = mlp_layer_flops * (share + top_k)

    per_layer = attention_layer_flops + moe_layer_flops + layer_norm
    model_flops = 3 * 2 * (embedding_flops + layer_nums * per_layer)

    # 10 ** 12 converts the TFLOP/s peak into FLOP/s.
    mfu = model_flops / (gpu_nums * step_time * (10 ** 12)) / gpu_flops
    return mfu

## 计算测试

In [None]:
# Evaluate the MFU with the default model and hardware settings and a
# step time of 1.5. Kept as a bare trailing expression so the notebook
# displays the returned value as the cell output.
mfu_calculation(step_time=1.5)

0.39771996347245714