diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index 7022ebba..36a5f4f5 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -19,7 +19,7 @@ If the issue occurs while running a Python workload or involves a simulator cras For example: ``` -python3 tests/test_add.py +python3 tests/ops/elementwise/test_add.py ... [SpikeSimulator] cmd> spike --isa rv64gcv --varch=vlen:256,elen:64 --vectorlane-size=128 \ -m0x80000000:0x1900000000,0x2000000000:0x1000000 \ diff --git a/.github/workflows/pytorchsim_test.yml b/.github/workflows/pytorchsim_test.yml index e7ae6385..da8366af 100644 --- a/.github/workflows/pytorchsim_test.yml +++ b/.github/workflows/pytorchsim_test.yml @@ -33,7 +33,7 @@ jobs: docker run --rm \ -e vpu_num_lanes="${{ inputs.vector_lane }}" \ -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \ - ${{ inputs.image_name }} python3 PyTorchSim/tests/test_add.py + ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/elementwise/test_add.py test_transcendental: name: Run test_transcendental.py @@ -52,7 +52,7 @@ jobs: docker run --rm \ -e vpu_num_lanes="${{ inputs.vector_lane }}" \ -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \ - ${{ inputs.image_name }} python3 PyTorchSim/tests/test_transcendental.py + ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/elementwise/test_transcendental.py test_activation: name: Run test_activation.py @@ -71,7 +71,7 @@ jobs: docker run --rm \ -e vpu_num_lanes="${{ inputs.vector_lane }}" \ -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \ - ${{ inputs.image_name }} python3 PyTorchSim/tests/test_activation.py + ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/elementwise/test_activation.py test_batchnorm: name: Run test_batchnorm.py @@ -90,7 +90,7 @@ jobs: docker run --rm \ -e vpu_num_lanes="${{ inputs.vector_lane }}" \ -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \ - ${{ inputs.image_name }} python3 
PyTorchSim/tests/test_batchnorm.py + ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/reduce/test_batchnorm.py test_bmm: name: Run test_bmm.py @@ -109,7 +109,7 @@ jobs: docker run --rm \ -e vpu_num_lanes="${{ inputs.vector_lane }}" \ -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \ - ${{ inputs.image_name }} python3 PyTorchSim/tests/test_bmm.py + ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/gemm/test_bmm.py test_cnn: name: Run test_cnn.py @@ -128,7 +128,7 @@ jobs: docker run --rm \ -e vpu_num_lanes="${{ inputs.vector_lane }}" \ -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \ - ${{ inputs.image_name }} python3 PyTorchSim/tests/test_cnn.py + ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/conv/test_cnn.py test_conv2d: name: Run test_conv2d.py @@ -147,7 +147,7 @@ jobs: docker run --rm \ -e vpu_num_lanes="${{ inputs.vector_lane }}" \ -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \ - ${{ inputs.image_name }} python3 PyTorchSim/tests/test_conv2d.py + ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/conv/test_conv2d.py test_cat: name: Run test_cat.py @@ -166,7 +166,7 @@ jobs: docker run --rm \ -e vpu_num_lanes="${{ inputs.vector_lane }}" \ -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \ - ${{ inputs.image_name }} python3 PyTorchSim/tests/test_cat.py + ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/view/test_cat.py test_matmul: name: Run test_matmul.py @@ -185,7 +185,7 @@ jobs: docker run --rm \ -e vpu_num_lanes="${{ inputs.vector_lane }}" \ -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \ - ${{ inputs.image_name }} python3 PyTorchSim/tests/test_matmul.py + ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/gemm/test_matmul.py test_reduce: name: Run test_reduce.py @@ -204,7 +204,7 @@ jobs: docker run --rm \ -e vpu_num_lanes="${{ inputs.vector_lane }}" \ -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \ - ${{ inputs.image_name }} python3 PyTorchSim/tests/test_reduce.py + ${{ 
inputs.image_name }} python3 PyTorchSim/tests/ops/reduce/test_reduce.py test_softmax: name: Run test_softmax.py @@ -223,7 +223,7 @@ jobs: docker run --rm \ -e vpu_num_lanes="${{ inputs.vector_lane }}" \ -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \ - ${{ inputs.image_name }} python3 PyTorchSim/tests/test_softmax.py + ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/reduce/test_softmax.py test_transpose2D: name: Run test_transpose2D.py @@ -242,7 +242,7 @@ jobs: docker run --rm \ -e vpu_num_lanes="${{ inputs.vector_lane }}" \ -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \ - ${{ inputs.image_name }} python3 PyTorchSim/tests/test_transpose2D.py + ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/view/test_transpose2D.py test_view3D_2D: name: Run test_view3D_2D.py @@ -261,7 +261,7 @@ jobs: docker run --rm \ -e vpu_num_lanes="${{ inputs.vector_lane }}" \ -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \ - ${{ inputs.image_name }} python3 PyTorchSim/tests/test_view3D_2D.py + ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/view/test_view3D_2D.py test_layernorm: name: Run test_layernorm.py @@ -280,7 +280,7 @@ jobs: docker run --rm \ -e vpu_num_lanes="${{ inputs.vector_lane }}" \ -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \ - ${{ inputs.image_name }} python3 PyTorchSim/tests/test_layernorm.py + ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/reduce/test_layernorm.py test_mlp: name: Run test_mlp.py @@ -299,7 +299,7 @@ jobs: docker run --rm \ -e vpu_num_lanes="${{ inputs.vector_lane }}" \ -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \ - ${{ inputs.image_name }} python3 PyTorchSim/tests/test_mlp.py + ${{ inputs.image_name }} python3 PyTorchSim/tests/models/test_mlp.py test_resnet: name: Run test_resnet.py @@ -318,7 +318,7 @@ jobs: docker run --rm \ -e vpu_num_lanes="${{ inputs.vector_lane }}" \ -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \ - ${{ inputs.image_name }} python3 
PyTorchSim/tests/test_resnet.py + ${{ inputs.image_name }} python3 PyTorchSim/tests/models/test_resnet.py - name: Run test_resnet50.py run: | @@ -326,7 +326,7 @@ jobs: docker run --rm \ -e vpu_num_lanes="${{ inputs.vector_lane }}" \ -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \ - ${{ inputs.image_name }} python3 PyTorchSim/tests/test_resnet.py --model_type resnet50 + ${{ inputs.image_name }} python3 PyTorchSim/tests/models/test_resnet.py --model_type resnet50 test_mobilenet: name: Run test_mobilenet.py @@ -345,7 +345,7 @@ jobs: docker run --rm \ -e vpu_num_lanes="${{ inputs.vector_lane }}" \ -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \ - ${{ inputs.image_name }} python3 PyTorchSim/tests/MobileNet/test_mobilenet.py + ${{ inputs.image_name }} python3 PyTorchSim/tests/models/MobileNet/test_mobilenet.py test_transformer: name: Run test_transformer.py @@ -364,7 +364,7 @@ jobs: docker run --rm \ -e vpu_num_lanes="${{ inputs.vector_lane }}" \ -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \ - ${{ inputs.image_name }} python3 PyTorchSim/tests/test_transformer.py + ${{ inputs.image_name }} python3 PyTorchSim/tests/models/test_transformer.py test_transpose3D: name: Run test_transpose3D.py @@ -383,7 +383,7 @@ jobs: docker run --rm \ -e vpu_num_lanes="${{ inputs.vector_lane }}" \ -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \ - ${{ inputs.image_name }} python3 PyTorchSim/tests/test_transpose3D.py + ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/view/test_transpose3D.py test_sparsity: name: Run test_sparsity.py @@ -402,7 +402,7 @@ jobs: docker run --rm \ -e vpu_num_lanes="${{ inputs.vector_lane }}" \ -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \ - ${{ inputs.image_name }} python3 PyTorchSim/tests/test_sparsity.py + ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/sparsity/test_sparsity.py test_pool: name: Run test_pool.py @@ -421,7 +421,7 @@ jobs: docker run --rm \ -e vpu_num_lanes="${{ inputs.vector_lane }}" \ -e 
vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \ - ${{ inputs.image_name }} python3 PyTorchSim/tests/test_pool.py + ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/conv/test_pool.py test_perceptron: name: Run test_perceptron.py @@ -440,7 +440,7 @@ jobs: docker run --rm \ -e vpu_num_lanes="${{ inputs.vector_lane }}" \ -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \ - ${{ inputs.image_name }} python3 PyTorchSim/tests/test_single_perceptron.py + ${{ inputs.image_name }} python3 PyTorchSim/tests/models/test_single_perceptron.py test_fusion: name: Run test_fusion @@ -459,7 +459,7 @@ jobs: docker run --rm \ -e vpu_num_lanes="${{ inputs.vector_lane }}" \ -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \ - ${{ inputs.image_name }} python3 PyTorchSim/tests/Fusion/test_addmm_residual.py + ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/fusion/test_addmm_residual.py - name: Run test_matmul_activation.py run: | @@ -467,7 +467,7 @@ jobs: docker run --rm \ -e vpu_num_lanes="${{ inputs.vector_lane }}" \ -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \ - ${{ inputs.image_name }} python3 PyTorchSim/tests/Fusion/test_matmul_activation.py + ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/fusion/test_matmul_activation.py - name: Run test_matmul_scalar.py run: | @@ -475,7 +475,7 @@ jobs: docker run --rm \ -e vpu_num_lanes="${{ inputs.vector_lane }}" \ -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \ - ${{ inputs.image_name }} python3 PyTorchSim/tests/Fusion/test_matmul_scalar.py + ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/fusion/test_matmul_scalar.py - name: Run test_matmul_reduction.py run: | @@ -483,7 +483,7 @@ jobs: docker run --rm \ -e vpu_num_lanes="${{ inputs.vector_lane }}" \ -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \ - ${{ inputs.image_name }} python3 PyTorchSim/tests/Fusion/test_matmul_reduction.py + ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/fusion/test_matmul_reduction.py - name: Run 
test_bmm_reduction.py run: | @@ -491,7 +491,7 @@ jobs: docker run --rm \ -e vpu_num_lanes="${{ inputs.vector_lane }}" \ -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \ - ${{ inputs.image_name }} python3 PyTorchSim/tests/Fusion/test_bmm_reduction.py + ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/fusion/test_bmm_reduction.py - name: Run test_prologue_fusion.py run: | @@ -499,7 +499,7 @@ jobs: docker run --rm \ -e vpu_num_lanes="${{ inputs.vector_lane }}" \ -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \ - ${{ inputs.image_name }} python3 PyTorchSim/tests/Fusion/test_prologue_fusion.py + ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/fusion/test_prologue_fusion.py - name: Run test_transformer_fusion.py run: | @@ -507,7 +507,7 @@ jobs: docker run --rm \ -e vpu_num_lanes="${{ inputs.vector_lane }}" \ -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \ - ${{ inputs.image_name }} python3 PyTorchSim/tests/Fusion/test_transformer_fusion.py + ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/fusion/test_transformer_fusion.py - name: Run test_conv_fusion.py run: | @@ -515,7 +515,7 @@ jobs: docker run --rm \ -e vpu_num_lanes="${{ inputs.vector_lane }}" \ -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \ - ${{ inputs.image_name }} python3 PyTorchSim/tests/Fusion/test_conv_fusion.py + ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/fusion/test_conv_fusion.py - name: Run test_attention_fusion.py run: | @@ -523,7 +523,7 @@ jobs: docker run --rm \ -e vpu_num_lanes="${{ inputs.vector_lane }}" \ -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \ - ${{ inputs.image_name }} python3 PyTorchSim/tests/Fusion/test_attention_fusion.py + ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/fusion/test_attention_fusion.py - name: Run test_matmul_vector.py run: | @@ -531,7 +531,7 @@ jobs: docker run --rm \ -e vpu_num_lanes="${{ inputs.vector_lane }}" \ -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \ - ${{ inputs.image_name 
}} python3 PyTorchSim/tests/Fusion/test_matmul_vector.py + ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/fusion/test_matmul_vector.py test_moe: name: Run test_moe @@ -550,7 +550,7 @@ jobs: docker run --rm \ -e vpu_num_lanes="${{ inputs.vector_lane }}" \ -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \ - ${{ inputs.image_name }} python3 PyTorchSim/tests/MoE/test_moe.py + ${{ inputs.image_name }} python3 PyTorchSim/tests/models/MoE/test_moe.py test_mistral: name: Run test_mistral @@ -569,7 +569,7 @@ jobs: docker run --rm \ -e vpu_num_lanes="${{ inputs.vector_lane }}" \ -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \ - ${{ inputs.image_name }} python3 PyTorchSim/tests/Mixtral_8x7B/test_attention.py + ${{ inputs.image_name }} python3 PyTorchSim/tests/models/Mixtral8x7B/test_attention.py test_vit: name: Run test_vit @@ -588,7 +588,7 @@ jobs: docker run --rm \ -e vpu_num_lanes="${{ inputs.vector_lane }}" \ -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \ - ${{ inputs.image_name }} python3 PyTorchSim/tests/test_vit.py + ${{ inputs.image_name }} python3 PyTorchSim/tests/models/test_vit.py test_diffusion: name: Run test_diffusion @@ -607,7 +607,7 @@ jobs: docker run --rm \ -e vpu_num_lanes="${{ inputs.vector_lane }}" \ -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \ - ${{ inputs.image_name }} python3 PyTorchSim/tests/Diffusion/test_diffusion.py + ${{ inputs.image_name }} python3 PyTorchSim/tests/models/Diffusion/test_diffusion.py test_indirect: name: Run test_indirect @@ -626,7 +626,7 @@ jobs: docker run --rm \ -e vpu_num_lanes="${{ inputs.vector_lane }}" \ -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \ - ${{ inputs.image_name }} python3 PyTorchSim/tests/test_indirect_access.py + ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/misc/test_indirect_access.py test_scheduler: name: Run test_scheduler @@ -645,7 +645,7 @@ jobs: docker run --rm \ -e vpu_num_lanes="${{ inputs.vector_lane }}" \ -e 
vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \ - ${{ inputs.image_name }} python3 PyTorchSim/tests/test_scheduler.py + ${{ inputs.image_name }} python3 PyTorchSim/tests/system/test_scheduler.py test_llama: name: Run test_llama1&2 @@ -664,7 +664,7 @@ jobs: docker run --rm \ -e vpu_num_lanes="${{ inputs.vector_lane }}" \ -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \ - ${{ inputs.image_name }} python3 PyTorchSim/tests/Llama/test_llama.py + ${{ inputs.image_name }} python3 PyTorchSim/tests/models/Llama/test_llama.py test_yolov5: name: Run test_yolov5 @@ -683,7 +683,7 @@ jobs: docker run --rm \ -e vpu_num_lanes="${{ inputs.vector_lane }}" \ -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \ - ${{ inputs.image_name }} python3 PyTorchSim/tests/Yolov5/test_yolov5.py + ${{ inputs.image_name }} python3 PyTorchSim/tests/models/Yolov5/test_yolov5.py test_deepseek: name: Run test_deepseek @@ -702,7 +702,7 @@ jobs: docker run --rm \ -e vpu_num_lanes="${{ inputs.vector_lane }}" \ -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \ - ${{ inputs.image_name }} python3 PyTorchSim/tests/DeepSeek/test_deepseek_v3_base.py + ${{ inputs.image_name }} python3 PyTorchSim/tests/models/DeepSeek/test_deepseek_v3_base.py test_eager: name: Run test_eager.py @@ -721,7 +721,7 @@ jobs: docker run --rm \ -e vpu_num_lanes="${{ inputs.vector_lane }}" \ -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \ - ${{ inputs.image_name }} python3 PyTorchSim/tests/test_eager.py + ${{ inputs.image_name }} python3 PyTorchSim/tests/system/test_eager.py test_exponent: name: Run test_exponent.py @@ -740,7 +740,7 @@ jobs: docker run --rm \ -e vpu_num_lanes="${{ inputs.vector_lane }}" \ -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \ - ${{ inputs.image_name }} python3 PyTorchSim/tests/test_exponent.py + ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/elementwise/test_exponent.py test_sort: name: Run test_sort.py @@ -759,7 +759,7 @@ jobs: docker run --rm \ -e 
vpu_num_lanes="${{ inputs.vector_lane }}" \ -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \ - ${{ inputs.image_name }} python3 PyTorchSim/tests/test_sort.py + ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/sort/test_sort.py test_accuracy: name: Run test_accuracy and test_speedup diff --git a/CLAUDE.md b/CLAUDE.md index 8eb99c93..4a4e7424 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -23,7 +23,7 @@ The pipeline runs in that order on every `torch.compile` invocation; you'll see | `TOGSim/` | C++ TOGSim source. `src/Simulator.cc`, `Core.cc`, `Dram.cc`, `Interconnect.cc`, `L2Cache.cc`, `Tile.cc`, `TileGraph.cc` are the core models. Externals: ramulator2, booksim, stonneCore, onnx, protobuf, spdlog, yaml-cpp | | `AsmParser/` | `tog_generator.py`, `onnx_utility.py` — TOG generation from ONNX/ASM | | `configs/` | TOGSim hardware configs (YAML). The default is `systolic_ws_128x128_c1_simple_noc_tpuv3.yml`. Naming pattern: `systolic_ws__c__.yml` | -| `tests/` | ~36 op- and model-level tests. Subdirs `DeepSeek/`, `Diffusion/`, `Llama/`, `MLP/`, `Mixtral_8x7B/`, `MoE/`, `Yolov5/`, `Fusion/` for whole-model workloads | +| `tests/` | Op- and model-level tests organized under `ops/<category>/` (elementwise, reduce, gemm, conv, attention, view, sort, sparsity, misc, fusion), `models/<Model>/` (Llama, Mixtral8x7B, DeepSeek, Diffusion, MoE, MLP, MobileNet, Yolov5) plus single-file model tests (test_resnet, test_transformer, test_vit, test_mlp, test_single_perceptron), and `system/` (scheduler, eager, hetro, stonne, vectorops). Shared helper: `tests/_utils.py` | | `experiments/artifact/` | Paper reproduction scripts (`cycle_validation/run_cycle.sh`, `speedup/run_speedup.sh`) | | `scripts/` | One-off experiment runners (CompilerOpt, ILS, batch, chiplet, sparsity, stonne, end2end). 
`build_from_source.sh` builds gem5/llvm/spike | | `gem5_script/` | gem5 wrapper scripts called by `CycleSimulator` | @@ -36,16 +36,16 @@ The pipeline runs in that order on every `torch.compile` invocation; you'll see Most tests follow the same pattern: build CPU reference, compile via `torch.compile` on `npu:0`, compare with `torch.allclose` (rtol=atol=1e-4). They all have `if __name__ == "__main__"` blocks. ```bash -python tests/test_add.py # vector add (smoke test, fastest) -python tests/test_matmul.py # GEMM -python tests/test_mlp.py # MLP forward + backward (training path) -python tests/test_scheduler.py # multi-tenant launch_model -python tests/test_eager.py # eager-fallback registration +python tests/ops/elementwise/test_add.py # vector add (smoke test, fastest) +python tests/ops/gemm/test_matmul.py # GEMM +python tests/models/test_mlp.py # MLP forward + backward (training path) +python tests/system/test_scheduler.py # multi-tenant launch_model +python tests/system/test_eager.py # eager-fallback registration ``` -Run a model from `tests/Llama/`, `tests/DeepSeek/`, etc. similarly. +Run a model from `tests/models/Llama/`, `tests/models/DeepSeek/`, etc. similarly. -**CI coverage:** the GitHub Actions workflow `.github/workflows/pytorchsim_test.yml` runs an **explicit allowlist** of `tests/*.py` files (~40 jobs, one Docker container per test). Adding a new file under `tests/` does *not* automatically gate PRs — register it in `pytorchsim_test.yml` if you want CI to exercise it. Conversely, files like `tests/test_gqa.py`, `tests/test_gqa_decode.py`, and `tests/test_eager.py` exist in the repo but are *not* in CI, so local validation is the only safety net for them. +**CI coverage:** the GitHub Actions workflow `.github/workflows/pytorchsim_test.yml` runs an **explicit allowlist** of `tests/**/*.py` files (~40 jobs, one Docker container per test). 
Adding a new file under `tests/` does *not* automatically gate PRs — register it in `pytorchsim_test.yml` if you want CI to exercise it. Conversely, files like `tests/ops/attention/test_gqa.py` and `tests/ops/attention/test_gqa_decode.py` exist in the repo but are *not* in CI, so local validation is the only safety net for them. **For fast iteration** (skip functional check): ```bash @@ -123,7 +123,7 @@ Conan deps for TOGSim: `boost/1.79.0`, `robin-hood-hashing/3.11.5`, `spdlog/1.11 - **Adding a PyTorch device op:** `PyTorchSimDevice/csrc/aten/native/*` (Minimal/Extra split mirrors `torch_openreg`). - **TOGSim hardware model changes:** `TOGSim/src/{Core,Dram,Interconnect,L2Cache,Tile,TileGraph}.cc` + matching `include/*.h`. - **TOG generation:** `AsmParser/tog_generator.py` builds the raw graph and serializes it via `AsmParser/onnx_utility.py` to **ONNX, which is the on-disk TOG format** consumed by TOGSim. -- **Eager fallback registration:** `torch.npu.register_eager_to_compile([...])` — see `tests/test_eager.py`. +- **Eager fallback registration:** `torch.npu.register_eager_to_compile([...])` — see `tests/system/test_eager.py`. - **Per-run results:** `togsim_results/>.log` (stats) and `.trace` (instruction trace). The path is also printed at the end of every run. - **Wrapper codegen path:** printed as `Wrapper Codegen Path = /tmp/torchinductor_//...py` — useful for inspecting generated kernel code and tensor names for `SRAM_BUFFER_PLAN_PATH`. 
diff --git a/README.md b/README.md index 800f4761..f0bdc772 100644 --- a/README.md +++ b/README.md @@ -40,15 +40,15 @@ PyTorchSim **supports**: |---|:-:|:-:|---| | ResNet-18 | | ✅ | channel last format | | ResNet-50 | | ✅ | channel last format | -| MobileNet-v2 | | ✅ | `tests/MobileNet/` (torchvision) | -| YOLOv5 | | ✅ | `tests/Yolov5/` | +| MobileNet-v2 | | ✅ | `tests/models/MobileNet/` (torchvision) | +| YOLOv5 | | ✅ | `tests/models/Yolov5/` | | BERT | | ✅ | | | GPT-2 | | ✅ | | -| ViT | | ✅ | `tests/test_vit.py` | +| ViT | | ✅ | `tests/models/test_vit.py` | | Mistral | | ✅ | | | Stable-diffusion v1 | 🤗 | ✅ | | -| Llama 2/3 | 🤗 | ✅ | `tests/Llama/` (blocks & decode-style paths) | -| DeepSeek-V3 (base) | 🤗 | ✅ | `tests/DeepSeek/` — several ops(e.g., gate ops) are not cycle-modeled | +| Llama 2/3 | 🤗 | ✅ | `tests/models/Llama/` (blocks & decode-style paths) | +| DeepSeek-V3 (base) | 🤗 | ✅ | `tests/models/DeepSeek/` — several ops (e.g., gate ops) are not cycle-modeled | | Llama-4 | 🤗 | ⏳ | In development | | Broader model support | — | ⏳ | In development |