Please follow the AE Appendix in corresponding HPCA 2024 paper "Gemini: Mapping and Architecture Co-exploration for Large-scale DNN Chiplet Accelerators" to initiate the framework. We supplement the scripts of DSE for 72TOPs, 128TOPs, and 512TOPs.
Many essential foundational components of Gemini are shared with the SET framework from the same project. We are currently updating the architecture and documentation of the SET framework. For urgent inquiries about Gemini, one can refer to the annotations in SET. Additionally, we plan to migrate and update these annotations from SET to Gemini and include comments related to code unique to Gemini.
@inproceedings{10.1145/3579371.3589048,
author = {Cai, Jingwei and Wei, Yuchen and Wu, Zuotong and Peng, Sen and Ma, Kaisheng},
title = {Inter-layer Scheduling Space Definition and Exploration for Tiled Accelerators},
year = {2023},
isbn = {9798400700958},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
url = {https://doi.org/10.1145/3579371.3589048},
doi = {10.1145/3579371.3589048},
abstract = {With the continuous expansion of the DNN accelerator scale, inter-layer scheduling, which studies the allocation of computing resources to each layer and the computing order of all layers in a DNN, plays an increasingly important role in maintaining a high utilization rate and energy efficiency of DNN inference accelerators. However, current inter-layer scheduling is mainly conducted based on some heuristic patterns. The space of inter-layer scheduling has not been clearly defined, resulting in significantly limited optimization opportunities and a lack of understanding on different inter-layer scheduling choices and their consequences.To bridge the gaps, we first propose a uniform and systematic notation, the Resource Allocation Tree (RA Tree), to represent different inter-layer scheduling schemes and depict the overall space of inter-layer scheduling. Based on the notation, we then thoroughly analyze how different inter-layer scheduling choices influence the performance and energy efficiency of an accelerator step by step. Moreover, we show how to represent existing patterns in our notation and analyze their features. To thoroughly explore the space of the inter-layer scheduling for diverse tiled accelerators and workloads, we develop an end-to-end and highly-portable scheduling framework, SET. Compared with the state-of-the-art (SOTA) open-source Tangram framework, SET can, on average, achieves 1.78\texttimes{} performance improvement and 13.2\% energy cost reduction simultaneously. Moreover, the SET framework will be open-sourced.},
booktitle = {Proceedings of the 50th Annual International Symposium on Computer Architecture},
articleno = {13},
numpages = {17},
keywords = {tiled accelerators, neural networks, inter-layer scheduling, scheduling},
location = {Orlando, FL, USA},
series = {ISCA '23}
}
@inproceedings{cai2024gemini,
title={Gemini: Mapping and Architecture Co-exploration for Large-scale DNN Chiplet Accelerators},
author={Cai, Jingwei and Wu, Zuotong and Peng, Sen and Wei, Yuchen and Tan, Zhanhong and Shi, Guiming and Gao, Mingyu and Ma, Kaisheng},
booktitle={2024 IEEE International Symposium on High-Performance Computer Architecture (HPCA)},
pages={156--171},
year={2024},
organization={IEEE}
}