This repository contains top solutions from WhoIsWho, the world’s largest manually-labeled name disambiguation benchmark.
WhoIsWho is the world’s largest manually-labeled name disambiguation benchmark, consisting of over 900,000 papers collected from the AMiner database. The owners of each paper are annotated by skilled crowdworkers through a rigorous process.
We also propose two related representative name disambiguation tasks, Incremental Name Disambiguation and Name Disambiguation from Scratch, to comprehensively evaluate the capability of relevant name disambiguation methods.
Please go to from_scratch/rankx
and incremental_name_disambiguation/rankx
for details on reproducing the Name Disambiguation from Scratch and Incremental Name Disambiguation solutions, respectively.
🌟 If you find our work helpful, please leave us a star and cite our paper.
@inproceedings{chen2023web,
  author    = {Chen, Bo and Zhang, Jing and Zhang, Fanjin and Han, Tianyi and Cheng, Yuqing and Li, Xiaoyan and Dong, Yuxiao and Tang, Jie},
  title     = {Web-Scale Academic Name Disambiguation: The {WhoIsWho} Benchmark, Leaderboard, and Toolkit},
  booktitle = {Proceedings of the 29th {ACM} {SIGKDD} Conference on Knowledge Discovery and Data Mining},
  pages     = {3817--3828},
  year      = {2023},
}
@inproceedings{zhang2024oag,
  author    = {Zhang, Fanjin and Shi, Shijie and Zhu, Yifan and Chen, Bo and Cen, Yukuo and Yu, Jifan and Chen, Yelin and Wang, Lulu and Zhao, Qingfei and Cheng, Yuqing and Han, Tianyi and An, Yuwei and Zhang, Dan and Tam, Weng Lam and Cao, Kun and Pang, Yunhe and Guan, Xinyu and Yuan, Huihui and Song, Jian and Li, Xiaoyan and Dong, Yuxiao and Tang, Jie},
  title     = {{OAG-Bench}: A Human-Curated Benchmark for Academic Graph Mining},
  booktitle = {Proceedings of the 30th {ACM} {SIGKDD} Conference on Knowledge Discovery and Data Mining},
  pages     = {6214--6225},
  year      = {2024},
}