{"payload":{"feedbackUrl":"https://github.com/orgs/community/discussions/53140","repo":{"id":599547518,"defaultBranch":"main","name":"vllm","ownerLogin":"vllm-project","currentUserCanPush":false,"isFork":false,"isEmpty":false,"createdAt":"2023-02-09T11:23:20.000Z","ownerAvatar":"https://avatars.githubusercontent.com/u/136984999?v=4","public":true,"private":false,"isOrgOwned":true},"refInfo":{"name":"","listCacheKey":"v0:1717822267.0","currentOid":""},"activityList":{"items":[{"before":"089476e8b64eca1d0ab2cf18be48ab708aa88992","after":"fa10ec60ef60271d12882ebaaa3776bc4f4c43e2","ref":"refs/heads/torch-xla","pushedAt":"2024-06-09T04:08:39.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"WoosukKwon","name":"Woosuk Kwon","path":"/WoosukKwon","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/46394894?s=80&v=4"},"commit":{"message":"Fix docs","shortMessageHtmlLink":"Fix docs"}},{"before":"90d1e319ddc0a6ca49b166c256f66a1cd07aa38d","after":"089476e8b64eca1d0ab2cf18be48ab708aa88992","ref":"refs/heads/torch-xla","pushedAt":"2024-06-09T04:00:46.000Z","pushType":"push","commitsCount":8,"pusher":{"login":"WoosukKwon","name":"Woosuk Kwon","path":"/WoosukKwon","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/46394894?s=80&v=4"},"commit":{"message":"Remove tpu-install.sh","shortMessageHtmlLink":"Remove tpu-install.sh"}},{"before":"0373e1837e1a85c595fa9fc67c775bc6cbe105a2","after":"5d7e3d0176e0dbcf144c64b7d14d996c55e36c50","ref":"refs/heads/main","pushedAt":"2024-06-09T03:50:14.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"youkaichao","name":"youkaichao","path":"/youkaichao","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/23236638?s=80&v=4"},"commit":{"message":"[mis][ci/test] fix flaky test in test_sharded_state_loader.py (#5361)\n\n[mis][ci/test] fix flaky test in tests/test_sharded_state_loader.py (#5361)","shortMessageHtmlLink":"[mis][ci/test] fix flaky test in test_sharded_state_loader.py (#5361)"}},{"before":"c09dade2a263b6f684d2fbf390c9c1c64761e953","after":"0373e1837e1a85c595fa9fc67c775bc6cbe105a2","ref":"refs/heads/main","pushedAt":"2024-06-09T02:14:43.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"youkaichao","name":"youkaichao","path":"/youkaichao","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/23236638?s=80&v=4"},"commit":{"message":"[Core][CUDA Graph] add output buffer for cudagraph (#5074)\n\n[Core][CUDA Graph] add output buffer for cudagraph to reduce memory footprint (#5074)","shortMessageHtmlLink":"[Core][CUDA Graph] add output buffer for cudagraph (#5074)"}},{"before":"36ac127870662b723845890fd722c607ea8d807a","after":"90d1e319ddc0a6ca49b166c256f66a1cd07aa38d","ref":"refs/heads/torch-xla","pushedAt":"2024-06-08T20:52:29.000Z","pushType":"push","commitsCount":31,"pusher":{"login":"WoosukKwon","name":"Woosuk Kwon","path":"/WoosukKwon","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/46394894?s=80&v=4"},"commit":{"message":"Merge branch 'main' into torch-xla","shortMessageHtmlLink":"Merge branch 'main' into torch-xla"}},{"before":"f0d3ac9afdc0dfcba97703166e63f542fe51322e","after":"36ac127870662b723845890fd722c607ea8d807a","ref":"refs/heads/torch-xla","pushedAt":"2024-06-08T20:50:14.000Z","pushType":"push","commitsCount":5,"pusher":{"login":"WoosukKwon","name":"Woosuk Kwon","path":"/WoosukKwon","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/46394894?s=80&v=4"},"commit":{"message":"Refactor RoPE","shortMessageHtmlLink":"Refactor 
RoPE"}},{"before":"8ea5e44a435e8731fd6f5ba4c329dd112752532a","after":"c09dade2a263b6f684d2fbf390c9c1c64761e953","ref":"refs/heads/main","pushedAt":"2024-06-08T17:54:05.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"mgoin","name":"Michael Goin","path":"/mgoin","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/3195154?s=80&v=4"},"commit":{"message":"[Misc][Breaking] Change FP8 checkpoint format from act_scale -> input_scale (#5353)","shortMessageHtmlLink":"[Misc][Breaking] Change FP8 checkpoint format from act_scale -> input…"}},{"before":"9fb900f90cbb5614c3e7d67446325ad8b7ac04b2","after":"8ea5e44a435e8731fd6f5ba4c329dd112752532a","ref":"refs/heads/main","pushedAt":"2024-06-08T08:59:21.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"youkaichao","name":"youkaichao","path":"/youkaichao","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/23236638?s=80&v=4"},"commit":{"message":"[CI/Test] improve robustness of test (vllm_runner) (#5357)\n\n[CI/Test] improve robustness of test by replacing del with context manager (vllm_runner) (#5357)","shortMessageHtmlLink":"[CI/Test] improve robustness of test (vllm_runner) (#5357)"}},{"before":"c96fc067479453b02e92d9378eeeaebb6b3816de","after":"9fb900f90cbb5614c3e7d67446325ad8b7ac04b2","ref":"refs/heads/main","pushedAt":"2024-06-08T05:31:32.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"youkaichao","name":"youkaichao","path":"/youkaichao","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/23236638?s=80&v=4"},"commit":{"message":"[CI/Test] improve robustness of test (hf_runner) (#5347)\n\n[CI/Test] improve robustness of test by replacing del with context manager (hf_runner) (#5347)","shortMessageHtmlLink":"[CI/Test] improve robustness of test (hf_runner) (#5347)"}},{"before":"7c52473c068a7b8ac325ddbed15e95962159e50e","after":null,"ref":"refs/heads/fix-logits","pushedAt":"2024-06-08T04:51:07.000Z","pushType":"branch_deletion","commitsCount":0,"pusher":{"login":"WoosukKwon","name":"Woosuk Kwon","path":"/WoosukKwon","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/46394894?s=80&v=4"}},{"before":"c149a9819c2f83c2b8850c79d54e014d4f27e836","after":null,"ref":"refs/heads/test-acc","pushedAt":"2024-06-08T04:43:21.000Z","pushType":"branch_deletion","commitsCount":0,"pusher":{"login":"WoosukKwon","name":"Woosuk Kwon","path":"/WoosukKwon","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/46394894?s=80&v=4"}},{"before":"97a99491116d7510504c442b03509fa13a08e244","after":null,"ref":"refs/heads/compilable-rope","pushedAt":"2024-06-08T04:43:13.000Z","pushType":"branch_deletion","commitsCount":0,"pusher":{"login":"WoosukKwon","name":"Woosuk Kwon","path":"/WoosukKwon","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/46394894?s=80&v=4"}},{"before":"6516f6df111d22ee9215c4fa97b836851ba9e2f8","after":null,"ref":"refs/heads/integrate-flashinfer","pushedAt":"2024-06-08T04:43:08.000Z","pushType":"branch_deletion","commitsCount":0,"pusher":{"login":"WoosukKwon","name":"Woosuk Kwon","path":"/WoosukKwon","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/46394894?s=80&v=4"}},{"before":"b3376e5c76c199acb216addec7c32ac5299bef31","after":"c96fc067479453b02e92d9378eeeaebb6b3816de","ref":"refs/heads/main","pushedAt":"2024-06-08T02:13:12.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"WoosukKwon","name":"Woosuk Kwon","path":"/WoosukKwon","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/46394894?s=80&v=4"},"commit":{"message":"[ROCm][AMD] Use pytorch 
- 2024-06-08 02:13 UTC · WoosukKwon merged a pull request into main: "[ROCm][AMD] Use pytorch sdpa math backend to do naive attention (#4965)"
- 2024-06-08 01:20 UTC · DarkLight1337 merged a pull request into main: "[Misc] Add args for selecting distributed executor to benchmarks (#5335)"
- 2024-06-08 00:42 UTC · pcmoritz merged a pull request into main: "[Bug Fix] Fix the support check for FP8 CUTLASS (#5352)". From the PR description: with torch 2.4.0.dev20240603+cu121, cutlass_fp8_supported returned False because the (capability, version) pair reached the comparison as (90, 11111111112); this fixes the check introduced in #5183 (see the second note after this log).
- 2024-06-07 21:10 UTC · WoosukKwon merged a pull request into main: "fix DbrxFusedNormAttention missing cache_config (#5340)"
- 2024-06-07 21:09 UTC · WoosukKwon merged a pull request into main: "[Misc] Remove unused cuda_utils.h in CPU backend (#5345)"
- 2024-06-07 18:23 UTC · ywang96 merged a pull request into main: "[Frontend] Add OpenAI Vision API Support (#5237)" (co-authored with DarkLight1337)
- 2024-06-07 16:36 UTC · WoosukKwon merged a pull request into main: "[Kernel] Dynamic Per-Token Activation Quantization (#5037)" (co-authored with Varun Sundar Rabindranath)
- 2024-06-07 13:35 UTC · DarkLight1337 merged a pull request into main: "Addition of lacked ignored_seq_groups in _schedule_chunked_prefill (#5296)"
- 2024-06-07 10:01 UTC · Yard1 merged a pull request into main: "Remove Ray health check (#4693)"
- 2024-06-07 08:42 UTC · pcmoritz merged a pull request into main: "[Kernel] Switch fp8 layers to use the CUTLASS kernels (#5183)". From the PR description: switches from torch._scaled_mm to vLLM's CUTLASS fp8 kernels when supported, giving a 5-15% end-to-end improvement on neuralmagic/Meta-Llama-3-8B-Instruct-FP8; see https://docs.google.com/spreadsheets/d/1GiAnmzyGHgZ6zL_LDSTm35Bdrt4A8AaFEurDlISYYA4/ for quick e2e benchmarks and #5144 for comparisons across GEMM sizes.
- 2024-06-07 05:15 UTC · WoosukKwon merged a pull request into main: "[Misc][Utils] allow get_open_port to be called for multiple times (#5333)"
- 2024-06-07 03:29 UTC · DarkLight1337 merged a pull request into main: "[Feature][Frontend]: Add support for `stream_options` in `ChatCompletionRequest` (#5135)"
Kwon","path":"/WoosukKwon","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/46394894?s=80&v=4"},"commit":{"message":"[Misc] Missing error message for custom ops import (#5282)","shortMessageHtmlLink":"[Misc] Missing error message for custom ops import (#5282)"}},{"before":"a31cab7556f540b558b0b454b4a4b9b438542566","after":"ccdc490dda3f534c63c1faf29a638e65594d0dc3","ref":"refs/heads/main","pushedAt":"2024-06-07T02:07:57.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"WoosukKwon","name":"Woosuk Kwon","path":"/WoosukKwon","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/46394894?s=80&v=4"},"commit":{"message":"[Core] Change LoRA embedding sharding to support loading methods (#5038)","shortMessageHtmlLink":"[Core] Change LoRA embedding sharding to support loading methods (#5038)"}},{"before":"3eb30a967eded5023546469cdb73d3e6a5b50ce7","after":null,"ref":"refs/heads/avoid_tokens_copy","pushedAt":"2024-06-07T02:06:14.000Z","pushType":"branch_deletion","commitsCount":0,"pusher":{"login":"WoosukKwon","name":"Woosuk Kwon","path":"/WoosukKwon","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/46394894?s=80&v=4"}},{"before":"828da0d44e9124d949909477d6018fc08469a31e","after":"a31cab7556f540b558b0b454b4a4b9b438542566","ref":"refs/heads/main","pushedAt":"2024-06-07T01:12:00.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"zhuohan123","name":"Zhuohan Li","path":"/zhuohan123","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/17310766?s=80&v=4"},"commit":{"message":"[Core] Avoid copying prompt/output tokens if no penalties are used (#5289)","shortMessageHtmlLink":"[Core] Avoid copying prompt/output tokens if no penalties are used (#…"}},{"before":"e881c1cf9a47d8a2a75bdd7caa79692b9abec723","after":"f0d3ac9afdc0dfcba97703166e63f542fe51322e","ref":"refs/heads/torch-xla","pushedAt":"2024-06-06T22:35:12.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"WoosukKwon","name":"Woosuk Kwon","path":"/WoosukKwon","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/46394894?s=80&v=4"},"commit":{"message":"Disable top-p sampling","shortMessageHtmlLink":"Disable top-p sampling"}}],"hasNextPage":true,"hasPreviousPage":false,"activityType":"all","actor":null,"timePeriod":"all","sort":"DESC","perPage":30,"cursor":"djE6ks8AAAAEYDK0lQA","startCursor":null,"endCursor":null}},"title":"Activity · vllm-project/vllm"}