{"payload":{"feedbackUrl":"https://github.com/orgs/community/discussions/53140","repo":{"id":279579416,"defaultBranch":"master","name":"GemmKernels.jl","ownerLogin":"JuliaGPU","currentUserCanPush":false,"isFork":false,"isEmpty":false,"createdAt":"2020-07-14T12:30:23.000Z","ownerAvatar":"https://avatars.githubusercontent.com/u/7346142?v=4","public":true,"private":false,"isOrgOwned":true},"refInfo":{"name":"","listCacheKey":"v0:1715128177.0","currentOid":""},"activityList":{"items":[{"before":null,"after":"c22842bcfcd59bac160945ac129fc1f6ded413ed","ref":"refs/heads/compathelper/new_version/2024-05-08-00-29-36-045-03533719879","pushedAt":"2024-05-08T00:29:37.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"github-actions[bot]","name":null,"path":"/apps/github-actions","primaryAvatarUrl":"https://avatars.githubusercontent.com/in/15368?s=80&v=4"},"commit":{"message":"CompatHelper: bump compat for LLVM to 7, (keep existing compat)","shortMessageHtmlLink":"CompatHelper: bump compat for LLVM to 7, (keep existing compat)"}},{"before":"0f8c25d2f3110e0cd02868a5b1508c48d820b172","after":"ff117b357c13ebe43aae8a9a6ff589c0ca9ea2e1","ref":"refs/heads/tf/new-pipelining-kernel","pushedAt":"2024-05-06T12:50:48.000Z","pushType":"force_push","commitsCount":0,"pusher":{"login":"thomasfaingnaert","name":"Thomas Faingnaert","path":"/thomasfaingnaert","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/10748726?s=80&v=4"},"commit":{"message":"Add new pipelined kernel\n\nAdd alternative pipelining kernel. Compared to the old pipelining\nkernel, the loads/stores are reordered somewhat, and shared memory\nis split in two stages.\n\nThis reduces the number of necessary bar.syncs to 1/3, but\nnecessitates halving the BLOCK_K tile size.","shortMessageHtmlLink":"Add new pipelined kernel"}},{"before":"61052b8fe00b38cfde7bd7ae3881399e4eff5a89","after":"0f8c25d2f3110e0cd02868a5b1508c48d820b172","ref":"refs/heads/tf/new-pipelining-kernel","pushedAt":"2024-05-06T12:43:56.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"thomasfaingnaert","name":"Thomas Faingnaert","path":"/thomasfaingnaert","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/10748726?s=80&v=4"},"commit":{"message":"TEMP","shortMessageHtmlLink":"TEMP"}},{"before":"f921fa997755b2ed1b28e35f814d94d841488634","after":null,"ref":"refs/heads/tf/cta-swizzle","pushedAt":"2024-05-06T12:41:35.000Z","pushType":"branch_deletion","commitsCount":0,"pusher":{"login":"thomasfaingnaert","name":"Thomas Faingnaert","path":"/thomasfaingnaert","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/10748726?s=80&v=4"}},{"before":"43399267fba813f79571a52aa5718292217734c7","after":"d3be41cb9c3da5424c29fa7c356412604ef86a5b","ref":"refs/heads/master","pushedAt":"2024-05-06T12:41:34.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"thomasfaingnaert","name":"Thomas Faingnaert","path":"/thomasfaingnaert","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/10748726?s=80&v=4"},"commit":{"message":"Add support for CTA swizzling (#195)\n\nApply a swizzling function to the mapping between tiles of the D output\r\nmatrix and the CTA ID. The goal is to maximise the probability that CTAs\r\nthat access the same tile of A/B are scheduled on neighbouring SMs at\r\nthe same time, thereby increasing L2 hit rate.","shortMessageHtmlLink":"Add support for CTA swizzling (#195)"}},{"before":"7d8e3c855388205c54bf61db36110c4d3c60eb6c","after":null,"ref":"refs/heads/tf/zero-shared-c-layout","pushedAt":"2024-05-06T12:41:14.000Z","pushType":"branch_deletion","commitsCount":0,"pusher":{"login":"thomasfaingnaert","name":"Thomas Faingnaert","path":"/thomasfaingnaert","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/10748726?s=80&v=4"}},{"before":"c98fa10dab26afdb5dbe2441ceee173548c290e4","after":"43399267fba813f79571a52aa5718292217734c7","ref":"refs/heads/master","pushedAt":"2024-05-06T12:41:13.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"thomasfaingnaert","name":"Thomas Faingnaert","path":"/thomasfaingnaert","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/10748726?s=80&v=4"},"commit":{"message":"Use a zero layout for C in shared memory if beta=0 (#194)\n\nWhile we already avoid a global load from C in case beta == 0, we still\r\nemit stores to shared memory and loads from shared memory for C.\r\n\r\nInstead, we should also use a zero layout for C in shared memory, which\r\neliminates these extra loads and stores.\r\n\r\nThis does not seem to influence the performance of GEMM, even for small\r\nmatrices, or highly rectangular GEMMs with small K, but it does make a\r\ndifference for some TCs, I've noticed, so let's do this, anyway.","shortMessageHtmlLink":"Use a zero layout for C in shared memory if beta=0 (#194)"}},{"before":"5f29724b7899d2e88c9e61b89049f23bd3bc4eba","after":null,"ref":"refs/heads/tf/fix-inlining-vstorea!","pushedAt":"2024-05-06T12:40:57.000Z","pushType":"branch_deletion","commitsCount":0,"pusher":{"login":"thomasfaingnaert","name":"Thomas Faingnaert","path":"/thomasfaingnaert","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/10748726?s=80&v=4"}},{"before":"26cbc57277781bf2a205fef6bb7470ddb8ad0549","after":"c98fa10dab26afdb5dbe2441ceee173548c290e4","ref":"refs/heads/master","pushedAt":"2024-05-06T12:40:56.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"thomasfaingnaert","name":"Thomas Faingnaert","path":"/thomasfaingnaert","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/10748726?s=80&v=4"},"commit":{"message":"Fix vstorea! not being inlined (#193)\n\nIn some Tensor Contractions, calls to `vstorea!` were not being inlined.\r\nAdding `$(Expr(:meta, :inline))` to `vstorea!` fixes this. For good\r\nmeasure, we might as well add it to `vloada` as well.","shortMessageHtmlLink":"Fix vstorea! not being inlined (#193)"}},{"before":"df973e7b0533b5ae311befb2650aa6936b65a171","after":"f921fa997755b2ed1b28e35f814d94d841488634","ref":"refs/heads/tf/cta-swizzle","pushedAt":"2024-05-02T11:08:37.000Z","pushType":"force_push","commitsCount":0,"pusher":{"login":"thomasfaingnaert","name":"Thomas Faingnaert","path":"/thomasfaingnaert","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/10748726?s=80&v=4"},"commit":{"message":"Add support for CTA swizzling\n\nApply a swizzling function to the mapping between tiles of the D output\nmatrix and the CTA ID. The goal is to maximise the probability that CTAs\nthat access the same tile of A/B are scheduled on neighbouring SMs at\nthe same time, thereby increasing L2 hit rate.","shortMessageHtmlLink":"Add support for CTA swizzling"}},{"before":null,"after":"df973e7b0533b5ae311befb2650aa6936b65a171","ref":"refs/heads/tf/cta-swizzle","pushedAt":"2024-05-02T11:02:18.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"thomasfaingnaert","name":"Thomas Faingnaert","path":"/thomasfaingnaert","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/10748726?s=80&v=4"},"commit":{"message":"Add support for CTA swizzling\n\nApply a swizzling function to the mapping between tiles of the D output\nmatrix and the CTA ID. The goal is to maximise the probability that CTAs\nthat access the same tile of A/B are scheduled on neighbouring SMs at\nthe same time, thereby increasing L2 hit rate.","shortMessageHtmlLink":"Add support for CTA swizzling"}},{"before":null,"after":"61052b8fe00b38cfde7bd7ae3881399e4eff5a89","ref":"refs/heads/tf/new-pipelining-kernel","pushedAt":"2024-04-26T17:17:26.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"thomasfaingnaert","name":"Thomas Faingnaert","path":"/thomasfaingnaert","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/10748726?s=80&v=4"},"commit":{"message":"WIP","shortMessageHtmlLink":"WIP"}},{"before":null,"after":"7d8e3c855388205c54bf61db36110c4d3c60eb6c","ref":"refs/heads/tf/zero-shared-c-layout","pushedAt":"2024-04-23T11:13:13.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"thomasfaingnaert","name":"Thomas Faingnaert","path":"/thomasfaingnaert","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/10748726?s=80&v=4"},"commit":{"message":"Use a zero layout for C in shared memory if beta=0\n\nWhile we already avoid a global load from C in case beta == 0, we still\nemit stores to shared memory and loads from shared memory for C.\n\nInstead, we should also use a zero layout for C in shared memory, which\neliminates these extra loads and stores.\n\nThis does not seem to influence the performance of GEMM, even for small\nmatrices, or highly rectangular GEMMs with small K, but it does make a\ndifference for some TCs, I've noticed, so let's do this, anyway.","shortMessageHtmlLink":"Use a zero layout for C in shared memory if beta=0"}},{"before":null,"after":"5f29724b7899d2e88c9e61b89049f23bd3bc4eba","ref":"refs/heads/tf/fix-inlining-vstorea!","pushedAt":"2024-04-23T09:31:40.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"thomasfaingnaert","name":"Thomas Faingnaert","path":"/thomasfaingnaert","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/10748726?s=80&v=4"},"commit":{"message":"Fix vstorea! not being inlined\n\nIn some Tensor Contractions, calls to `vstorea!` were not being inlined.\nAdding `$(Expr(:meta, :inline))` to `vstorea!` fixes this. For good\nmeasure, we might as well add it to `vloada` as well.","shortMessageHtmlLink":"Fix vstorea! not being inlined"}},{"before":"24dff3e0ee9615ca27afa594963ad94addc60f32","after":"1bdddf582187fa7035655bf393cae1a0ef9120fe","ref":"refs/heads/benchmark-results","pushedAt":"2024-04-18T08:33:11.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"maleadt","name":"Tim Besard","path":"/maleadt","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/383068?s=80&v=4"},"commit":{"message":"Results for 26cbc57277781bf2a205fef6bb7470ddb8ad0549.","shortMessageHtmlLink":"Results for 26cbc57."}},{"before":"e3b12f22df09e20c37ff61977fb5ac210e8596a1","after":"26cbc57277781bf2a205fef6bb7470ddb8ad0549","ref":"refs/heads/master","pushedAt":"2024-04-18T07:20:03.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"maleadt","name":"Tim Besard","path":"/maleadt","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/383068?s=80&v=4"},"commit":{"message":"Bump version.","shortMessageHtmlLink":"Bump version."}},{"before":"4afa16d9d408d2b7a6c2e362bafddcf80bbf9759","after":"7230c9fbb288a605a83072b55816603395f49d52","ref":"refs/heads/tf/2024-04-05-profile","pushedAt":"2024-04-06T13:11:47.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"thomasfaingnaert","name":"Thomas Faingnaert","path":"/thomasfaingnaert","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/10748726?s=80&v=4"},"commit":{"message":"Profile all TCs","shortMessageHtmlLink":"Profile all TCs"}},{"before":"55ee2070b72ff3938f4b03c73bd4bac578959293","after":"4afa16d9d408d2b7a6c2e362bafddcf80bbf9759","ref":"refs/heads/tf/2024-04-05-profile","pushedAt":"2024-04-05T13:51:43.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"thomasfaingnaert","name":"Thomas Faingnaert","path":"/thomasfaingnaert","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/10748726?s=80&v=4"},"commit":{"message":"Collect all metrics","shortMessageHtmlLink":"Collect all metrics"}},{"before":"73db8263509b8a528be79e49306118d9f1651f28","after":"55ee2070b72ff3938f4b03c73bd4bac578959293","ref":"refs/heads/tf/2024-04-05-profile","pushedAt":"2024-04-05T13:01:32.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"thomasfaingnaert","name":"Thomas Faingnaert","path":"/thomasfaingnaert","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/10748726?s=80&v=4"},"commit":{"message":"Fix","shortMessageHtmlLink":"Fix"}},{"before":"90e9fa983bead284d2a2b4364d50d19a9fac5f75","after":"73db8263509b8a528be79e49306118d9f1651f28","ref":"refs/heads/tf/2024-04-05-profile","pushedAt":"2024-04-05T13:01:03.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"thomasfaingnaert","name":"Thomas Faingnaert","path":"/thomasfaingnaert","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/10748726?s=80&v=4"},"commit":{"message":"Add script to create profiles","shortMessageHtmlLink":"Add script to create profiles"}},{"before":null,"after":"90e9fa983bead284d2a2b4364d50d19a9fac5f75","ref":"refs/heads/tf/2024-04-05-profile","pushedAt":"2024-04-05T11:41:26.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"thomasfaingnaert","name":"Thomas Faingnaert","path":"/thomasfaingnaert","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/10748726?s=80&v=4"},"commit":{"message":"Optimize.","shortMessageHtmlLink":"Optimize."}},{"before":"d4b04dafca1b91beef2c9f1fdb13c4db9de40db7","after":"24dff3e0ee9615ca27afa594963ad94addc60f32","ref":"refs/heads/benchmark-results","pushedAt":"2024-04-02T15:30:11.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"maleadt","name":"Tim Besard","path":"/maleadt","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/383068?s=80&v=4"},"commit":{"message":"Results for e3b12f22df09e20c37ff61977fb5ac210e8596a1.","shortMessageHtmlLink":"Results for e3b12f2."}},{"before":"643996042c1b3bcacefa62edf476a747823a020d","after":null,"ref":"refs/heads/dependabot/github_actions/julia-actions/setup-julia-2","pushedAt":"2024-04-02T14:00:21.000Z","pushType":"branch_deletion","commitsCount":0,"pusher":{"login":"maleadt","name":"Tim Besard","path":"/maleadt","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/383068?s=80&v=4"}},{"before":"b7b866f6698e1ed849012f528eef6f176c0b8ab6","after":"e3b12f22df09e20c37ff61977fb5ac210e8596a1","ref":"refs/heads/master","pushedAt":"2024-04-02T14:00:20.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"maleadt","name":"Tim Besard","path":"/maleadt","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/383068?s=80&v=4"},"commit":{"message":"Bump julia-actions/setup-julia from 1 to 2 (#191)\n\nBumps [julia-actions/setup-julia](https://github.com/julia-actions/setup-julia) from 1 to 2.\r\n- [Release notes](https://github.com/julia-actions/setup-julia/releases)\r\n- [Commits](https://github.com/julia-actions/setup-julia/compare/v1...v2)\r\n\r\n---\r\nupdated-dependencies:\r\n- dependency-name: julia-actions/setup-julia\r\n dependency-type: direct:production\r\n update-type: version-update:semver-major\r\n...\r\n\r\nSigned-off-by: dependabot[bot] \r\nCo-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>","shortMessageHtmlLink":"Bump julia-actions/setup-julia from 1 to 2 (#191)"}},{"before":null,"after":"643996042c1b3bcacefa62edf476a747823a020d","ref":"refs/heads/dependabot/github_actions/julia-actions/setup-julia-2","pushedAt":"2024-04-01T18:46:38.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"dependabot[bot]","name":null,"path":"/apps/dependabot","primaryAvatarUrl":"https://avatars.githubusercontent.com/in/29110?s=80&v=4"},"commit":{"message":"Bump julia-actions/setup-julia from 1 to 2\n\nBumps [julia-actions/setup-julia](https://github.com/julia-actions/setup-julia) from 1 to 2.\n- [Release notes](https://github.com/julia-actions/setup-julia/releases)\n- [Commits](https://github.com/julia-actions/setup-julia/compare/v1...v2)\n\n---\nupdated-dependencies:\n- dependency-name: julia-actions/setup-julia\n dependency-type: direct:production\n update-type: version-update:semver-major\n...\n\nSigned-off-by: dependabot[bot] ","shortMessageHtmlLink":"Bump julia-actions/setup-julia from 1 to 2"}},{"before":"690a53deb42343a1155176a63d2bd9eea0a47c29","after":"85c4f15cf775456fdf6d27e0b43c000687a921be","ref":"refs/heads/tf/tc-override","pushedAt":"2024-03-27T14:34:29.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"thomasfaingnaert","name":"Thomas Faingnaert","path":"/thomasfaingnaert","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/10748726?s=80&v=4"},"commit":{"message":"Fix incorrect results for transposed C/D","shortMessageHtmlLink":"Fix incorrect results for transposed C/D"}},{"before":"1868adcbafb281e1a1ca031a73a9dd77bbdbcc87","after":"690a53deb42343a1155176a63d2bd9eea0a47c29","ref":"refs/heads/tf/tc-override","pushedAt":"2024-03-22T15:38:51.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"thomasfaingnaert","name":"Thomas Faingnaert","path":"/thomasfaingnaert","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/10748726?s=80&v=4"},"commit":{"message":"Add WIP script to sweep layout","shortMessageHtmlLink":"Add WIP script to sweep layout"}},{"before":null,"after":"1868adcbafb281e1a1ca031a73a9dd77bbdbcc87","ref":"refs/heads/tf/tc-override","pushedAt":"2024-03-21T13:15:23.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"thomasfaingnaert","name":"Thomas Faingnaert","path":"/thomasfaingnaert","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/10748726?s=80&v=4"},"commit":{"message":"Allow overriding layout","shortMessageHtmlLink":"Allow overriding layout"}},{"before":"c8ff4d36aab81f714f5401061f31eb2266adfab6","after":"d4b04dafca1b91beef2c9f1fdb13c4db9de40db7","ref":"refs/heads/benchmark-results","pushedAt":"2024-03-01T14:00:07.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"maleadt","name":"Tim Besard","path":"/maleadt","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/383068?s=80&v=4"},"commit":{"message":"Results for b7b866f6698e1ed849012f528eef6f176c0b8ab6.","shortMessageHtmlLink":"Results for b7b866f."}},{"before":"c3e22e3e571d0d7d5056fae36341c13df74f1a33","after":"b7b866f6698e1ed849012f528eef6f176c0b8ab6","ref":"refs/heads/master","pushedAt":"2024-02-23T12:28:09.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"maleadt","name":"Tim Besard","path":"/maleadt","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/383068?s=80&v=4"},"commit":{"message":"Add FPU tuning script.","shortMessageHtmlLink":"Add FPU tuning script."}}],"hasNextPage":true,"hasPreviousPage":false,"activityType":"all","actor":null,"timePeriod":"all","sort":"DESC","perPage":30,"cursor":"djE6ks8AAAAERBuAwwA","startCursor":null,"endCursor":null}},"title":"Activity ยท JuliaGPU/GemmKernels.jl"}