From cc37563948b667032e240ed00e18fae9819c9f9a Mon Sep 17 00:00:00 2001 From: Aghilan Nathan Date: Fri, 13 Mar 2026 02:47:20 +0000 Subject: [PATCH 1/3] feat: swiglu simple math changes for perf upgrades --- src/tilegym/ops/cutile/swiglu.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/src/tilegym/ops/cutile/swiglu.py b/src/tilegym/ops/cutile/swiglu.py index 5dd2743..394ab8e 100644 --- a/src/tilegym/ops/cutile/swiglu.py +++ b/src/tilegym/ops/cutile/swiglu.py @@ -88,13 +88,9 @@ def swiglu_backward_kernel(dc, a, b, da, db, TILE_SIZE: ct.Constant[int]): a_tile_f32 = a_tile.astype(ct.float32) b_tile_f32 = b_tile.astype(ct.float32) - # NOTE: sigmoid is intentionally inlined here to preserve current backward - # kernel behavior and benchmark baselines. Forward already uses - # the shared `sigmoid()` helper; backward will switch to it in a follow-up - # optimization PR that re-benchmarks backward performance. - # Compute sigmoid(a) and silu(a) - sigmoid_a = 1.0 / (1.0 + ct.exp(-a_tile_f32)) - silu_a = a_tile_f32 * sigmoid_a + # Reuse shared helpers for consistent math path with forward. + sigmoid_a = sigmoid(a_tile_f32) + silu_a = silu(a_tile_f32) # db = dc * silu(a) db_tile = dc_tile * silu_a From d18aaac322e03c879a4a02cdd6f6ea317183ca36 Mon Sep 17 00:00:00 2001 From: Aghilan Nathan Date: Fri, 13 Mar 2026 02:48:59 +0000 Subject: [PATCH 2/3] feat: swiglu simple math changes for perf upgrades --- src/tilegym/ops/cutile/swiglu.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/tilegym/ops/cutile/swiglu.py b/src/tilegym/ops/cutile/swiglu.py index 394ab8e..402e399 100644 --- a/src/tilegym/ops/cutile/swiglu.py +++ b/src/tilegym/ops/cutile/swiglu.py @@ -88,7 +88,6 @@ def swiglu_backward_kernel(dc, a, b, da, db, TILE_SIZE: ct.Constant[int]): a_tile_f32 = a_tile.astype(ct.float32) b_tile_f32 = b_tile.astype(ct.float32) - # Reuse shared helpers for consistent math path with forward. sigmoid_a = sigmoid(a_tile_f32) silu_a = silu(a_tile_f32) From e6f4145478fdf59c189b4f2f6d02453ae0ffe7cf Mon Sep 17 00:00:00 2001 From: Aghilan Nathan <90127404+aghilann@users.noreply.github.com> Date: Tue, 17 Mar 2026 01:00:13 -0400 Subject: [PATCH 3/3] Update src/tilegym/ops/cutile/swiglu.py Co-authored-by: hannahli <165050866+hannahli-nv@users.noreply.github.com> --- src/tilegym/ops/cutile/swiglu.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tilegym/ops/cutile/swiglu.py b/src/tilegym/ops/cutile/swiglu.py index 402e399..9dbb83a 100644 --- a/src/tilegym/ops/cutile/swiglu.py +++ b/src/tilegym/ops/cutile/swiglu.py @@ -89,7 +89,7 @@ def swiglu_backward_kernel(dc, a, b, da, db, TILE_SIZE: ct.Constant[int]): b_tile_f32 = b_tile.astype(ct.float32) sigmoid_a = sigmoid(a_tile_f32) - silu_a = silu(a_tile_f32) + silu_a = a_tile_f32 * sigmoid_a # db = dc * silu(a) db_tile = dc_tile * silu_a