Merge pull request #1322 from hyunwoongko/add_gelu_fusion
Add gelu fusion
theblackcat102 committed Feb 8, 2023
2 parents 5d2e8a2 + 802c91a commit 45c1c14
Showing 3 changed files with 60 additions and 0 deletions.
1 change: 1 addition & 0 deletions model/supervised_finetuning/configs/config.yaml
@@ -46,6 +46,7 @@ defaults:
quantization: false
seq2seqmodel: false
poly_eps: 1.0
fuse_gelu: true

galactica-125m:
learning_rate: 5e-5
55 changes: 55 additions & 0 deletions model/supervised_finetuning/efficiency_utils.py
@@ -0,0 +1,55 @@
import functools

import torch
from transformers.activations import FastGELUActivation, GELUActivation, NewGELUActivation, QuickGELUActivation


# Recursive getattr/setattr over dotted attribute paths such as "transformer.h.0.mlp.act".
def rsetattr(obj, attr, val):
    pre, _, post = attr.rpartition(".")
    return setattr(rgetattr(obj, pre) if pre else obj, post, val)


def rgetattr(obj, attr, *args):
    def _getattr(obj, attr):
        return getattr(obj, attr, *args)

    return functools.reduce(_getattr, [obj] + attr.split("."))


def fuse_gelu(model):
    # TorchScript-fused tanh approximation of GELU:
    # gelu(x) ~= 0.5 * x * (1 + tanh(sqrt(2/pi) * (x + 0.044715 * x^3)))
    @torch.jit.script
    def gelu_fwd(x):
        return x * 0.5 * (1.0 + torch.tanh(0.79788456 * x * (1 + 0.044715 * x * x)))

    # Hand-written derivative of the approximation above, applied to the incoming gradient g.
    @torch.jit.script
    def gelu_bwd(g, x):
        tanh_out = torch.tanh(0.79788456 * x * (1 + 0.044715 * x * x))
        ff = 0.5 * x * ((1 - tanh_out * tanh_out) * (0.79788456 + 0.1070322243 * x * x)) + 0.5 * (1 + tanh_out)
        return ff * g

    class _FusedGeLUFunction(torch.autograd.Function):
        @staticmethod
        def forward(ctx, input):
            # Keep a reference to the input; it is needed to evaluate the derivative in backward.
            ctx.input_tensor = input
            return gelu_fwd(input)

        @staticmethod
        def backward(ctx, grad_output):
            input = ctx.input_tensor
            return gelu_bwd(grad_output, input)

    class FusedGelu(torch.nn.Module):
        def forward(self, input):
            return _FusedGeLUFunction.apply(input)

    fused_gelu_module = FusedGelu()
    hf_gelu_functions = [GELUActivation, FastGELUActivation, NewGELUActivation, QuickGELUActivation]

    # Replace every Hugging Face GELU variant in the model with the fused implementation.
    for name, module in model.named_modules():
        for hf_gelu_function in hf_gelu_functions:
            if isinstance(module, hf_gelu_function):
                rsetattr(model, name, fused_gelu_module)

    return model
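
Not part of the commit, but a minimal usage sketch of the new helper, assuming a tiny randomly initialised GPT-2 (whose MLP blocks use NewGELUActivation) and that efficiency_utils is on the import path:

from transformers import GPT2Config, GPT2LMHeadModel
from transformers.activations import NewGELUActivation

from efficiency_utils import fuse_gelu

# Tiny random GPT-2 so no checkpoint download is needed; each of its two MLP blocks holds one NewGELUActivation.
config = GPT2Config(n_layer=2, n_head=2, n_embd=64)
model = GPT2LMHeadModel(config)

before = sum(isinstance(m, NewGELUActivation) for m in model.modules())
model = fuse_gelu(model)
after = sum(isinstance(m, NewGELUActivation) for m in model.modules())
print(f"NewGELUActivation modules before/after fusion: {before} -> {after}")  # expected to go from 2 to 0 here
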
4 changes: 4 additions & 0 deletions model/supervised_finetuning/trainer.py
@@ -5,6 +5,7 @@

import bitsandbytes
import torch
from efficiency_utils import fuse_gelu
from torch import nn
from transformers import PreTrainedModel, Trainer, TrainingArguments
from transformers.training_args import OptimizerNames
@@ -152,6 +153,9 @@ def argument_parsing(notebook=False, notebook_args=None):
module, "weight", {"optim_bits": 32}
)

if training_conf.fuse_gelu:
model = fuse_gelu(model)

args = TrainingArguments(
output_dir=f"{training_conf.model_name}-{training_conf.log_dir}-finetuned",
num_train_epochs=training_conf.num_train_epochs,
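
Also not part of the commit: before relying on the fuse_gelu flag in a training run, the fused forward and the hand-written backward can be sanity-checked against the stock Hugging Face activation. A minimal sketch, again assuming efficiency_utils is importable:

import torch
from transformers.activations import NewGELUActivation

from efficiency_utils import fuse_gelu

# Wrap a single activation in a Sequential so fuse_gelu can swap it in place.
ref = torch.nn.Sequential(NewGELUActivation())
fused = fuse_gelu(torch.nn.Sequential(NewGELUActivation()))

x_ref = torch.randn(4096, requires_grad=True)
x_fused = x_ref.detach().clone().requires_grad_(True)

y_ref = ref(x_ref)
y_fused = fused(x_fused)
y_ref.sum().backward()
y_fused.sum().backward()

# Both differences should be on the order of float32 rounding error (roughly 1e-7).
print(f"max forward diff:  {(y_ref - y_fused).abs().max().item():.2e}")
print(f"max backward diff: {(x_ref.grad - x_fused.grad).abs().max().item():.2e}")
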
