Fix padding not used correctly in exllama v2 layer (#626)
padding not used correctly in exllama
Qubitium committed Apr 5, 2024
1 parent 866b4c8 commit b4b801c
Showing 2 changed files with 7 additions and 2 deletions.
auto_gptq/nn_modules/qlinear/qlinear_exllama.py (5 changes: 4 additions & 1 deletion)
@@ -58,8 +58,11 @@ def __init__(self, bits, group_size, infeatures, outfeatures, bias, trainable=Fa
         if trainable:
             raise NotImplementedError("Exllama kernel does not support training.")
 
+        self.padding = -outfeatures % 32
+        self.outfeatures = outfeatures + self.padding
+        outfeatures = self.outfeatures
+
         self.infeatures = infeatures
-        self.outfeatures = outfeatures
         self.bits = bits
         self.group_size = group_size if group_size != -1 else infeatures
         self.trainable = trainable
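Both layers round `outfeatures` up to the next multiple of 32 before using it. A minimal sketch of that arithmetic (the helper name `pad_to_multiple` is illustrative, not part of AutoGPTQ):

```python
# Sketch of the padding arithmetic used in both exllama layers.
# `pad_to_multiple` is an illustrative name, not an AutoGPTQ function.
def pad_to_multiple(outfeatures: int, multiple: int = 32) -> int:
    padding = -outfeatures % multiple  # 0 when already aligned
    return outfeatures + padding

assert pad_to_multiple(4096) == 4096  # already a multiple of 32, padding == 0
assert pad_to_multiple(4100) == 4128  # -4100 % 32 == 28, so 4100 + 28
```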
auto_gptq/nn_modules/qlinear/qlinear_exllamav2.py (4 changes: 3 additions & 1 deletion)
@@ -121,10 +121,12 @@ def __init__(self, bits, group_size, infeatures, outfeatures, bias, trainable=Fa
 
         self.q_handle = None
         self.q_tensors = None
 
         self.padding = -outfeatures % 32
+        self.outfeatures = outfeatures + self.padding
+        outfeatures = self.outfeatures
+
         self.infeatures = infeatures
-        self.outfeatures = outfeatures + self.padding
         self.bits = bits
         self.group_size = group_size if group_size != -1 else infeatures
         self.trainable = trainable
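In both files the fix computes the padded width first and then writes it back to the local `outfeatures`, so everything later in `__init__` that is sized from `outfeatures` sees the padded value rather than the original one. A hypothetical sketch of the difference, assuming the layer later allocates a `qweight` buffer from `outfeatures` (the buffer shape below is an assumption for illustration, not copied from this diff):

```python
# Hypothetical illustration of why the padded width is written back to the
# local `outfeatures`; the qweight shape is an assumption, not from the diff.
import torch

bits, infeatures, outfeatures = 4, 4096, 4100

padding = -outfeatures % 32                # 28 for outfeatures=4100
padded_outfeatures = outfeatures + padding

# Sized from the unpadded local name (the mismatch the commit title describes):
qweight_unpadded = torch.zeros((infeatures // 32 * bits, outfeatures), dtype=torch.int32)

# Sized after rebinding the local name to the padded value (post-fix):
outfeatures = padded_outfeatures
qweight_padded = torch.zeros((infeatures // 32 * bits, outfeatures), dtype=torch.int32)

print(qweight_unpadded.shape)  # torch.Size([512, 4100])
print(qweight_padded.shape)    # torch.Size([512, 4128])
```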
