Skip to content

Commit

Permalink
fix summed biases not being divided by mp size (#1220)
Browse files Browse the repository at this point in the history
  • Loading branch information
dmahan93 committed Jun 7, 2024
1 parent 4a34e0a commit 90a6cdb
Showing 1 changed file with 13 additions and 2 deletions.
15 changes: 13 additions & 2 deletions tools/ckpts/convert_hf_to_sequential.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,16 +119,27 @@ def shard_sequential_mp(num_mp_ranks, sequential):
ranks = {x: dict() for x in range(num_mp_ranks)}
for k, v in sequential.items():
if reduce(
np.logical_or,
[
x in k
for x in [
"dense_4h_to_h.bias",
"attention.dense.bias",
]
],
):
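# Divide by num_mp_ranks (the model-parallel size): every rank stores this bias and the per-rank copies get added together, so each rank must hold 1/num_mp_ranks of it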
for x in range(num_mp_ranks):
ranks[x][k] = v / num_mp_ranks
elif reduce(
np.logical_or,
[
x in k
for x in [
"layernorm",
"rotary_emb",
"dense_4h_to_h.bias",
"norm.weight",
"norm.bias",
"attention.dense.bias",
]
],
):
Expand Down

0 comments on commit 90a6cdb

Please sign in to comment.