Dev zhanglu (#3649)
Check whether the embedding layer has actually been resized, rather than
checking whether the length of the tokenizer equals the size of the
embedding layer.
The model's embedding layer may already have been resized and padded to a
multiple of 16 while the tokenizer is not padded, which makes the old
check trip the freeze-layer assertion. In that case the embedding layer
is not resized again, so freezing can still be performed safely.
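As a rough illustration of the mismatch described above (not part of the
commit; the sizes below are made up), padding the embedding matrix to a
multiple of 16 leaves len(tokenizer) unequal to the embedding size on
every later run, so only a before/after comparison of the embedding size
shows whether a real resize happened:

import math

tokenizer_len = 32005        # hypothetical value of len(tokenizer)
pad_to = 16                  # pad_vocab_size_to_multiple_of
n_embs = math.ceil(tokenizer_len / pad_to) * pad_to  # 32016 rows after an earlier padded resize

# Old check: compares tokenizer length to the (padded) embedding size,
# so it trips even though no further resize is going to happen.
old_check_trips = tokenizer_len != n_embs            # True

# New check: compute the padded target, "resize", then compare embedding
# sizes before and after; it only trips when the matrix actually changed.
target_size = math.ceil(tokenizer_len / pad_to) * pad_to
new_n_embs = target_size                             # size resize_token_embeddings would produce
new_check_trips = new_n_embs != n_embs               # False, so freezing stays possible

print(old_check_trips, new_check_trips)              # True False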
zhanglu0704 committed Aug 29, 2023
1 parent fd9ecfa commit 7e40ee3
Showing 1 changed file with 4 additions and 3 deletions.
7 changes: 4 additions & 3 deletions model/model_training/utils/utils.py
@@ -330,15 +330,16 @@ def get_model(conf, tokenizer, pad_vocab_size_to_multiple_of=16, check_freeze_la
     )
 
     n_embs = model.get_input_embeddings().num_embeddings
-    if len(tokenizer) != n_embs and check_freeze_layer:
-        assert not conf.freeze_layer, "Cannot change the number of embeddings if the model is frozen."
-
     if len(tokenizer) != n_embs or pad_vocab_size_to_multiple_of:
         p = pad_vocab_size_to_multiple_of
         target_size = len(tokenizer) if not p else math.ceil(len(tokenizer) / p) * p
         print("Resizing embeddings to", target_size)
         model.resize_token_embeddings(target_size)
 
+    new_n_embs = model.get_input_embeddings().num_embeddings
+    if new_n_embs != n_embs and check_freeze_layer:
+        assert not conf.freeze_layer, "Cannot change the number of embeddings if the model is frozen."
+
     if conf.freeze_layer:
         model = freeze_top_n_layers(model, conf.freeze_layer)
