Merge aefd118 into 880ae19

RasaHQ · Oct 26, 2020 · 49689ef · 49689ef
2 parents 880ae19 + aefd118
commit 49689ef
Show file tree

Hide file tree

Showing 3 changed files with 8 additions and 3 deletions.
diff --git a/changelog/7102.improvement.md b/changelog/7102.improvement.md
@@ -0,0 +1,5 @@
+Changed the default model weights loaded for the `HFTransformersNLP` component.
+
+The default model is now a [language-agnostic sentence embedding model](https://tfhub.dev/google/LaBSE/1).
+These model weights should help improve performance on
+intent classification and response selection.
diff --git a/docs/docs/components.mdx b/docs/docs/components.mdx
@@ -187,7 +187,7 @@ word vectors in your pipeline.
   | Language Model | Parameter    | Default value for       |
   |                | "model_name" | "model_weights"         |
   +----------------+--------------+-------------------------+
-  | BERT           | bert         | bert-base-uncased       |
+  | BERT           | bert         | rasa/LaBSE              |
   +----------------+--------------+-------------------------+
   | GPT            | gpt          | openai-gpt              |
   +----------------+--------------+-------------------------+
@@ -209,7 +209,7 @@ word vectors in your pipeline.
       # Name of the language model to use
       model_name: "bert"
       # Pre-Trained weights to be loaded
-      model_weights: "bert-base-uncased"
+      model_weights: "rasa/LaBSE"
 
       # An optional path to a specific directory to download and cache the pre-trained model weights.
       # The `default` cache_dir is the same as https://huggingface.co/transformers/serialization.html#cache-directory .

diff --git a/rasa/nlu/utils/hugging_face/registry.py b/rasa/nlu/utils/hugging_face/registry.py
@@ -55,7 +55,7 @@
     "roberta": RobertaTokenizer,
 }
 model_weights_defaults = {
-    "bert": "bert-base-uncased",
+    "bert": "rasa/LaBSE",
     "gpt": "openai-gpt",
     "gpt2": "gpt2",
     "xlnet": "xlnet-base-cased",