Commit ad98964

Merge pull request #5389 from HatuneMiku/master
Add cache_dir config option for saving HFTransformersNLP pre-trained model data.
wochinge committed Mar 18, 2020
2 parents da25fef + 8d90465 commit ad98964
Showing 3 changed files with 11 additions and 2 deletions.
1 change: 1 addition & 0 deletions changelog/5389.feature.rst
@@ -0,0 +1 @@
+Add an optional path to a specific directory to download and cache the pre-trained model weights for :ref:`HFTransformersNLP`.
4 changes: 4 additions & 0 deletions docs/nlu/components.rst
@@ -119,6 +119,10 @@ HFTransformersNLP
     # can be found at https://huggingface.co/transformers/pretrained_models.html . If left empty, it uses the
     # default model architecture that original transformers library loads
     model_weights: "bert-base-uncased"
+    # An optional path to a specific directory to download and cache the pre-trained model weights.
+    # The `default` cache_dir is the same as https://huggingface.co/transformers/serialization.html#cache-directory .
+    cache_dir: null
     # +----------------+--------------+-------------------------+
     # | Language Model | Parameter    | Default value for       |
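
For illustration, here is a minimal Python sketch (not part of this diff) of configuring the component with the new option; the cache path is a hypothetical example value:

    # Minimal sketch, assuming rasa 1.x with the transformers dependency installed.
    # "/path/to/hf_cache" is a hypothetical example path, not a project default.
    from rasa.nlu.utils.hugging_face.hf_transformers import HFTransformersNLP

    component = HFTransformersNLP(
        component_config={
            "model_name": "bert",
            "model_weights": "bert-base-uncased",
            # New in this PR: where to download and cache pre-trained weights.
            # Leaving it as None keeps the transformers default cache directory.
            "cache_dir": "/path/to/hf_cache",
        }
    )
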
8 changes: 6 additions & 2 deletions rasa/nlu/utils/hugging_face/hf_transformers.py
@@ -36,6 +36,9 @@ class HFTransformersNLP(Component):
         "model_name": "bert",
         # Pre-Trained weights to be loaded(string)
         "model_weights": None,
+        # an optional path to a specific directory to download
+        # and cache the pre-trained model weights.
+        "cache_dir": None,
     }

     def __init__(self, component_config: Optional[Dict[Text, Any]] = None) -> None:
@@ -63,6 +66,7 @@ def _load_model(self) -> None:
         )

         self.model_weights = self.component_config["model_weights"]
+        self.cache_dir = self.component_config["cache_dir"]

         if not self.model_weights:
             logger.info(
@@ -74,10 +78,10 @@ def _load_model(self) -> None:
         logger.debug(f"Loading Tokenizer and Model for {self.model_name}")

         self.tokenizer = model_tokenizer_dict[self.model_name].from_pretrained(
-            self.model_weights
+            self.model_weights, cache_dir=self.cache_dir
         )
         self.model = model_class_dict[self.model_name].from_pretrained(
-            self.model_weights
+            self.model_weights, cache_dir=self.cache_dir
         )

         # Use a universal pad token since all transformer architectures do not have a
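
Under the hood, cache_dir is simply forwarded to the from_pretrained loaders of the transformers library. A standalone sketch of that behavior, with model and path chosen purely for illustration:

    # Standalone sketch, assuming transformers with TensorFlow support installed.
    # "/path/to/hf_cache" is a hypothetical path; cache_dir=None means the
    # library's default cache directory is used.
    from transformers import BertTokenizer, TFBertModel

    cache_dir = "/path/to/hf_cache"
    tokenizer = BertTokenizer.from_pretrained("bert-base-uncased", cache_dir=cache_dir)
    model = TFBertModel.from_pretrained("bert-base-uncased", cache_dir=cache_dir)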
