
Commit

docs: updated docs
KennethEnevoldsen committed Nov 23, 2022
1 parent 9c37520 commit 0b2287b
Showing 8 changed files with 37 additions and 47 deletions.
1 change: 1 addition & 0 deletions docs/faq.rst
@@ -68,6 +68,7 @@ To make the documentation you can run:
# install sphinx, themes and extensions
pip install -r requirements.txt
pip install -e .
# generate html from the documentation
14 changes: 2 additions & 12 deletions docs/index.rst
@@ -52,7 +52,7 @@ authors.
:hidden:

installation
faq
wrap.pipeline_component

.. toctree::
:caption: News
@@ -64,17 +64,7 @@ authors.
:caption: FAQ
:hidden:



.. toctree::
:maxdepth: 3
:caption: API references
:hidden:

wrap.architectures
wrap.layers
wrap.pipeline_component

faq

.. toctree::
:caption: GitHub
12 changes: 6 additions & 6 deletions docs/news.rst
@@ -3,16 +3,16 @@ News and Changelog

* 1.2.0 (23/11/22)

- Automatically attempts to extract labels from the HuggingFace model config if left unspecified.
- Added new pipeline "token_classification_transformer" for token classification, including NER, POS, and other types of token predictions.

- Automatically infers NER or POS from the labels and assigns these to `doc.ents` or `doc.pos` if possible.

- Renamed "classification_transformer" to "sequence_classification_transformer" to avoid confusion with the new pipelines.

- Automatically assigns sequence predictions to `doc.cats`; can be toggled off by setting `"assign_to_cats": False`.

- Updated documentation.


* 1.1.0 (15/08/22)
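The 1.2.0 entries above mention inferring whether a model predicts NER or POS from its labels. A minimal sketch of that idea (`infer_predictions_to` and the checks below are hypothetical illustrations, not spacy-wrap's actual code):

```python
from typing import List, Optional

# UPOS tag set (universaldependencies.org); labels drawn from it suggest POS.
UPOS_TAGS = {
    "ADJ", "ADP", "ADV", "AUX", "CCONJ", "DET", "INTJ", "NOUN", "NUM",
    "PART", "PRON", "PROPN", "PUNCT", "SCONJ", "SYM", "VERB", "X",
}


def infer_predictions_to(labels: List[str]) -> Optional[str]:
    """Guess whether model labels are UPOS tags ("pos") or IOB entity tags ("ents")."""
    if all(label in UPOS_TAGS for label in labels):
        return "pos"
    # IOB entity tags look like "O", "B-PER", "I-LOC", ...
    if all(label == "O" or label[:2] in ("B-", "I-") for label in labels):
        return "ents"
    # neither scheme recognized; nothing is inferred
    return None


print(infer_predictions_to(["NOUN", "VERB", "ADJ"]))  # pos
print(infer_predictions_to(["O", "B-PER", "I-PER"]))  # ents
```

Labels that match neither scheme (e.g. sentiment classes) yield `None`, matching the changelog's "if possible" caveat.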
15 changes: 0 additions & 15 deletions docs/wrap.architectures.rst

This file was deleted.

12 changes: 0 additions & 12 deletions docs/wrap.layers.rst

This file was deleted.

2 changes: 1 addition & 1 deletion docs/wrap.pipeline_component.rst
@@ -19,5 +19,5 @@ Sequence Classification Transformer
Sequence Classification Transformer
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. autofunction:: spacy_wrap.pipeline_component_tok_clf.make_token_classification_transformer

3 changes: 3 additions & 0 deletions spacy_wrap/about.py
@@ -1,3 +1,6 @@
from importlib.metadata import version

__version__ = version("spacy_wrap")
__title__ = "spacy-wrap"
__download_url__ = "https://github.com/kennethenevoldsen/spacy-wrap"
__documentation__ = "https://kennethenevoldsen.github.io/spacy-wrap"
25 changes: 24 additions & 1 deletion spacy_wrap/pipeline_component_tok_clf.py
@@ -90,6 +90,8 @@ def make_token_classification_transformer(
to the single shared weights.
Args:
nlp (Language): The current nlp object.
name (str): The name of the component instance.
model (Model[List[Doc], FullTransformerBatch]): A thinc Model object wrapping
the transformer. Usually you will want to use the ClassificationTransformer
layer for this.
@@ -98,6 +100,11 @@
The doc._.{doc_extension_trf_data} attribute is set prior to calling the callback
as well as doc._.{doc_extension_prediction} and doc._.{doc_extension_prediction}_prob.
By default, no additional annotations are set.
max_batch_items (int): The maximum number of items to process in a batch.
doc_extension_trf_data (str): The name of the doc extension to store the
transformer data in.
doc_extension_prediction (str): The name of the doc extension to store the
predictions in.
aggregation_strategy (Literal["first", "average", "max"]): The aggregation
strategy to use. Chosen to correspond to the aggregation strategies
used in the `TokenClassificationPipeline` in Huggingface:
@@ -106,7 +113,8 @@
when there is ambiguity. “average”: Scores will be averaged first across
tokens, and then the maximum label is applied. “max”: Word entity will
simply be the token with the maximum score.
labels (List[str]): A list of labels which the transformer model outputs, should
be ordered.
predictions_to (Optional[List[Literal["pos", "tag", "ents"]]]): A list of
attributes the predictions should be written to. Defaults to None, in which
case it is inferred from the labels. If the labels are UPOS tags, the
@@ -115,6 +123,21 @@
not inferred from the labels, but can be added explicitly.
Note that if the "pos" attribute is set the labels must be UPOS tags and if
the "ents" attribute is set the labels must be IOB tags.
Returns:
TokenClassificationTransformer: The constructed component.
Example:
>>> import spacy
>>> import spacy_wrap
>>>
>>> nlp = spacy.blank("en")
>>> nlp.add_pipe("token_classification_transformer", config={
... "model": {"name": "vblagoje/bert-english-uncased-finetuned-pos"}}
... )
>>> doc = nlp("My name is Wolfgang and I live in Berlin")
>>> print([tok.pos_ for tok in doc])
['PRON', 'NOUN', 'AUX', 'PROPN', 'CCONJ', 'PRON', 'VERB', 'ADP', 'PROPN']
"""
clf_trf = TokenClassificationTransformer(
vocab=nlp.vocab,
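The docstring above describes three aggregation strategies for collapsing sub-word token scores into one word-level label. A small self-contained sketch of those semantics (`aggregate_word_label` is a hypothetical helper mirroring the described behavior, not the actual Huggingface or spacy-wrap implementation):

```python
from collections import defaultdict
from typing import Dict, List


def aggregate_word_label(token_scores: List[Dict[str, float]], strategy: str) -> str:
    """Pick one label for a word from per-sub-word-token label scores."""
    if strategy == "first":
        # the word's label is simply the label of its first token
        scores = token_scores[0]
        return max(scores, key=scores.get)
    if strategy == "average":
        # scores are averaged across tokens, then the maximum label is applied
        summed: Dict[str, float] = defaultdict(float)
        for scores in token_scores:
            for label, score in scores.items():
                summed[label] += score / len(token_scores)
        return max(summed, key=summed.get)
    if strategy == "max":
        # the word's label comes from the token with the single highest score
        best = max(token_scores, key=lambda s: max(s.values()))
        return max(best, key=best.get)
    raise ValueError(f"unknown aggregation strategy: {strategy}")


scores = [{"NOUN": 0.6, "VERB": 0.4}, {"NOUN": 0.1, "VERB": 0.9}]
print(aggregate_word_label(scores, "first"))    # NOUN (first token prefers NOUN)
print(aggregate_word_label(scores, "average"))  # VERB (mean: NOUN 0.35 vs VERB 0.65)
print(aggregate_word_label(scores, "max"))      # VERB (0.9 is the highest single score)
```

Note how the strategies can disagree on the same word: "first" follows the leading token while "average" and "max" weigh all tokens, which is the ambiguity the docstring warns about.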
