From e076b3b923045076be6be272478d3d11e0ffae32 Mon Sep 17 00:00:00 2001
From: Terencio Agozzino <terencio.agozzino@gmail.com>
Date: Tue, 23 Mar 2021 21:25:44 +0100
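Subject: [PATCH] chore: update the doc build and remove attribute docstrings

Add the packages needed to build the documentation to
docs/requirements.txt and run sphinx-build with -W so that warnings
fail the build. Remove the per-attribute docstrings from the attrs
classes, comment out the attrs fields of Word2Vec, and replace the
"Vertex" forward references in the Hop and Walk aliases with Any.
Also change importlib-metadata from 3.7.3 to 3.4.0 in poetry.lock.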

---
 docs/requirements.txt                  |  5 +++++
 poetry.lock                            |  6 +++---
 pyproject.toml                         |  2 +-
 pyrdf2vec/connectors.py                |  5 -----
 pyrdf2vec/embedders/word2vec.py        |  7 ++-----
 pyrdf2vec/graphs/kg.py                 | 21 ---------------------
 pyrdf2vec/graphs/vertex.py             | 13 ++-----------
 pyrdf2vec/rdf2vec.py                   | 20 ++++----------------
 pyrdf2vec/samplers/frequency.py        |  1 -
 pyrdf2vec/samplers/pagerank.py         |  7 -------
 pyrdf2vec/samplers/sampler.py          | 12 ------------
 pyrdf2vec/samplers/uniform.py          |  5 -----
 pyrdf2vec/typings.py                   |  6 +++---
 pyrdf2vec/walkers/community.py         |  5 -----
 pyrdf2vec/walkers/halk.py              |  1 -
 pyrdf2vec/walkers/ngram.py             |  5 -----
 pyrdf2vec/walkers/walker.py            | 19 ++-----------------
 pyrdf2vec/walkers/weisfeiler_lehman.py |  6 ------
 18 files changed, 22 insertions(+), 124 deletions(-)

diff --git a/docs/requirements.txt b/docs/requirements.txt
index 932362a1..bacdf824 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -1,3 +1,8 @@
+gensim
+rdflib
+scikit-learn
+sphinx
 sphinx-autodoc-typehints
 sphinx-rtd-theme
 sphinxcontrib-apidoc
+tomlkit
diff --git a/poetry.lock b/poetry.lock
index fad8419e..680b4ff7 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -317,7 +317,7 @@ python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
 
 [[package]]
 name = "importlib-metadata"
-version = "3.7.3"
+version = "3.4.0"
 description = "Read metadata from Python packages"
 category = "main"
 optional = false
@@ -1513,8 +1513,8 @@ imagesize = [
     {file = "imagesize-1.2.0.tar.gz", hash = "sha256:b1f6b5a4eab1f73479a50fb79fcf729514a900c341d8503d62a62dbc4127a2b1"},
 ]
 importlib-metadata = [
-    {file = "importlib_metadata-3.7.3-py3-none-any.whl", hash = "sha256:b74159469b464a99cb8cc3e21973e4d96e05d3024d337313fedb618a6e86e6f4"},
-    {file = "importlib_metadata-3.7.3.tar.gz", hash = "sha256:742add720a20d0467df2f444ae41704000f50e1234f46174b51f9c6031a1bd71"},
+    {file = "importlib_metadata-3.4.0-py3-none-any.whl", hash = "sha256:ace61d5fc652dc280e7b6b4ff732a9c2d40db2c0f92bc6cb74e07b73d53a1771"},
+    {file = "importlib_metadata-3.4.0.tar.gz", hash = "sha256:fa5daa4477a7414ae34e95942e4dd07f62adf589143c875c133c1e53c4eff38d"},
 ]
 incremental = [
     {file = "incremental-21.3.0-py2.py3-none-any.whl", hash = "sha256:92014aebc6a20b78a8084cdd5645eeaa7f74b8933f70fa3ada2cfbd1e3b54321"},
diff --git a/pyproject.toml b/pyproject.toml
index 544945fb..ab663dc0 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -171,7 +171,7 @@ deps =
     sphinxcontrib-apidoc
     tomlkit
 commands =
-    sphinx-build -n -T docs docs/_build/html
+    sphinx-build -n -T -W docs docs/_build/html
     python -m doctest README.rst
 
 [testenv:lint]
diff --git a/pyrdf2vec/connectors.py b/pyrdf2vec/connectors.py
index 6ffac1aa..8995dab5 100644
--- a/pyrdf2vec/connectors.py
+++ b/pyrdf2vec/connectors.py
@@ -24,14 +24,12 @@ class Connector(ABC):
     endpoint: str = attr.ib(
         validator=attr.validators.instance_of(str),
     )
-    """The endpoint to execute the queries."""
 
     cache: Cache = attr.ib(
         kw_only=True,
         factory=lambda: TTLCache(maxsize=1024, ttl=1200),
         validator=attr.validators.optional(attr.validators.instance_of(Cache)),
     )
-    """The policy and size cache to use."""
 
     _headers: Dict[str, str] = attr.ib(
         init=False,
@@ -40,16 +38,13 @@ class Connector(ABC):
             "Accept": "application/sparql-results+json",
         },
     )
-    """The HTTP headers to use."""
 
     _asession = attr.ib(init=False, default=None)
-    """The aiohttp session to use for asynchrone requests."""
 
     _session = attr.ib(
         init=False,
         factory=lambda: requests.Session(),
     )
-    """The requests session to use for synchrone requests."""
 
     async def close(self) -> None:
         """Closes the aiohttp session."""
diff --git a/pyrdf2vec/embedders/word2vec.py b/pyrdf2vec/embedders/word2vec.py
index cf8b7c95..a5d30a8b 100644
--- a/pyrdf2vec/embedders/word2vec.py
+++ b/pyrdf2vec/embedders/word2vec.py
@@ -17,12 +17,9 @@ class Word2Vec(Embedder):
 
     """
 
-    kwargs = attr.ib(init=False, default=None)
-    """The keyword arguments dictionary.
-    Defaults to {size=500, min_count=0, negative=20}.
-    """
+    # kwargs = attr.ib(init=False, default=None)
 
-    _model: W2V = attr.ib(init=False, default=None, repr=False)
+    # _model = attr.ib(init=False, type=W2V, default=None, repr=False)
 
     def __init__(self, **kwargs):
         self.kwargs = {
diff --git a/pyrdf2vec/graphs/kg.py b/pyrdf2vec/graphs/kg.py
index 3f1bb7d4..3bf28196 100644
--- a/pyrdf2vec/graphs/kg.py
+++ b/pyrdf2vec/graphs/kg.py
@@ -28,7 +28,6 @@ class KG:
             _check_location,
         ],
     )
-    """The location of the file to load."""
 
     skip_predicates: Set[str] = attr.ib(
         factory=set,
@@ -36,7 +35,6 @@ class KG:
             member_validator=attr.validators.instance_of(str)
         ),
     )
-    """The label predicates to skip from the KG."""
 
     literals: List[List[str]] = attr.ib(  # type: ignore
         factory=list,
@@ -44,65 +42,46 @@ class KG:
             member_validator=attr.validators.instance_of(List)
         ),
     )
-    """The predicate chains to get the literals."""
 
     fmt: Optional[str] = attr.ib(
         kw_only=True,
         default=None,
         validator=attr.validators.optional(attr.validators.instance_of(str)),
     )
-    """The format of the file.
-    It should be used only if the format can not be determined from source.
-    """
 
     mul_req: bool = attr.ib(
         kw_only=True,
         default=False,
         validator=attr.validators.instance_of(bool),
     )
-    """True to allow bundling of SPARQL queries, False otherwise.
-    This attribute accelerates the extraction of walks for remote Knowledge
-    Graphs. Beware that this may violate the policy of some SPARQL endpoint
-    server.
-    """
 
     cache: Cache = attr.ib(
         kw_only=True,
         factory=lambda: TTLCache(maxsize=1024, ttl=1200),
         validator=attr.validators.optional(attr.validators.instance_of(Cache)),
     )
-    """The policy and size cache to use.
-    Defaults to TTLCache(maxsize=1024, ttl=1200)
-    """
 
     connector: SPARQLConnector = attr.ib(default=None, init=False, repr=False)
-    """The connector to use."""
 
     _is_remote: bool = attr.ib(
         default=False, validator=attr.validators.instance_of(bool)
     )
-    """True if the Knowledge Graph is in remote, False otherwise."""
 
     _inv_transition_matrix: DefaultDict[Vertex, Set[Vertex]] = attr.ib(
         init=False, repr=False, factory=lambda: defaultdict(set)
     )
-    """Contains the parents of vertices."""
 
     _transition_matrix: DefaultDict[Vertex, Set[Vertex]] = attr.ib(
         init=False, repr=False, factory=lambda: defaultdict(set)
     )
-    """Contains the children of vertices."""
 
     _entity_hops: Dict[str, List[Hop]] = attr.ib(
         init=False, repr=False, factory=dict
     )
-    """Caches the results of asynchronous requests."""
 
     _entities: Set[Vertex] = attr.ib(init=False, repr=False, factory=set)
-    """Stores the entities."""
 
     _vertices: Set[Vertex] = attr.ib(init=False, repr=False, factory=set)
-    """Stores the vertices."""
 
     def __attrs_post_init__(self):
         if self.location is not None:
diff --git a/pyrdf2vec/graphs/vertex.py b/pyrdf2vec/graphs/vertex.py
index d2078507..df19b2d4 100644
--- a/pyrdf2vec/graphs/vertex.py
+++ b/pyrdf2vec/graphs/vertex.py
@@ -1,5 +1,3 @@
-from __future__ import annotations
-
 from typing import Any, Optional
 
 import attr
@@ -10,20 +8,13 @@ class Vertex:
     """Represents a vertex in a Knowledge Graph."""
 
     name: str = attr.ib(validator=attr.validators.instance_of(str))
-    """The name of vertex."""
-
     predicate: bool = attr.ib(
         default=False,
         validator=attr.validators.instance_of(bool),
         repr=False,
     )
-    """True if the vertex is a predicate. False, otherwise."""
-
-    vprev: Optional[Vertex] = attr.ib(default=None, repr=False)
-    """The previous vertex."""
-
-    vnext: Optional[Vertex] = attr.ib(default=None, repr=False)
-    """The next vertex."""
+    vprev: Optional["Vertex"] = attr.ib(default=None, repr=False)
+    vnext: Optional["Vertex"] = attr.ib(default=None, repr=False)
 
     def __eq__(self, other: Any) -> bool:
         """Defines behavior for the equality operator, ==.
diff --git a/pyrdf2vec/rdf2vec.py b/pyrdf2vec/rdf2vec.py
index 05f30f2b..94a281e7 100644
--- a/pyrdf2vec/rdf2vec.py
+++ b/pyrdf2vec/rdf2vec.py
@@ -17,14 +17,15 @@
 class RDF2VecTransformer:
     """Transforms nodes in a Knowledge Graph into an embedding."""
 
-    embedder: Embedder = attr.ib(
+    embedder = attr.ib(
         factory=lambda: Word2Vec(),
+        type=Embedder,
         validator=attr.validators.instance_of(Embedder),  # type: ignore
     )
-    """The embedding technique."""
 
-    walkers: Sequence[Walker] = attr.ib(
+    walkers = attr.ib(
         factory=lambda: [RandomWalker(2)],  # type: ignore
+        type=Sequence[Walker],
         validator=attr.validators.deep_iterable(
             member_validator=attr.validators.instance_of(
                 Walker  # type: ignore
@@ -32,28 +33,18 @@ class RDF2VecTransformer:
             iterable_validator=attr.validators.instance_of(list),
         ),
     )
-    """The walking strategy."""
 
     verbose: int = attr.ib(
         kw_only=True, default=0, validator=attr.validators.in_([0, 1, 2])
     )
-    """The verbosity level.
-           0: does not display anything;
-           1: display of the progress of extraction and training of walks;
-           2: debugging.
-    """
 
     _embeddings: Embeddings = attr.ib(init=False, factory=list)
-    """All the embeddings of the model."""
 
     _entities: Entities = attr.ib(init=False, factory=list)
-    """All the entities of the model."""
 
     _literals: Literals = attr.ib(init=False, factory=list)
-    """All the literals of the model."""
 
     _walks: List[str] = attr.ib(init=False, factory=list)
-    """All the walks of the model."""
 
     _is_extract_walks_literals = attr.ib(
         init=False,
@@ -61,9 +52,6 @@ class RDF2VecTransformer:
         default=False,
         validator=attr.validators.instance_of(bool),
     )
-    """True if the session must be closed after the call to the `transform`
-    function. False, otherwise.
-    """
 
     def fit(
         self, walks: List[str], is_update: bool = False
diff --git a/pyrdf2vec/samplers/frequency.py b/pyrdf2vec/samplers/frequency.py
index 6667c8d1..2f820f30 100644
--- a/pyrdf2vec/samplers/frequency.py
+++ b/pyrdf2vec/samplers/frequency.py
@@ -21,7 +21,6 @@ class ObjFreqSampler(Sampler):
     _counts: DefaultDict[str, int] = attr.ib(
         init=False, repr=False, factory=lambda: defaultdict(dict)
     )
-    """Counter for vertices."""
 
     def fit(self, kg: KG) -> None:
         """Fits the sampling strategy by counting the number of available
diff --git a/pyrdf2vec/samplers/pagerank.py b/pyrdf2vec/samplers/pagerank.py
index 8bcb5ee0..af3d3866 100644
--- a/pyrdf2vec/samplers/pagerank.py
+++ b/pyrdf2vec/samplers/pagerank.py
@@ -16,11 +16,6 @@ class PageRankSampler(Sampler):
     nodes are more important than others and hence there will be resources
     which are more frequent in the walks as others.
 
-    Args:
-
-        alpha: The damping for PageRank.
-            Defaults to 0.85.
-
     """
 
     alpha: float = attr.ib(
@@ -28,12 +23,10 @@ class PageRankSampler(Sampler):
         default=0.85,
         validator=attr.validators.instance_of(float),
     )
-    """The damping for Page Rank."""
 
     _pageranks: Dict[str, float] = attr.ib(
         init=False, repr=False, factory=dict
     )
-    """The Page Rank dictionary."""
 
     def fit(self, kg: KG) -> None:
         """Fits the sampling strategy by running PageRank on a provided KG
diff --git a/pyrdf2vec/samplers/sampler.py b/pyrdf2vec/samplers/sampler.py
index 4d40bff7..a73ee997 100644
--- a/pyrdf2vec/samplers/sampler.py
+++ b/pyrdf2vec/samplers/sampler.py
@@ -25,38 +25,26 @@ class Sampler(ABC):
     inverse: bool = attr.ib(
         default=False, validator=attr.validators.instance_of(bool)
     )
-    """True if the inverse algorithm must be used, False otherwise."""
 
     split: bool = attr.ib(
         default=False, validator=attr.validators.instance_of(bool)
     )
-    """True if the split algorithm must be used, False otherwise."""
 
     _is_support_remote: bool = attr.ib(init=False, repr=False, default=False)
-    """True if the sampling strategy can be used with a remote Knowledge Graph,
-    False Otherwise.
-    """
 
     _random_state: Optional[int] = attr.ib(
         init=False,
         repr=False,
         default=None,
     )
-    """The random state to use to keep random determinism with the sampling
-    strategy.
-    """
 
     _vertices_deg: Dict[str, int] = attr.ib(
         init=False, repr=False, factory=dict
     )
-    """The degree of the vertices."""
 
     _visited: Set[Tuple[Hop, int]] = attr.ib(
         init=False, repr=False, factory=set
     )
-    """Tags vertices that appear at the max depth or of which all their
-    children are tagged.
-    """
 
     @abstractmethod
     def fit(self, kg: KG) -> None:
diff --git a/pyrdf2vec/samplers/uniform.py b/pyrdf2vec/samplers/uniform.py
index 3e2b1738..b2e50575 100644
--- a/pyrdf2vec/samplers/uniform.py
+++ b/pyrdf2vec/samplers/uniform.py
@@ -18,17 +18,12 @@ class UniformSampler(Sampler):
     inverse: bool = attr.ib(
         init=False, default=False, validator=attr.validators.instance_of(bool)
     )
-    """True if the inverse algorithm must be used, False otherwise."""
 
     split: bool = attr.ib(
         init=False, default=False, validator=attr.validators.instance_of(bool)
     )
-    """True if the split algorithm must be used, False otherwise."""
 
     _is_support_remote: bool = attr.ib(init=False, repr=False, default=True)
-    """True if the sampling strategy can be used with a remote Knowledge Graph,
-    False Otherwise.
-    """
 
     def fit(self, kg: KG) -> None:
         """Since the weights are uniform, this function does nothing.
diff --git a/pyrdf2vec/typings.py b/pyrdf2vec/typings.py
index 9c319ca6..e4eabd7e 100644
--- a/pyrdf2vec/typings.py
+++ b/pyrdf2vec/typings.py
@@ -1,12 +1,12 @@
-from typing import TYPE_CHECKING, Dict, List, Tuple, Union
+from typing import TYPE_CHECKING, Any, Dict, List, Tuple, Union
 
 if TYPE_CHECKING:
     from pyrdf2vec.graphs import Vertex  # noqa: F401
 
-Hop = Tuple["Vertex", "Vertex"]
+Hop = Tuple[Any, Any]
 
 SWalk = Tuple[str, ...]
-Walk = Tuple["Vertex", ...]
+Walk = Tuple[Any, ...]
 
 Embeddings = List[str]
 
diff --git a/pyrdf2vec/walkers/community.py b/pyrdf2vec/walkers/community.py
index 7e2e8b0b..d1bccad4 100644
--- a/pyrdf2vec/walkers/community.py
+++ b/pyrdf2vec/walkers/community.py
@@ -40,17 +40,12 @@ class CommunityWalker(Walker):
     hop_prob: float = attr.ib(
         kw_only=True, default=0.1, validator=attr.validators.instance_of(float)
     )
-    """The probability to hop."""
 
     resolution: int = attr.ib(
         kw_only=True, default=1, validator=attr.validators.instance_of(int)
     )
-    """The resolution to use."""
 
     _is_support_remote: bool = attr.ib(init=False, repr=False, default=False)
-    """True if the walking strategy can be used with a remote Knowledge Graph,
-    False Otherwise.
-    """
 
     def _community_detection(self, kg: KG) -> None:
         """Converts the knowledge graph to a networkX graph.
diff --git a/pyrdf2vec/walkers/halk.py b/pyrdf2vec/walkers/halk.py
index 4d582dfa..25264849 100644
--- a/pyrdf2vec/walkers/halk.py
+++ b/pyrdf2vec/walkers/halk.py
@@ -25,7 +25,6 @@ class HALKWalker(RandomWalker):
             iterable_validator=attr.validators.instance_of(list),
         ),
     )
-    """The minimum frequency thresholds of a hop to be kept."""
 
     def _extract(self, kg: KG, instance: Vertex) -> EntityWalks:
         """Extracts walks rooted at the provided entities which are then each
diff --git a/pyrdf2vec/walkers/ngram.py b/pyrdf2vec/walkers/ngram.py
index 8c1d48a6..737c04b4 100644
--- a/pyrdf2vec/walkers/ngram.py
+++ b/pyrdf2vec/walkers/ngram.py
@@ -21,21 +21,16 @@ class NGramWalker(RandomWalker):
     grams: int = attr.ib(
         kw_only=True, default=3, validator=attr.validators.instance_of(int)
     )
-    """The N-gram to relabel."""
 
     wildcards: list = attr.ib(
         kw_only=True,
         default=None,
         validator=attr.validators.optional(attr.validators.instance_of(list)),
     )
-    """The wildcards to be used to match sub-sequences with small differences
-    to be mapped onto the same label.
-    """
 
     _n_gram_map: Dict[Tuple, str] = attr.ib(
         init=False, repr=False, factory=dict
     )
-    """Stores the mapping of N-gram."""
 
     def _take_n_grams(self, walk: Walk) -> List[str]:
         """Takes the N-Grams.
diff --git a/pyrdf2vec/walkers/walker.py b/pyrdf2vec/walkers/walker.py
index 47f65f88..162118bb 100644
--- a/pyrdf2vec/walkers/walker.py
+++ b/pyrdf2vec/walkers/walker.py
@@ -31,12 +31,10 @@ class Walker(ABC):
     """Base class of the walking strategies."""
 
     kg: Optional[KG] = None
-    """Global KG used later on for the worker process."""
 
     max_depth: int = attr.ib(
         validator=[attr.validators.instance_of(int), _check_max_depth]
     )
-    """The maximum depth of one walk."""
 
     max_walks: Optional[int] = attr.ib(  # type: ignore
         default=None,
@@ -45,13 +43,12 @@ class Walker(ABC):
             _check_max_walks,
         ],
     )
-    """The maximum number of walks per entity."""
 
-    sampler: Sampler = attr.ib(
+    sampler = attr.ib(
         factory=lambda: UniformSampler(),
+        type=Sampler,
         validator=attr.validators.instance_of(Sampler),  # type: ignore
     )
-    """The sampling strategy."""
 
     n_jobs: Optional[int] = attr.ib(  # type: ignore
         default=None,
@@ -60,32 +57,20 @@ class Walker(ABC):
             _check_jobs,
         ],
     )
-    """The number of CPU cores used when parallelizing.
-    None means 1. -1 means using all processors.
-    """
 
     with_reverse: Optional[bool] = attr.ib(
         kw_only=True,
         default=False,
         validator=attr.validators.instance_of(bool),
     )
-    """True to extracts children's and parents' walks from the root,
-    creating (max_walks * max_walks) more walks of 2 * depth, False otherwise.
-    """
 
     random_state: Optional[int] = attr.ib(
         kw_only=True,
         default=None,
         validator=attr.validators.optional(attr.validators.instance_of(int)),
     )
-    """The random state to use to keep random determinism with the walking
-    strategy.
-    """
 
     _is_support_remote: bool = attr.ib(init=False, repr=False, default=True)
-    """True if the walking strategy can be used with a remote Knowledge Graph,
-    False Otherwise.
-    """
 
     def __attrs_post_init__(self):
         if self.n_jobs == -1:
diff --git a/pyrdf2vec/walkers/weisfeiler_lehman.py b/pyrdf2vec/walkers/weisfeiler_lehman.py
index 70180821..819439fc 100644
--- a/pyrdf2vec/walkers/weisfeiler_lehman.py
+++ b/pyrdf2vec/walkers/weisfeiler_lehman.py
@@ -16,22 +16,16 @@ class WLWalker(RandomWalker):
     wl_iterations: int = attr.ib(
         kw_only=True, default=4, validator=attr.validators.instance_of(int)
     )
-    """The Weisfeiler Lehman's iteration."""
 
     _is_support_remote: bool = attr.ib(init=False, repr=False, default=False)
-    """True if the walking strategy can be used with a remote Knowledge Graph,
-    False Otherwise.
-    """
 
     _inv_label_map: DefaultDict[
         Vertex, Dict[Union[str, int], Union[str, int]]
     ] = attr.ib(init=False, repr=False, factory=lambda: defaultdict(dict))
-    """Stores the mapping of the inverse labels."""
 
     _label_map: DefaultDict[Vertex, Dict[int, str]] = attr.ib(
         init=False, repr=False, factory=lambda: defaultdict(dict)
     )
-    """Stores the mapping of the labels."""
 
     def _create_label(self, kg: KG, vertex: Vertex, n: int) -> str:
         """Creates a label according to a vertex and its neighbors.