Commit e076b3b

chore: update the doc

rememberYou committed Mar 23, 2021
1 parent a527c43, commit e076b3b

Showing 18 changed files with 22 additions and 124 deletions.
5 changes: 5 additions & 0 deletions docs/requirements.txt

@@ -1,3 +1,8 @@
+gensim
+rdflib
+scikit-learn
 sphinx
 sphinx-autodoc-typehints
 sphinx-rtd-theme
+sphinxcontrib-apidoc
+tomlkit
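
A plausible reading of the new entries: gensim, rdflib, and scikit-learn are runtime dependencies of pyrdf2vec itself, presumably added so that Sphinx autodoc can import the package when the docs are built from this file, while sphinxcontrib-apidoc and tomlkit mirror the deps that tox installs in the pyproject.toml hunk below.
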
6 changes: 3 additions & 3 deletions poetry.lock

Some generated files are not rendered by default.

2 changes: 1 addition & 1 deletion pyproject.toml

@@ -171,7 +171,7 @@ deps =
     sphinxcontrib-apidoc
     tomlkit
 commands =
-    sphinx-build -n -T docs docs/_build/html
+    sphinx-build -n -T -W docs docs/_build/html
     python -m doctest README.rst

 [testenv:lint]
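
The added -W flag makes sphinx-build treat warnings as errors, so the docs environment in tox now fails on any Sphinx warning rather than passing silently.
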
5 changes: 0 additions & 5 deletions pyrdf2vec/connectors.py

@@ -24,14 +24,12 @@ class Connector(ABC):
     endpoint: str = attr.ib(
         validator=attr.validators.instance_of(str),
     )
-    """The endpoint to execute the queries."""

     cache: Cache = attr.ib(
         kw_only=True,
         factory=lambda: TTLCache(maxsize=1024, ttl=1200),
         validator=attr.validators.optional(attr.validators.instance_of(Cache)),
     )
-    """The policy and size cache to use."""

     _headers: Dict[str, str] = attr.ib(
         init=False,
@@ -40,16 +38,13 @@ class Connector(ABC):
             "Accept": "application/sparql-results+json",
         },
     )
-    """The HTTP headers to use."""

     _asession = attr.ib(init=False, default=None)
-    """The aiohttp session to use for asynchrone requests."""

     _session = attr.ib(
         init=False,
         factory=lambda: requests.Session(),
     )
-    """The requests session to use for synchrone requests."""

     async def close(self) -> None:
         """Closes the aiohttp session."""
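
The same pattern repeats throughout this commit: bare string literals placed after attribute assignments are deleted. A minimal sketch of why they are inert at runtime, assuming attrs and cachetools are installed (the endpoint URL is made up):

# A string literal after an assignment is not attached to the attribute
# at runtime; only source-reading tools such as Sphinx autodoc see it.
import attr
from cachetools import TTLCache

@attr.s
class Connector:
    endpoint: str = attr.ib(validator=attr.validators.instance_of(str))
    """The endpoint to execute the queries."""  # no runtime effect

    cache = attr.ib(
        kw_only=True,
        factory=lambda: TTLCache(maxsize=1024, ttl=1200),
    )

print(Connector("http://localhost:5820/query").endpoint)
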
7 changes: 2 additions & 5 deletions pyrdf2vec/embedders/word2vec.py

@@ -17,12 +17,9 @@ class Word2Vec(Embedder):
     """

-    kwargs = attr.ib(init=False, default=None)
-    """The keyword arguments dictionary.
-    Defaults to {size=500, min_count=0, negative=20}.
-    """
+    # kwargs = attr.ib(init=False, default=None)

-    _model: W2V = attr.ib(init=False, default=None, repr=False)
+    # _model = attr.ib(init=False, type=W2V, default=None, repr=False)

     def __init__(self, **kwargs):
         self.kwargs = {
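
For reference, the deleted docstring recorded the embedder's default keyword arguments. A toy sketch of the merge that __init__ appears to perform, with the defaults taken from that docstring (note that gensim 4 later renamed size to vector_size):

# User-supplied kwargs override the recorded defaults.
defaults = {"size": 500, "min_count": 0, "negative": 20}

def build_kwargs(**user_kwargs):
    return {**defaults, **user_kwargs}

print(build_kwargs(negative=5))
# {'size': 500, 'min_count': 0, 'negative': 5}
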
21 changes: 0 additions & 21 deletions pyrdf2vec/graphs/kg.py

@@ -28,81 +28,60 @@ class KG:
             _check_location,
         ],
     )
-    """The location of the file to load."""

     skip_predicates: Set[str] = attr.ib(
         factory=set,
         validator=attr.validators.deep_iterable(
             member_validator=attr.validators.instance_of(str)
         ),
     )
-    """The label predicates to skip from the KG."""

     literals: List[List[str]] = attr.ib(  # type: ignore
         factory=list,
         validator=attr.validators.deep_iterable(
             member_validator=attr.validators.instance_of(List)
         ),
     )
-    """The predicate chains to get the literals."""

     fmt: Optional[str] = attr.ib(
         kw_only=True,
         default=None,
         validator=attr.validators.optional(attr.validators.instance_of(str)),
     )
-    """The format of the file.
-    It should be used only if the format can not be determined from source.
-    """

     mul_req: bool = attr.ib(
         kw_only=True,
         default=False,
         validator=attr.validators.instance_of(bool),
     )
-    """True to allow bundling of SPARQL queries, False otherwise.
-    This attribute accelerates the extraction of walks for remote Knowledge
-    Graphs. Beware that this may violate the policy of some SPARQL endpoint
-    server.
-    """

     cache: Cache = attr.ib(
         kw_only=True,
         factory=lambda: TTLCache(maxsize=1024, ttl=1200),
         validator=attr.validators.optional(attr.validators.instance_of(Cache)),
     )
-    """The policy and size cache to use.
-    Defaults to TTLCache(maxsize=1024, ttl=1200)
-    """

     connector: SPARQLConnector = attr.ib(default=None, init=False, repr=False)
-    """The connector to use."""

     _is_remote: bool = attr.ib(
         default=False, validator=attr.validators.instance_of(bool)
     )
-    """True if the Knowledge Graph is in remote, False otherwise."""

     _inv_transition_matrix: DefaultDict[Vertex, Set[Vertex]] = attr.ib(
         init=False, repr=False, factory=lambda: defaultdict(set)
     )
-    """Contains the parents of vertices."""

     _transition_matrix: DefaultDict[Vertex, Set[Vertex]] = attr.ib(
         init=False, repr=False, factory=lambda: defaultdict(set)
     )
-    """Contains the children of vertices."""

     _entity_hops: Dict[str, List[Hop]] = attr.ib(
         init=False, repr=False, factory=dict
     )
-    """Caches the results of asynchronous requests."""

     _entities: Set[Vertex] = attr.ib(init=False, repr=False, factory=set)
-    """Stores the entities."""

     _vertices: Set[Vertex] = attr.ib(init=False, repr=False, factory=set)
-    """Stores the vertices."""

     def __attrs_post_init__(self):
         if self.location is not None:
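
Two of the removed docstrings carried information worth keeping in mind: mul_req bundles SPARQL queries to speed up remote extraction (possibly violating endpoint policies), and the cache default is a TTL cache. A short sketch of that default, assuming cachetools is installed:

from cachetools import TTLCache

# Default recorded in the removed docstring: at most 1024 entries,
# each evicted 1200 seconds (20 minutes) after insertion.
cache = TTLCache(maxsize=1024, ttl=1200)
cache["http://dbpedia.org/resource/Belgium"] = ["cached", "hops"]
print(len(cache))  # 1
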
13 changes: 2 additions & 11 deletions pyrdf2vec/graphs/vertex.py

@@ -1,5 +1,3 @@
-from __future__ import annotations
-
 from typing import Any, Optional

 import attr
@@ -10,20 +8,13 @@ class Vertex:
     """Represents a vertex in a Knowledge Graph."""

     name: str = attr.ib(validator=attr.validators.instance_of(str))
-    """The name of vertex."""

     predicate: bool = attr.ib(
         default=False,
         validator=attr.validators.instance_of(bool),
         repr=False,
     )
-    """True if the vertex is a predicate. False, otherwise."""

-    vprev: Optional[Vertex] = attr.ib(default=None, repr=False)
-    """The previous vertex."""
-
-    vnext: Optional[Vertex] = attr.ib(default=None, repr=False)
-    """The next vertex."""
+    vprev: Optional["Vertex"] = attr.ib(default=None, repr=False)
+    vnext: Optional["Vertex"] = attr.ib(default=None, repr=False)

     def __eq__(self, other: Any) -> bool:
         """Defines behavior for the equality operator, ==.
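
Dropping from __future__ import annotations means the class body can no longer name the class itself in a bare annotation, hence the quoted "Vertex" forward references. A minimal sketch of the same pattern (Node is a hypothetical stand-in):

import attr
from typing import Optional

@attr.s
class Node:
    name: str = attr.ib()
    # Without the __future__ import, a bare Optional[Node] here would
    # raise NameError; quoting defers the lookup to type checkers.
    prev: Optional["Node"] = attr.ib(default=None, repr=False)

a = Node("a")
b = Node("b", prev=a)
print(b)  # Node(name='b')
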
20 changes: 4 additions & 16 deletions pyrdf2vec/rdf2vec.py

@@ -17,53 +17,41 @@
 class RDF2VecTransformer:
     """Transforms nodes in a Knowledge Graph into an embedding."""

-    embedder: Embedder = attr.ib(
+    embedder = attr.ib(
         factory=lambda: Word2Vec(),
+        type=Embedder,
         validator=attr.validators.instance_of(Embedder),  # type: ignore
     )
-    """The embedding technique."""

-    walkers: Sequence[Walker] = attr.ib(
+    walkers = attr.ib(
         factory=lambda: [RandomWalker(2)],  # type: ignore
+        type=Sequence[Walker],
         validator=attr.validators.deep_iterable(
             member_validator=attr.validators.instance_of(
                 Walker  # type: ignore
             ),
             iterable_validator=attr.validators.instance_of(list),
         ),
     )
-    """The walking strategy."""

     verbose: int = attr.ib(
         kw_only=True, default=0, validator=attr.validators.in_([0, 1, 2])
     )
-    """The verbosity level.
-    0: does not display anything;
-    1: display of the progress of extraction and training of walks;
-    2: debugging.
-    """

     _embeddings: Embeddings = attr.ib(init=False, factory=list)
-    """All the embeddings of the model."""

     _entities: Entities = attr.ib(init=False, factory=list)
-    """All the entities of the model."""

     _literals: Literals = attr.ib(init=False, factory=list)
-    """All the literals of the model."""

     _walks: List[str] = attr.ib(init=False, factory=list)
-    """All the walks of the model."""

     _is_extract_walks_literals = attr.ib(
         init=False,
         repr=False,
         default=False,
         validator=attr.validators.instance_of(bool),
     )
-    """True if the session must be closed after the call to the `transform`
-    function. False, otherwise.
-    """

     def fit(
         self, walks: List[str], is_update: bool = False
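
The annotation-style declarations become attrs' type= parameter, which records the same metadata without a class-level annotation. A small sketch of the equivalence:

import attr

@attr.s
class Example:
    # Both forms end up on the attrs Attribute object:
    annotated: int = attr.ib(default=0)
    via_param = attr.ib(default=0, type=int)

print(attr.fields(Example).annotated.type)  # <class 'int'>
print(attr.fields(Example).via_param.type)  # <class 'int'>
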
1 change: 0 additions & 1 deletion pyrdf2vec/samplers/frequency.py

@@ -21,7 +21,6 @@ class ObjFreqSampler(Sampler):
     _counts: DefaultDict[str, int] = attr.ib(
         init=False, repr=False, factory=lambda: defaultdict(dict)
     )
-    """Counter for vertices."""

     def fit(self, kg: KG) -> None:
         """Fits the sampling strategy by counting the number of available
7 changes: 0 additions & 7 deletions pyrdf2vec/samplers/pagerank.py

@@ -16,24 +16,17 @@ class PageRankSampler(Sampler):
     nodes are more important than others and hence there will be resources
     which are more frequent in the walks as others.

-    Args:
-        alpha: The damping for PageRank.
-            Defaults to 0.85.
     """

     alpha: float = attr.ib(
         kw_only=True,
         default=0.85,
         validator=attr.validators.instance_of(float),
     )
-    """The damping for Page Rank."""

     _pageranks: Dict[str, float] = attr.ib(
         init=False, repr=False, factory=dict
     )
-    """The Page Rank dictionary."""

     def fit(self, kg: KG) -> None:
         """Fits the sampling strategy by running PageRank on a provided KG
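
As the class docstring says, weighting by PageRank makes central resources appear more often in walks. A hedged sketch of what fit plausibly computes once the KG is converted to a directed graph (networkx assumed, toy graph made up):

import networkx as nx

g = nx.DiGraph([("a", "b"), ("b", "c"), ("c", "a"), ("a", "c")])

# alpha is the damping factor the sampler exposes (default 0.85).
pageranks = nx.pagerank(g, alpha=0.85)
print(max(pageranks, key=pageranks.get))  # most central vertex
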
12 changes: 0 additions & 12 deletions pyrdf2vec/samplers/sampler.py

@@ -25,38 +25,26 @@ class Sampler(ABC):
     inverse: bool = attr.ib(
         default=False, validator=attr.validators.instance_of(bool)
     )
-    """True if the inverse algorithm must be used, False otherwise."""

     split: bool = attr.ib(
         default=False, validator=attr.validators.instance_of(bool)
     )
-    """True if the split algorithm must be used, False otherwise."""

     _is_support_remote: bool = attr.ib(init=False, repr=False, default=False)
-    """True if the sampling strategy can be used with a remote Knowledge Graph,
-    False Otherwise.
-    """

     _random_state: Optional[int] = attr.ib(
         init=False,
         repr=False,
         default=None,
     )
-    """The random state to use to keep random determinism with the sampling
-    strategy.
-    """

     _vertices_deg: Dict[str, int] = attr.ib(
         init=False, repr=False, factory=dict
     )
-    """The degree of the vertices."""

     _visited: Set[Tuple[Hop, int]] = attr.ib(
         init=False, repr=False, factory=set
     )
-    """Tags vertices that appear at the max depth or of which all their
-    children are tagged.
-    """

     @abstractmethod
     def fit(self, kg: KG) -> None:
5 changes: 0 additions & 5 deletions pyrdf2vec/samplers/uniform.py

@@ -18,17 +18,12 @@ class UniformSampler(Sampler):
     inverse: bool = attr.ib(
         init=False, default=False, validator=attr.validators.instance_of(bool)
     )
-    """True if the inverse algorithm must be used, False otherwise."""

     split: bool = attr.ib(
         init=False, default=False, validator=attr.validators.instance_of(bool)
     )
-    """True if the split algorithm must be used, False otherwise."""

     _is_support_remote: bool = attr.ib(init=False, repr=False, default=True)
-    """True if the sampling strategy can be used with a remote Knowledge Graph,
-    False Otherwise.
-    """

     def fit(self, kg: KG) -> None:
         """Since the weights are uniform, this function does nothing.
6 changes: 3 additions & 3 deletions pyrdf2vec/typings.py

@@ -1,12 +1,12 @@
-from typing import TYPE_CHECKING, Dict, List, Tuple, Union
+from typing import TYPE_CHECKING, Any, Dict, List, Tuple, Union

 if TYPE_CHECKING:
     from pyrdf2vec.graphs import Vertex  # noqa: F401

-Hop = Tuple["Vertex", "Vertex"]
+Hop = Tuple[Any, Any]

 SWalk = Tuple[str, ...]
-Walk = Tuple["Vertex", ...]
+Walk = Tuple[Any, ...]

 Embeddings = List[str]
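
One plausible motivation for loosening Hop and Walk to Any: string forward references in a type alias are never evaluated at runtime, so anything that resolves annotations outside the TYPE_CHECKING block fails, whereas Any always resolves. A small sketch of the failure mode (Vertex is deliberately undefined here):

from typing import Tuple, get_type_hints

Hop = Tuple["Vertex", "Vertex"]  # ForwardRef, fine until resolved

def head(hop: Hop):
    return hop[0]

try:
    get_type_hints(head)  # forces evaluation of "Vertex"
except NameError as e:
    print(e)  # name 'Vertex' is not defined
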
5 changes: 0 additions & 5 deletions pyrdf2vec/walkers/community.py

@@ -40,17 +40,12 @@ class CommunityWalker(Walker):
     hop_prob: float = attr.ib(
         kw_only=True, default=0.1, validator=attr.validators.instance_of(float)
     )
-    """The probability to hop."""

     resolution: int = attr.ib(
         kw_only=True, default=1, validator=attr.validators.instance_of(int)
     )
-    """The resolution to use."""

     _is_support_remote: bool = attr.ib(init=False, repr=False, default=False)
-    """True if the walking strategy can be used with a remote Knowledge Graph,
-    False Otherwise.
-    """

     def _community_detection(self, kg: KG) -> None:
         """Converts the knowledge graph to a networkX graph.
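
_community_detection converts the KG to a networkx graph before grouping vertices. A hedged sketch of Louvain community detection with a resolution parameter (the python-louvain package is an assumption here, not confirmed by this diff):

import community as community_louvain  # python-louvain; assumption
import networkx as nx

g = nx.karate_club_graph()
# Mirrors the walker's resolution attribute, which controls community
# granularity.
partition = community_louvain.best_partition(g, resolution=1)
print(len(set(partition.values())), "communities")
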
1 change: 0 additions & 1 deletion pyrdf2vec/walkers/halk.py

@@ -25,7 +25,6 @@ class HALKWalker(RandomWalker):
             iterable_validator=attr.validators.instance_of(list),
         ),
     )
-    """The minimum frequency thresholds of a hop to be kept."""

     def _extract(self, kg: KG, instance: Vertex) -> EntityWalks:
         """Extracts walks rooted at the provided entities which are then each
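
The removed line documented freq_thresholds: hops rarer than the threshold are dropped before training. A toy illustration of that idea (prune_rare_hops is a hypothetical helper, not the library's API):

from collections import Counter

def prune_rare_hops(walks, freq_threshold=0.2):
    # Count every hop across all walks, keep only the frequent ones.
    counts = Counter(hop for walk in walks for hop in walk)
    total = sum(counts.values())
    return [
        tuple(hop for hop in walk if counts[hop] / total >= freq_threshold)
        for walk in walks
    ]

walks = [("a", "p", "b"), ("a", "p", "c"), ("a", "q", "b")]
print(prune_rare_hops(walks))
# [('a', 'p', 'b'), ('a', 'p'), ('a', 'b')]
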
