Commit e076b3b

chore: update the doc

rememberYou committed Mar 23, 2021
1 parent a527c43, commit e076b3b

Showing 18 changed files with 22 additions and 124 deletions.
5 changes: 5 additions & 0 deletions docs/requirements.txt

@@ -1,3 +1,8 @@
+gensim
+rdflib
+scikit-learn
 sphinx
 sphinx-autodoc-typehints
 sphinx-rtd-theme
+sphinxcontrib-apidoc
+tomlkit
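
A plausible reading of the new entries: gensim, rdflib, and scikit-learn are runtime dependencies of pyrdf2vec itself, presumably added so that Sphinx autodoc can import the package when the docs are built from this file, while sphinxcontrib-apidoc and tomlkit mirror the deps that tox installs in the pyproject.toml hunk below.
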
6 changes: 3 additions & 3 deletions poetry.lock

Some generated files are not rendered by default.

2 changes: 1 addition & 1 deletion pyproject.toml

@@ -171,7 +171,7 @@ deps =
     sphinxcontrib-apidoc
     tomlkit
 commands =
-    sphinx-build -n -T docs docs/_build/html
+    sphinx-build -n -T -W docs docs/_build/html
     python -m doctest README.rst

 [testenv:lint]
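
The added -W flag makes sphinx-build treat warnings as errors, so the docs environment in tox now fails on any Sphinx warning rather than passing silently.
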
5 changes: 0 additions & 5 deletions pyrdf2vec/connectors.py

@@ -24,14 +24,12 @@ class Connector(ABC):
     endpoint: str = attr.ib(
         validator=attr.validators.instance_of(str),
     )
-    """The endpoint to execute the queries."""

     cache: Cache = attr.ib(
         kw_only=True,
         factory=lambda: TTLCache(maxsize=1024, ttl=1200),
         validator=attr.validators.optional(attr.validators.instance_of(Cache)),
     )
-    """The policy and size cache to use."""

     _headers: Dict[str, str] = attr.ib(
         init=False,
@@ -40,16 +38,13 @@ class Connector(ABC):
             "Accept": "application/sparql-results+json",
         },
     )
-    """The HTTP headers to use."""

     _asession = attr.ib(init=False, default=None)
-    """The aiohttp session to use for asynchrone requests."""

     _session = attr.ib(
         init=False,
         factory=lambda: requests.Session(),
     )
-    """The requests session to use for synchrone requests."""

     async def close(self) -> None:
         """Closes the aiohttp session."""
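
The same pattern repeats throughout this commit: bare string literals placed after attribute assignments are deleted. A minimal sketch of why they are inert at runtime, assuming attrs and cachetools are installed (the endpoint URL is made up):

# A string literal after an assignment is not attached to the attribute
# at runtime; only source-reading tools such as Sphinx autodoc see it.
import attr
from cachetools import TTLCache

@attr.s
class Connector:
    endpoint: str = attr.ib(validator=attr.validators.instance_of(str))
    """The endpoint to execute the queries."""  # no runtime effect

    cache = attr.ib(
        kw_only=True,
        factory=lambda: TTLCache(maxsize=1024, ttl=1200),
    )

print(Connector("http://localhost:5820/query").endpoint)
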
7 changes: 2 additions & 5 deletions pyrdf2vec/embedders/word2vec.py

@@ -17,12 +17,9 @@ class Word2Vec(Embedder):
     """

-    kwargs = attr.ib(init=False, default=None)
-    """The keyword arguments dictionary.
-    Defaults to {size=500, min_count=0, negative=20}.
-    """
+    # kwargs = attr.ib(init=False, default=None)

-    _model: W2V = attr.ib(init=False, default=None, repr=False)
+    # _model = attr.ib(init=False, type=W2V, default=None, repr=False)

     def __init__(self, **kwargs):
         self.kwargs = {
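
For reference, the deleted docstring recorded the embedder's default keyword arguments. A toy sketch of the merge that __init__ appears to perform, with the defaults taken from that docstring (note that gensim 4 later renamed size to vector_size):

# User-supplied kwargs override the recorded defaults.
defaults = {"size": 500, "min_count": 0, "negative": 20}

def build_kwargs(**user_kwargs):
    return {**defaults, **user_kwargs}

print(build_kwargs(negative=5))
# {'size': 500, 'min_count': 0, 'negative': 5}
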
21 changes: 0 additions & 21 deletions pyrdf2vec/graphs/kg.py

@@ -28,81 +28,60 @@ class KG:
             _check_location,
         ],
     )
-    """The location of the file to load."""

     skip_predicates: Set[str] = attr.ib(
         factory=set,
         validator=attr.validators.deep_iterable(
             member_validator=attr.validators.instance_of(str)
         ),
     )
-    """The label predicates to skip from the KG."""

     literals: List[List[str]] = attr.ib(  # type: ignore
         factory=list,
         validator=attr.validators.deep_iterable(
             member_validator=attr.validators.instance_of(List)
         ),
     )
-    """The predicate chains to get the literals."""

     fmt: Optional[str] = attr.ib(
         kw_only=True,
         default=None,
         validator=attr.validators.optional(attr.validators.instance_of(str)),
     )
-    """The format of the file.
-    It should be used only if the format can not be determined from source.
-    """

     mul_req: bool = attr.ib(
         kw_only=True,
         default=False,
         validator=attr.validators.instance_of(bool),
     )
-    """True to allow bundling of SPARQL queries, False otherwise.
-    This attribute accelerates the extraction of walks for remote Knowledge
-    Graphs. Beware that this may violate the policy of some SPARQL endpoint
-    server.
-    """

     cache: Cache = attr.ib(
         kw_only=True,
         factory=lambda: TTLCache(maxsize=1024, ttl=1200),
         validator=attr.validators.optional(attr.validators.instance_of(Cache)),
     )
-    """The policy and size cache to use.
-    Defaults to TTLCache(maxsize=1024, ttl=1200)
-    """

     connector: SPARQLConnector = attr.ib(default=None, init=False, repr=False)
-    """The connector to use."""

     _is_remote: bool = attr.ib(
         default=False, validator=attr.validators.instance_of(bool)
     )
-    """True if the Knowledge Graph is in remote, False otherwise."""

     _inv_transition_matrix: DefaultDict[Vertex, Set[Vertex]] = attr.ib(
         init=False, repr=False, factory=lambda: defaultdict(set)
     )
-    """Contains the parents of vertices."""

     _transition_matrix: DefaultDict[Vertex, Set[Vertex]] = attr.ib(
         init=False, repr=False, factory=lambda: defaultdict(set)
     )
-    """Contains the children of vertices."""

     _entity_hops: Dict[str, List[Hop]] = attr.ib(
         init=False, repr=False, factory=dict
     )
-    """Caches the results of asynchronous requests."""

     _entities: Set[Vertex] = attr.ib(init=False, repr=False, factory=set)
-    """Stores the entities."""

     _vertices: Set[Vertex] = attr.ib(init=False, repr=False, factory=set)
-    """Stores the vertices."""

     def __attrs_post_init__(self):
         if self.location is not None:
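
Two of the removed docstrings carried information worth keeping in mind: mul_req bundles SPARQL queries to speed up remote extraction (possibly violating endpoint policies), and the cache default is a TTL cache. A short sketch of that default, assuming cachetools is installed:

from cachetools import TTLCache

# Default recorded in the removed docstring: at most 1024 entries,
# each evicted 1200 seconds (20 minutes) after insertion.
cache = TTLCache(maxsize=1024, ttl=1200)
cache["http://dbpedia.org/resource/Belgium"] = ["cached", "hops"]
print(len(cache))  # 1
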
13 changes: 2 additions & 11 deletions pyrdf2vec/graphs/vertex.py

@@ -1,5 +1,3 @@
-from __future__ import annotations
-
 from typing import Any, Optional

 import attr
@@ -10,20 +8,13 @@ class Vertex:
     """Represents a vertex in a Knowledge Graph."""

     name: str = attr.ib(validator=attr.validators.instance_of(str))
-    """The name of vertex."""

     predicate: bool = attr.ib(
         default=False,
         validator=attr.validators.instance_of(bool),
         repr=False,
     )
-    """True if the vertex is a predicate. False, otherwise."""

-    vprev: Optional[Vertex] = attr.ib(default=None, repr=False)
-    """The previous vertex."""
-
-    vnext: Optional[Vertex] = attr.ib(default=None, repr=False)
-    """The next vertex."""
+    vprev: Optional["Vertex"] = attr.ib(default=None, repr=False)
+    vnext: Optional["Vertex"] = attr.ib(default=None, repr=False)

     def __eq__(self, other: Any) -> bool:
         """Defines behavior for the equality operator, ==.
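
Dropping from __future__ import annotations means the class body can no longer name the class itself in a bare annotation, hence the quoted "Vertex" forward references. A minimal sketch of the same pattern (Node is a hypothetical stand-in):

import attr
from typing import Optional

@attr.s
class Node:
    name: str = attr.ib()
    # Without the __future__ import, a bare Optional[Node] here would
    # raise NameError; quoting defers the lookup to type checkers.
    prev: Optional["Node"] = attr.ib(default=None, repr=False)

a = Node("a")
b = Node("b", prev=a)
print(b)  # Node(name='b')
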
20 changes: 4 additions & 16 deletions pyrdf2vec/rdf2vec.py

@@ -17,53 +17,41 @@
 class RDF2VecTransformer:
     """Transforms nodes in a Knowledge Graph into an embedding."""

-    embedder: Embedder = attr.ib(
+    embedder = attr.ib(
         factory=lambda: Word2Vec(),
+        type=Embedder,
         validator=attr.validators.instance_of(Embedder),  # type: ignore
     )
-    """The embedding technique."""

-    walkers: Sequence[Walker] = attr.ib(
+    walkers = attr.ib(
         factory=lambda: [RandomWalker(2)],  # type: ignore
+        type=Sequence[Walker],
         validator=attr.validators.deep_iterable(
             member_validator=attr.validators.instance_of(
                 Walker  # type: ignore
             ),
             iterable_validator=attr.validators.instance_of(list),
         ),
     )
-    """The walking strategy."""

     verbose: int = attr.ib(
         kw_only=True, default=0, validator=attr.validators.in_([0, 1, 2])
     )
-    """The verbosity level.
-    0: does not display anything;
-    1: display of the progress of extraction and training of walks;
-    2: debugging.
-    """

     _embeddings: Embeddings = attr.ib(init=False, factory=list)
-    """All the embeddings of the model."""

     _entities: Entities = attr.ib(init=False, factory=list)
-    """All the entities of the model."""

     _literals: Literals = attr.ib(init=False, factory=list)
-    """All the literals of the model."""

     _walks: List[str] = attr.ib(init=False, factory=list)
-    """All the walks of the model."""

     _is_extract_walks_literals = attr.ib(
         init=False,
         repr=False,
         default=False,
         validator=attr.validators.instance_of(bool),
     )
-    """True if the session must be closed after the call to the `transform`
-    function. False, otherwise.
-    """

     def fit(
         self, walks: List[str], is_update: bool = False
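
The annotation-style declarations become attrs' type= parameter, which records the same metadata without a class-level annotation. A small sketch of the equivalence:

import attr

@attr.s
class Example:
    # Both forms end up on the attrs Attribute object:
    annotated: int = attr.ib(default=0)
    via_param = attr.ib(default=0, type=int)

print(attr.fields(Example).annotated.type)  # <class 'int'>
print(attr.fields(Example).via_param.type)  # <class 'int'>
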
1 change: 0 additions & 1 deletion pyrdf2vec/samplers/frequency.py

@@ -21,7 +21,6 @@ class ObjFreqSampler(Sampler):
     _counts: DefaultDict[str, int] = attr.ib(
         init=False, repr=False, factory=lambda: defaultdict(dict)
     )
-    """Counter for vertices."""

     def fit(self, kg: KG) -> None:
         """Fits the sampling strategy by counting the number of available
7 changes: 0 additions & 7 deletions pyrdf2vec/samplers/pagerank.py

@@ -16,24 +16,17 @@ class PageRankSampler(Sampler):
     nodes are more important than others and hence there will be resources
     which are more frequent in the walks as others.

-    Args:
-        alpha: The damping for PageRank.
-            Defaults to 0.85.
     """

     alpha: float = attr.ib(
         kw_only=True,
         default=0.85,
         validator=attr.validators.instance_of(float),
     )
-    """The damping for Page Rank."""

     _pageranks: Dict[str, float] = attr.ib(
         init=False, repr=False, factory=dict
     )
-    """The Page Rank dictionary."""

     def fit(self, kg: KG) -> None:
         """Fits the sampling strategy by running PageRank on a provided KG
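
As the class docstring says, weighting by PageRank makes central resources appear more often in walks. A hedged sketch of what fit plausibly computes once the KG is converted to a directed graph (networkx assumed, toy graph made up):

import networkx as nx

g = nx.DiGraph([("a", "b"), ("b", "c"), ("c", "a"), ("a", "c")])

# alpha is the damping factor the sampler exposes (default 0.85).
pageranks = nx.pagerank(g, alpha=0.85)
print(max(pageranks, key=pageranks.get))  # most central vertex
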
12 changes: 0 additions & 12 deletions pyrdf2vec/samplers/sampler.py

@@ -25,38 +25,26 @@ class Sampler(ABC):
     inverse: bool = attr.ib(
         default=False, validator=attr.validators.instance_of(bool)
     )
-    """True if the inverse algorithm must be used, False otherwise."""

     split: bool = attr.ib(
         default=False, validator=attr.validators.instance_of(bool)
     )
-    """True if the split algorithm must be used, False otherwise."""

     _is_support_remote: bool = attr.ib(init=False, repr=False, default=False)
-    """True if the sampling strategy can be used with a remote Knowledge Graph,
-    False Otherwise.
-    """

     _random_state: Optional[int] = attr.ib(
         init=False,
         repr=False,
         default=None,
     )
-    """The random state to use to keep random determinism with the sampling
-    strategy.
-    """

     _vertices_deg: Dict[str, int] = attr.ib(
         init=False, repr=False, factory=dict
     )
-    """The degree of the vertices."""

     _visited: Set[Tuple[Hop, int]] = attr.ib(
         init=False, repr=False, factory=set
     )
-    """Tags vertices that appear at the max depth or of which all their
-    children are tagged.
-    """

     @abstractmethod
     def fit(self, kg: KG) -> None:
5 changes: 0 additions & 5 deletions pyrdf2vec/samplers/uniform.py

@@ -18,17 +18,12 @@ class UniformSampler(Sampler):
     inverse: bool = attr.ib(
         init=False, default=False, validator=attr.validators.instance_of(bool)
     )
-    """True if the inverse algorithm must be used, False otherwise."""

     split: bool = attr.ib(
         init=False, default=False, validator=attr.validators.instance_of(bool)
     )
-    """True if the split algorithm must be used, False otherwise."""

     _is_support_remote: bool = attr.ib(init=False, repr=False, default=True)
-    """True if the sampling strategy can be used with a remote Knowledge Graph,
-    False Otherwise.
-    """

     def fit(self, kg: KG) -> None:
         """Since the weights are uniform, this function does nothing.
6 changes: 3 additions & 3 deletions pyrdf2vec/typings.py

@@ -1,12 +1,12 @@
-from typing import TYPE_CHECKING, Dict, List, Tuple, Union
+from typing import TYPE_CHECKING, Any, Dict, List, Tuple, Union

 if TYPE_CHECKING:
     from pyrdf2vec.graphs import Vertex  # noqa: F401

-Hop = Tuple["Vertex", "Vertex"]
+Hop = Tuple[Any, Any]

 SWalk = Tuple[str, ...]
-Walk = Tuple["Vertex", ...]
+Walk = Tuple[Any, ...]

 Embeddings = List[str]
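
One plausible motivation for loosening Hop and Walk to Any: string forward references in a type alias are never evaluated at runtime, so anything that resolves annotations outside the TYPE_CHECKING block fails, whereas Any always resolves. A small sketch of the failure mode (Vertex is deliberately undefined here):

from typing import Tuple, get_type_hints

Hop = Tuple["Vertex", "Vertex"]  # ForwardRef, fine until resolved

def head(hop: Hop):
    return hop[0]

try:
    get_type_hints(head)  # forces evaluation of "Vertex"
except NameError as e:
    print(e)  # name 'Vertex' is not defined
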
5 changes: 0 additions & 5 deletions pyrdf2vec/walkers/community.py

@@ -40,17 +40,12 @@ class CommunityWalker(Walker):
     hop_prob: float = attr.ib(
         kw_only=True, default=0.1, validator=attr.validators.instance_of(float)
     )
-    """The probability to hop."""

     resolution: int = attr.ib(
         kw_only=True, default=1, validator=attr.validators.instance_of(int)
     )
-    """The resolution to use."""

     _is_support_remote: bool = attr.ib(init=False, repr=False, default=False)
-    """True if the walking strategy can be used with a remote Knowledge Graph,
-    False Otherwise.
-    """

     def _community_detection(self, kg: KG) -> None:
         """Converts the knowledge graph to a networkX graph.
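
_community_detection converts the KG to a networkx graph before grouping vertices. A hedged sketch of Louvain community detection with a resolution parameter (the python-louvain package is an assumption here, not confirmed by this diff):

import community as community_louvain  # python-louvain; assumption
import networkx as nx

g = nx.karate_club_graph()
# Mirrors the walker's resolution attribute, which controls community
# granularity.
partition = community_louvain.best_partition(g, resolution=1)
print(len(set(partition.values())), "communities")
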
1 change: 0 additions & 1 deletion pyrdf2vec/walkers/halk.py

@@ -25,7 +25,6 @@ class HALKWalker(RandomWalker):
             iterable_validator=attr.validators.instance_of(list),
         ),
     )
-    """The minimum frequency thresholds of a hop to be kept."""

     def _extract(self, kg: KG, instance: Vertex) -> EntityWalks:
         """Extracts walks rooted at the provided entities which are then each
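
The removed line documented freq_thresholds: hops rarer than the threshold are dropped before training. A toy illustration of that idea (prune_rare_hops is a hypothetical helper, not the library's API):

from collections import Counter

def prune_rare_hops(walks, freq_threshold=0.2):
    # Count every hop across all walks, keep only the frequent ones.
    counts = Counter(hop for walk in walks for hop in walk)
    total = sum(counts.values())
    return [
        tuple(hop for hop in walk if counts[hop] / total >= freq_threshold)
        for walk in walks
    ]

walks = [("a", "p", "b"), ("a", "p", "c"), ("a", "q", "b")]
print(prune_rare_hops(walks))
# [('a', 'p', 'b'), ('a', 'p'), ('a', 'b')]
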
