-
Notifications
You must be signed in to change notification settings - Fork 58
refactor: replace sqlite with rocksdb #109
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,2 +1,3 @@ | ||
| from .json_storage import JsonKVStorage, JsonListStorage | ||
| from .networkx_storage import NetworkXStorage | ||
| from .rocksdb_cache import RocksDBCache |
| Original file line number | Diff line number | Diff line change | ||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,43 @@ | ||||||||||||||||
| from pathlib import Path | ||||||||||||||||
| from typing import Any, Iterator, Optional | ||||||||||||||||
|
|
||||||||||||||||
| # rocksdict is a lightweight C wrapper around RocksDB for Python, pylint may not recognize it | ||||||||||||||||
| # pylint: disable=no-name-in-module | ||||||||||||||||
| from rocksdict import Rdict | ||||||||||||||||
|
|
||||||||||||||||
|
|
||||||||||||||||
| class RocksDBCache: | ||||||||||||||||
| def __init__(self, cache_dir: str): | ||||||||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. For improved flexibility and to align with modern Python practices using `pathlib`, consider letting `cache_dir` accept a `Path` as well as a `str` (for example, `Union[str, Path]`).
Suggested change
|
||||||||||||||||
| self.db_path = Path(cache_dir) | ||||||||||||||||
| self.db = Rdict(str(self.db_path)) | ||||||||||||||||
|
|
||||||||||||||||
| def get(self, key: str) -> Optional[Any]: | ||||||||||||||||
| return self.db.get(key) | ||||||||||||||||
|
|
||||||||||||||||
| def set(self, key: str, value: Any): | ||||||||||||||||
| self.db[key] = value | ||||||||||||||||
|
|
||||||||||||||||
| def delete(self, key: str): | ||||||||||||||||
| try: | ||||||||||||||||
| del self.db[key] | ||||||||||||||||
| except KeyError: | ||||||||||||||||
github-code-quality[bot] marked this conversation as resolved.
Fixed
Show fixed
Hide fixed
|
||||||||||||||||
| # If the key does not exist, do nothing (deletion is idempotent for caches) | ||||||||||||||||
| pass | ||||||||||||||||
|
|
||||||||||||||||
| def close(self): | ||||||||||||||||
| if hasattr(self, "db") and self.db is not None: | ||||||||||||||||
| self.db.close() | ||||||||||||||||
| self.db = None | ||||||||||||||||
|
Contributor
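There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. For better resource management and to make `RocksDBCache` safer to use, consider implementing the context manager protocol (`__enter__` and `__exit__`). This would allow using it in a `with` statement.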
Suggested change
|
||||||||||||||||
|
|
||||||||||||||||
| def __del__(self): | ||||||||||||||||
| # Ensure the database is closed when the object is destroyed | ||||||||||||||||
| self.close() | ||||||||||||||||
|
|
||||||||||||||||
| def __enter__(self): | ||||||||||||||||
| return self | ||||||||||||||||
|
|
||||||||||||||||
| def __exit__(self, exc_type, exc_val, exc_tb): | ||||||||||||||||
| self.close() | ||||||||||||||||
|
|
||||||||||||||||
| def __iter__(self) -> Iterator[str]: | ||||||||||||||||
| return iter(self.db.keys()) | ||||||||||||||||
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -4,16 +4,15 @@ | |||||
| from pathlib import Path | ||||||
| from typing import Any, Dict, List, Set, Union | ||||||
|
|
||||||
| from diskcache import Cache | ||||||
|
|
||||||
| from graphgen.models import RocksDBCache | ||||||
| from graphgen.utils import logger | ||||||
|
|
||||||
|
|
||||||
| class ParallelFileScanner: | ||||||
| def __init__( | ||||||
| self, cache_dir: str, allowed_suffix, rescan: bool = False, max_workers: int = 4 | ||||||
| ): | ||||||
| self.cache = Cache(cache_dir) | ||||||
| self.cache = RocksDBCache(os.path.join(cache_dir, "file_paths_cache")) | ||||||
|
Contributor
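There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Since `Path` is already imported in this file, the cache path could be built with `pathlib` instead of `os.path.join`. To make this even cleaner, you could consider updating `RocksDBCache.__init__` to accept a `Path` object directly.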
Suggested change
|
||||||
| self.allowed_suffix = set(allowed_suffix) if allowed_suffix else None | ||||||
| self.rescan = rescan | ||||||
| self.max_workers = max_workers | ||||||
|
|
||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
To support a more flexible type hint in `__init__`, please import `Union` from the `typing` module.