Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,12 +1,17 @@
[![PyPI-Server](https://img.shields.io/pypi/v/osw.svg)](https://pypi.org/project/osw/)
[![DOI](https://zenodo.org/badge/458130867.svg)](https://zenodo.org/badge/latestdoi/458130867)
[![Coveralls](https://img.shields.io/coveralls/github/OpenSemanticLab/osw-python/main.svg)](https://coveralls.io/r/<USER>/osw)
[![Coveralls](https://img.shields.io/coveralls/github/OpenSemanticLab/osw-python/main.svg)](https://coveralls.io/r/OpenSemanticLab/osw)
[![docs](xx.xx)](https://opensemanticlab.github.io/osw-python/)
![license](https://img.shields.io/github/license/OpenSemanticLab/osw-python.svg)
[![Pydantic v2](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/pydantic/pydantic/main/docs/badge/v2.json)](https://pydantic.dev)
[![Project generated with PyScaffold](https://img.shields.io/badge/-PyScaffold-005CA0?logo=pyscaffold)](https://pyscaffold.org/)

# osw

Python toolset for data processing, queries, wikicode generation and page manipulation within OpenSemanticLab.
General features for object oriented interaction with knowledge graphs are planned to be moved to a standalone package: [oold-python](https://github.com/OpenSemanticWorld/oold-python)

General features for object-oriented interaction with knowledge graphs are planned to be moved to a standalone package:
[oold-python](https://github.com/OpenSemanticWorld/oold-python)

## Installation
```
Expand Down
1,163 changes: 1,163 additions & 0 deletions docs/tutorials/basics.ipynb

Large diffs are not rendered by default.

Binary file added docs/tutorials/img/osw_intro_data_model.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
1 change: 0 additions & 1 deletion examples/use_express_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@
overwrite=True, # Required if file already exists
)
local_file_path = local_file.path
local_file.close() # required to release the file lock

# Open a file with context manager directly from an OSW instance
with osw_download_file(
Expand Down
3 changes: 2 additions & 1 deletion src/osw/controller/file/remote.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
from osw.controller.file.base import FileController
from osw.core import model

# TODO: add addional remove file with https://docs.prefect.io/2.11.4/concepts/filesystems/
# TODO: add additional remote file with
# https://docs.prefect.io/2.11.4/concepts/filesystems/


# Note: the order of the base classes is important
Expand Down
14 changes: 8 additions & 6 deletions src/osw/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -1073,13 +1073,13 @@ def store_entity_(
meta_category_template,
page.get_slot_content("jsondata"),
{
"_page_title": entity_title, # legacy
"_page_title": entity_title, # Legacy
"_current_subject_": entity_title,
},
)
schema = json.loads(schema_str)
# put generated schema in definitions section
# currently only enabled for Characteristics
# Put generated schema in definitions section,
# currently only enabled for Characteristics
if hasattr(model, "CharacteristicType") and isinstance(
entity_, model.CharacteristicType
):
Expand All @@ -1091,10 +1091,12 @@ def store_entity_(
}
schema["title"] = "Generated" + new_schema["title"]
schema = new_schema
page.set_slot_content("jsonschema", new_schema)
page.set_slot_content("jsonschema", schema)
except Exception as e:
print(f"Schema generation from template failed for {entity_}: {e}")
page.edit() # will set page.changed if the content of the page has changed
print(
f"Schema generation from template failed for " f"{entity}: {e}"
)
page.edit() # Will set page.changed if the content of the page has changed
if page.changed:
if index is None:
print(f"Entity stored at '{page.get_url()}'.")
Expand Down
61 changes: 58 additions & 3 deletions src/osw/data/import_utility.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import numpy as np
from geopy import Nominatim
from jsonpath_ng import ext as jp
from pydantic.v1 import create_model

import osw.utils.strings as strutil
from osw import wiki_tools as wt
Expand All @@ -28,13 +29,58 @@

# Classes
class HelperModel(model.OswBaseModel):
"""Helper class for model transformations. The first base of the inheriting class
should always be the target class and the second base should be this class.

Example
-------
>>> class Person(model.OswBaseModel):
>>> first_name: str
>>> surname: str
>>> email: Set[str]
>>>
>>> john_dict = {"FirstName": "John", "LastName": "Doe",
>>> "Email": "john.doe@example.com"}
>>>
>>> class PersonHelper(Person, HelperModel):
>>> FirstName: Any
>>> LastName: Any
>>> Email: Any
>>>
>>> def transform_attributes(self, dd: dict) -> bool:
>>> super().transform_attributes(dd)
>>> self.first_name = self.FirstName
>>> self.surname = self.LastName
>>> self.email = {self.Email}
>>> return True
"""

# Custom attributes
attributes_transformed: bool = False
references_transformed: bool = False
casted_instance: Any = None
full_page_title: Optional[str]

def transform_attributes(self, dd: dict) -> bool:
class Config:
arbitrary_types_allowed = True

def __init_subclass__(cls, **kwargs):
"""Will overwrite the annotations and fields of the inheriting class,
defined in the first base class with Optional[Any] annotations. This is
necessary to prevent errors when casting to the inheriting class."""
super().__init_subclass__(**kwargs)
first_base = cls.__bases__[0]
if not issubclass(first_base, model.OswBaseModel):
return None
fields = {name: (Optional[Any], None) for name in first_base.__annotations__}
new_first_base = create_model(first_base.__name__, **fields)
for field_name in new_first_base.__fields__:
if field_name in cls.__fields__: # Replace existing fields
cls.__fields__[field_name] = new_first_base.__fields__[field_name]
if field_name in cls.__annotations__: # Replace existing annotations
cls.__annotations__[field_name] = Optional[Any]

def transform_attributes(self, dd: dict = None) -> bool:
if not self.attributes_transformed:
uuid = uuid_module.uuid4()
if hasattr(self, "uuid"):
Expand All @@ -45,7 +91,7 @@ def transform_attributes(self, dd: dict) -> bool:
self.attributes_transformed = True
return True

def transform_references(self, dd: dict) -> bool:
def transform_references(self, dd: dict = None) -> bool:
if not self.attributes_transformed:
self.transform_attributes(dd)
if not self.references_transformed:
Expand All @@ -56,14 +102,22 @@ def transform_references(self, dd: dict) -> bool:
self.references_transformed = True
return True

def cast_to_superclass(self, dd):
def cast_to_superclass(self, dd: dict = None, return_casted: bool = False) -> bool:
"""Casts the instance to the superclass of the inheriting class. Assumes that
the first base of the inheriting class is the target class."""
if not self.references_transformed:
self.transform_references(dd)
else:
superclass = self.__class__.__bases__[0]
self.casted_instance = self.cast_none_to_default(cls=superclass)
if return_casted:
return self.casted_instance
return True

@property
def casted(self):
return self.cast_to_superclass(return_casted=True)


# Functions
def transform_attributes_and_merge(
Expand All @@ -89,6 +143,7 @@ def transform_attributes_and_merge(
if not inplace:
ent = copy.deepcopy(ent)
ent_as_dict = copy.deepcopy(ent_as_dict)
# Transform attributes
ent, ent_as_dict = loop_and_call_method(
entities=ent,
method_name="transform_attributes",
Expand Down
100 changes: 83 additions & 17 deletions src/osw/express.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
IO,
TYPE_CHECKING,
Any,
AnyStr,
Buffer,
Dict,
List,
Optional,
Expand Down Expand Up @@ -178,19 +180,22 @@ def __init__(
self.cred_filepath = cred_filepath

def __enter__(self):
"""Return self when entering the context manager."""
return self

def __exit__(self):
"""Close the connection to the OSL instance when exiting the context manager."""
self.close_connection()

def close_connection(self):
"""Close the connection to the OSL instance."""
self.site._site.connection.close()

def shut_down(self):
"""Makes sure this OSL instance can't be reused after it was shut down,
as the connection can't be reopened except when initializing a new instance."""
self.close_connection()
del self
# Make sure this osw instance can't be reused after it was shut down (the
# connection can't be reopened except when initializing a new instance)

def install_dependencies(
self,
Expand Down Expand Up @@ -334,8 +339,10 @@ def upload_file(
data = {**locals(), **properties}
# Clean data dict to avoid passing None values
data = {key: value for key, value in data.items() if value is not None}
# Make sure self is passed as osw_express
data["osw_express"] = self
# Initialize the UploadFileResult object
return UploadFileResult(source=source, osw_express=self, **data)
return UploadFileResult(source=source, **data)


class DataModel(OswBaseModel):
Expand All @@ -350,7 +357,10 @@ class DataModel(OswBaseModel):


def import_with_fallback(
to_import: List[DataModel], dependencies: Dict[str, str] = None, domain: str = None
to_import: Union[List[DataModel], Dict[str, str]],
module: str = None,
dependencies: Dict[str, str] = None,
domain: str = None,
):
"""Imports data models with a fallback to fetch the dependencies from an OSL
instance if the data models are not available in the local osw.model.entity module.
Expand All @@ -359,6 +369,9 @@ def import_with_fallback(
----------
to_import
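List of DataModel objects to import, or a dictionary whose keys are class names
and whose values are the corresponding `osw_fpt` values of the DataModel objects.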
module
(Optional) The module to import the data models from. Used only if to_import
is of type Dict[str, str]. Defaults to 'osw.model.entity' if not specified.
dependencies
A dictionary with the keys being the names of the dependencies and the values
being the full page name of the dependencies.
Expand All @@ -370,6 +383,18 @@ def import_with_fallback(
-------

"""
if isinstance(to_import, dict):
# Assume all DataModels are part of osw.model.entity
if module is None:
module = "osw.model.entity"
to_import = [
DataModel(
module=module,
class_name=key,
osw_fpt=value,
)
for key, value in to_import.items()
]
try:
for ti in to_import:
# Raises AttributeError if the target could not be found
Expand Down Expand Up @@ -449,7 +474,7 @@ def import_with_fallback(
class FileResult(OswBaseModel):
url_or_title: Optional[str] = None
"""The URL or full page title of the WikiFile page."""
file: Optional[TextIO] = None
file_io: Optional[TextIO] = None
"""The file object. The type depends on the file type."""
mode: str = "r"
"""The mode to open the file in. Default is 'r'. Implements the built-in open."""
Expand All @@ -476,20 +501,45 @@ class FileResult(OswBaseModel):
class Config:
arbitrary_types_allowed = True

def open(self, mode: str = "r", **kwargs):
def open(self, mode: str = None, **kwargs) -> TextIO:
"""Open the file, if not already opened, using the 'mode' argument (priority) or
the 'mode' attribute."""
if mode is None:
mode = self.mode
kwargs["mode"] = mode
return open(self.path, **kwargs)
if self.file_io is None or self.file_io.closed:
return open(self.path, **kwargs)
return self.file_io

def close(self) -> None:
"""Close the file, if not already closed."""
if self.file_io is None or self.file_io.closed:
warn("File already closed or not opened.")
else:
self.file_io.close()

def close(self):
self.file.close()
def read(self, n: int = -1) -> AnyStr:
"""Read the file. If n is not specified, the entire file will be read.
If the file is not already opened, it will be opened."""
if self.file_io is None or self.file_io.closed:
self.file_io = self.open(mode="r")
return self.file_io.read(n)

def read(self, *args, **kwargs):
return self.file.read(*args, **kwargs)
def write(self, s: Union[Buffer, AnyStr]):
"""Write to the file. If the file is not already opened, it will be opened."""
if self.file_io is None or self.file_io.closed:
self.file_io = self.open(mode="w")
return self.file_io.write(s)

def __enter__(self):
"""Open the file when entering the context manager."""
if self.file_io is None or self.file_io.closed:
self.file_io = self.open()
return self

def __exit__(self, exc_type, exc_value, traceback):
"""Close the file when exiting the context manager and delete the file if
'delete_after_use' was set."""
self.close()
if self.delete_after_use and self.path.exists():
self.path.unlink()
Expand All @@ -505,6 +555,14 @@ def process_init_data(self, data: Dict[str, Any]) -> Dict[str, Any]:
if data.get(key) is None:
data[key] = value
# Do replacements
if (
data.get("label") == InMemoryController.__fields__["label"].default
or data.get("label") == LocalFileController.__fields__["label"].default
or data.get("label") == WikiFileController.__fields__["label"].default
):
# Make sure that the label is not set to the default value, it will be
# set by the source file controller
del data["label"]
if data.get("cred_filepath") is None:
data["cred_filepath"] = cred_filepath_default.get_default()
if not data.get("cred_filepath").parent.exists():
Expand Down Expand Up @@ -603,8 +661,7 @@ def __init__(self, url_or_title, **data):
data = {key: value for key, value in data.items() if value is not None}
super().__init__(**{**lf.dict(), **data})
self.put_from(wf)
# Do open
self.file = self.open(mode=data.get("mode"))
# File is only opened on request to avoid locking the file


def osw_download_file(
Expand Down Expand Up @@ -777,10 +834,18 @@ def __init__(
)
# Create an osw_express object if not given
if data.get("osw_express") is None:
data["osw_express"] = OswExpress(
domain=data.get("domain"),
cred_mngr=data.get("cred_mngr"),
)
create_new = True
# Try to get the osw_express object from the source_file_controller
if data.get("source_file_controller") is not None:
if hasattr(data["source_file_controller"], "osw_express"):
create_new = False
data["osw_express"] = data["source_file_controller"].osw_express
# Otherwise create a new osw_express object
if create_new:
data["osw_express"] = OswExpress(
domain=data.get("domain"),
cred_mngr=data.get("cred_mngr"),
)
# If given, set title and namespace
if data.get("target_fpt") is not None:
namespace = data.get("target_fpt").split(":")[0]
Expand Down Expand Up @@ -883,6 +948,7 @@ def osw_upload_file(

OswExpress.update_forward_refs()


# todo:
# * create a .gitignore in the basepath that lists the default credentials file (
# accounts.pwd.yaml) OR append to an existing .gitignore
Expand Down
3 changes: 2 additions & 1 deletion src/osw/model/static.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,8 @@ def test_if_empty_list_or_none(obj) -> bool:
k: v for k, v in self.dict().items() if not test_if_empty_list_or_none(v)
}
combined_args = {**self_args, **kwargs}
del combined_args["type"]
if "type" in combined_args:
del combined_args["type"]
return cls(**combined_args)


Expand Down
Loading
Loading