Skip to content

Commit

Permalink
feat(dataset): make dataset providers plugins (#3055)
Browse files Browse the repository at this point in the history
  • Loading branch information
olevski committed Jul 28, 2022
1 parent d7be929 commit b68a8bb
Show file tree
Hide file tree
Showing 15 changed files with 209 additions and 34 deletions.
23 changes: 23 additions & 0 deletions docs/reference/models/dataset_provider.rst
@@ -0,0 +1,23 @@
..
Copyright 2017-2022 - Swiss Data Science Center (SDSC)
A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and
Eidgenössische Technische Hochschule Zürich (ETHZ).
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

Dataset Provider
----------------

.. automodule:: renku.domain_model.dataset_provider
   :members:
   :show-inheritance:
1 change: 1 addition & 0 deletions docs/reference/models/index.rst
Expand Up @@ -33,3 +33,4 @@ Models
session
template
refs
dataset_provider
12 changes: 10 additions & 2 deletions renku/core/dataset/providers/dataverse.py
Expand Up @@ -23,7 +23,7 @@
import urllib
from pathlib import Path
from string import Template
from typing import TYPE_CHECKING, Any, Dict, List, Optional
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Type
from urllib import parse as urlparse

from renku.command.command_builder import inject
Expand All @@ -37,10 +37,12 @@
from renku.core.dataset.providers.doi import DOIProvider
from renku.core.dataset.providers.repository import RepositoryImporter, make_request
from renku.core.interface.client_dispatcher import IClientDispatcher
from renku.core.plugin import hookimpl
from renku.core.util import communication
from renku.core.util.doi import extract_doi, get_doi_url, is_doi
from renku.core.util.file_size import bytes_to_unit
from renku.core.util.urls import remove_credentials
from renku.domain_model.dataset_provider import IDatasetProviderPlugin

if TYPE_CHECKING:
from renku.core.dataset.providers.models import ProviderDataset, ProviderParameter
Expand Down Expand Up @@ -72,7 +74,7 @@
]


class DataverseProvider(ProviderApi):
class DataverseProvider(ProviderApi, IDatasetProviderPlugin):
"""Dataverse API provider."""

priority = ProviderPriority.HIGH
Expand Down Expand Up @@ -183,6 +185,12 @@ def set_export_parameters(client_dispatcher: IClientDispatcher):
set_export_parameters()
return DataverseExporter(dataset=dataset, server_url=self._server_url, dataverse_name=self._dataverse_name)

@classmethod
@hookimpl
def dataset_provider(cls) -> "Type[DataverseProvider]":
    """Hook implementation that exposes this class as a dataset provider.

    Returns:
        Type[DataverseProvider]: The provider class itself (``cls``), not an instance.
    """
    return cls


class DataverseImporter(RepositoryImporter):
"""Dataverse record serializer."""
Expand Down
11 changes: 10 additions & 1 deletion renku/core/dataset/providers/doi.py
Expand Up @@ -19,15 +19,18 @@

import urllib
from pathlib import Path
from typing import Type

from renku.core import errors
from renku.core.dataset.providers.api import ImporterApi, ProviderApi, ProviderPriority
from renku.core.plugin import hookimpl
from renku.core.util.doi import extract_doi, is_doi
from renku.domain_model.dataset_provider import IDatasetProviderPlugin

DOI_BASE_URL = "https://dx.doi.org"


class DOIProvider(ProviderApi):
class DOIProvider(ProviderApi, IDatasetProviderPlugin):
"""`doi.org <http://doi.org>`_ registry API provider."""

priority = ProviderPriority.HIGHER
Expand Down Expand Up @@ -70,6 +73,12 @@ def serialize(response):
query_response = query(uri)
return serialize(query_response)

@classmethod
@hookimpl
def dataset_provider(cls) -> "Type[DOIProvider]":
    """Hook implementation that exposes this class as a dataset provider.

    Returns:
        Type[DOIProvider]: The provider class itself (``cls``), not an instance.
    """
    return cls


class DOIImporter(ImporterApi):
"""Response from `doi.org <http://doi.org>`_ for DOI metadata."""
Expand Down
20 changes: 2 additions & 18 deletions renku/core/dataset/providers/factory.py
Expand Up @@ -21,6 +21,7 @@
from urllib.parse import urlparse

from renku.core import errors
from renku.core.plugin.dataset_provider import get_supported_dataset_providers
from renku.core.util import communication
from renku.core.util.doi import is_doi

Expand All @@ -34,24 +35,7 @@ class ProviderFactory:
@staticmethod
def get_providers():
    """Return a list of providers sorted based on their priorities (higher priority providers come first).

    The returned list is ``get_supported_dataset_providers()`` sorted in ascending order of each
    provider's ``priority`` attribute.
    """
    # NOTE(review): the imports and the hard-coded list below are overwritten by the
    # ``get_supported_dataset_providers()`` call before they are used, so they do not
    # influence the returned value — presumably these are the lines this change removes;
    # confirm before relying on them.
    from renku.core.dataset.providers.dataverse import DataverseProvider
    from renku.core.dataset.providers.git import GitProvider
    from renku.core.dataset.providers.local import FilesystemProvider
    from renku.core.dataset.providers.olos import OLOSProvider
    from renku.core.dataset.providers.renku import RenkuProvider
    from renku.core.dataset.providers.web import WebProvider
    from renku.core.dataset.providers.zenodo import ZenodoProvider

    providers = [
        DataverseProvider,
        GitProvider,
        FilesystemProvider,
        OLOSProvider,
        RenkuProvider,
        WebProvider,
        ZenodoProvider,
    ]

    providers = get_supported_dataset_providers()
    # NOTE(review): "higher priority first" assumes more important providers carry smaller
    # ``priority`` values — confirm against ``ProviderPriority``.
    return sorted(providers, key=lambda p: p.priority)

@staticmethod
Expand Down
12 changes: 10 additions & 2 deletions renku/core/dataset/providers/git.py
Expand Up @@ -21,23 +21,25 @@
import os
from collections import defaultdict
from pathlib import Path
from typing import TYPE_CHECKING, Dict, List, Optional, Set, Union
from typing import TYPE_CHECKING, Dict, List, Optional, Set, Type, Union

from renku.core import errors
from renku.core.dataset.providers.api import ProviderApi, ProviderPriority
from renku.core.plugin import hookimpl
from renku.core.util import communication
from renku.core.util.dataset import check_url
from renku.core.util.git import clone_repository, get_cache_directory_for_repository
from renku.core.util.os import get_files, is_subpath
from renku.core.util.urls import remove_credentials
from renku.domain_model.dataset import RemoteEntity
from renku.domain_model.dataset_provider import IDatasetProviderPlugin

if TYPE_CHECKING:
from renku.core.dataset.providers.models import DatasetAddMetadata, ProviderParameter
from renku.core.management.client import LocalClient


class GitProvider(ProviderApi):
class GitProvider(ProviderApi, IDatasetProviderPlugin):
"""Git provider."""

priority = ProviderPriority.NORMAL
Expand Down Expand Up @@ -186,3 +188,9 @@ def get_metadata(src: Path, dst: Path) -> Optional["DatasetAddMetadata"]:
communication.warn(f"The following files overwrite each other in the destination project:/n/t{files_str}")

return results

@classmethod
@hookimpl
def dataset_provider(cls) -> "Type[GitProvider]":
    """Hook implementation that exposes this class as a dataset provider.

    Returns:
        Type[GitProvider]: The provider class itself (``cls``), not an instance.
    """
    return cls
12 changes: 10 additions & 2 deletions renku/core/dataset/providers/local.py
Expand Up @@ -21,21 +21,23 @@
import urllib
import uuid
from pathlib import Path
from typing import TYPE_CHECKING, List, Optional
from typing import TYPE_CHECKING, List, Optional, Type

from renku.core import errors
from renku.core.dataset.providers.api import ExporterApi, ProviderApi, ProviderPriority
from renku.core.plugin import hookimpl
from renku.core.util import communication
from renku.core.util.dataset import check_url
from renku.core.util.os import get_absolute_path, is_path_empty
from renku.domain_model.dataset_provider import IDatasetProviderPlugin

if TYPE_CHECKING:
from renku.core.dataset.providers.models import DatasetAddMetadata, ProviderParameter
from renku.core.management.client import LocalClient
from renku.domain_model.dataset import Dataset, DatasetTag


class FilesystemProvider(ProviderApi):
class FilesystemProvider(ProviderApi, IDatasetProviderPlugin):
"""Local filesystem provider."""

priority = ProviderPriority.LOW
Expand Down Expand Up @@ -225,6 +227,12 @@ def get_importer(self, uri, **kwargs):
"""Get import manager."""
raise NotImplementedError

@classmethod
@hookimpl
def dataset_provider(cls) -> "Type[FilesystemProvider]":
    """Hook implementation that exposes this class as a dataset provider.

    Returns:
        Type[FilesystemProvider]: The provider class itself (``cls``), not an instance.
    """
    return cls


class LocalExporter(ExporterApi):
"""Local export manager."""
Expand Down
12 changes: 10 additions & 2 deletions renku/core/dataset/providers/olos.py
Expand Up @@ -20,22 +20,24 @@
import datetime
import urllib
from pathlib import Path
from typing import TYPE_CHECKING, List, Optional
from typing import TYPE_CHECKING, List, Optional, Type
from urllib import parse as urlparse
from uuid import UUID, uuid4

from renku.command.command_builder import inject
from renku.core import errors
from renku.core.dataset.providers.api import ExporterApi, ProviderApi, ProviderPriority
from renku.core.interface.client_dispatcher import IClientDispatcher
from renku.core.plugin import hookimpl
from renku.core.util import communication
from renku.domain_model.dataset_provider import IDatasetProviderPlugin

if TYPE_CHECKING:
from renku.core.dataset.providers.models import ProviderParameter
from renku.domain_model.dataset import Dataset, DatasetTag


class OLOSProvider(ProviderApi):
class OLOSProvider(ProviderApi, IDatasetProviderPlugin):
"""Provider for OLOS integration."""

priority = ProviderPriority.HIGH
Expand Down Expand Up @@ -87,6 +89,12 @@ def set_export_parameters(client_dispatcher: IClientDispatcher):
set_export_parameters()
return OLOSExporter(dataset=dataset, server_url=self._server_url)

@classmethod
@hookimpl
def dataset_provider(cls) -> "Type[OLOSProvider]":
    """Hook implementation that exposes this class as a dataset provider.

    Returns:
        Type[OLOSProvider]: The provider class itself (``cls``), not an instance.
    """
    return cls


class OLOSExporter(ExporterApi):
"""OLOS export manager."""
Expand Down
12 changes: 10 additions & 2 deletions renku/core/dataset/providers/renku.py
Expand Up @@ -22,7 +22,7 @@
import urllib
from collections import defaultdict
from pathlib import Path
from typing import TYPE_CHECKING, Any, Dict, List
from typing import TYPE_CHECKING, Any, Dict, List, Type

from renku.command.command_builder.command import inject
from renku.command.login import read_renku_token
Expand All @@ -31,19 +31,21 @@
from renku.core.dataset.providers.api import ImporterApi, ProviderApi, ProviderPriority
from renku.core.interface.client_dispatcher import IClientDispatcher
from renku.core.interface.database_dispatcher import IDatabaseDispatcher
from renku.core.plugin import hookimpl
from renku.core.util import communication
from renku.core.util.file_size import bytes_to_unit
from renku.core.util.git import clone_renku_repository, get_cache_directory_for_repository, get_file_size
from renku.core.util.metadata import is_external_file, make_project_temp_dir
from renku.core.util.urls import remove_credentials
from renku.domain_model.dataset_provider import IDatasetProviderPlugin

if TYPE_CHECKING:
from renku.core.dataset.providers.models import DatasetAddMetadata, ProviderDataset, ProviderParameter
from renku.core.management.client import LocalClient
from renku.domain_model.dataset import Dataset


class RenkuProvider(ProviderApi):
class RenkuProvider(ProviderApi, IDatasetProviderPlugin):
"""Renku API provider."""

priority = ProviderPriority.HIGH
Expand Down Expand Up @@ -224,6 +226,12 @@ def _prepare_auth(self, uri):

self._authorization_header = {"Authorization": f"Bearer {token}"} if token else {}

@classmethod
@hookimpl
def dataset_provider(cls) -> "Type[RenkuProvider]":
    """Hook implementation that exposes this class as a dataset provider.

    Returns:
        Type[RenkuProvider]: The provider class itself (``cls``), not an instance.
    """
    return cls


class RenkuImporter(ImporterApi):
"""Renku record serializer."""
Expand Down
12 changes: 10 additions & 2 deletions renku/core/dataset/providers/web.py
Expand Up @@ -21,22 +21,24 @@
import os
import urllib
from pathlib import Path
from typing import TYPE_CHECKING, List, Tuple
from typing import TYPE_CHECKING, List, Tuple, Type

from renku.core import errors
from renku.core.constant import CACHE
from renku.core.dataset.context import wait_for
from renku.core.dataset.providers.api import ProviderApi, ProviderPriority
from renku.core.plugin import hookimpl
from renku.core.util import communication
from renku.core.util.dataset import check_url
from renku.core.util.urls import remove_credentials
from renku.domain_model.dataset_provider import IDatasetProviderPlugin

if TYPE_CHECKING:
from renku.core.dataset.providers.models import DatasetAddMetadata
from renku.core.management.client import LocalClient


class WebProvider(ProviderApi):
class WebProvider(ProviderApi, IDatasetProviderPlugin):
"""A provider for downloading data from web URLs."""

priority = ProviderPriority.LOWEST
Expand Down Expand Up @@ -69,6 +71,12 @@ def add(
client=client, uri=uri, destination=destination, extract=extract, filename=filename, multiple=multiple
)

@classmethod
@hookimpl
def dataset_provider(cls) -> "Type[WebProvider]":
    """Hook implementation that exposes this class as a dataset provider.

    Returns:
        Type[WebProvider]: The provider class itself (``cls``), not an instance.
    """
    return cls


def _ensure_dropbox(url):
"""Ensure dropbox url is set for file download."""
Expand Down
12 changes: 10 additions & 2 deletions renku/core/dataset/providers/zenodo.py
Expand Up @@ -22,16 +22,18 @@
import pathlib
import urllib
from pathlib import Path
from typing import TYPE_CHECKING, Any, Dict, List, Optional
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Type
from urllib.parse import urlparse

from renku.core import errors
from renku.core.dataset.providers.api import ExporterApi, ProviderApi, ProviderPriority
from renku.core.dataset.providers.repository import RepositoryImporter, make_request
from renku.core.plugin import hookimpl
from renku.core.util import communication
from renku.core.util.doi import is_doi
from renku.core.util.file_size import bytes_to_unit
from renku.core.util.urls import remove_credentials
from renku.domain_model.dataset_provider import IDatasetProviderPlugin

if TYPE_CHECKING:
from renku.core.dataset.providers.models import ProviderDataset, ProviderParameter
Expand All @@ -52,7 +54,7 @@
ZENODO_NEW_DEPOSIT_URL = "depositions"


class ZenodoProvider(ProviderApi):
class ZenodoProvider(ProviderApi, IDatasetProviderPlugin):
"""Zenodo registry API provider."""

priority = ProviderPriority.HIGH
Expand Down Expand Up @@ -120,6 +122,12 @@ def get_exporter(
self._publish = publish
return ZenodoExporter(dataset=dataset, publish=self._publish, tag=tag)

@classmethod
@hookimpl
def dataset_provider(cls) -> "Type[ZenodoProvider]":
    """Hook implementation that exposes this class as a dataset provider.

    Returns:
        Type[ZenodoProvider]: The provider class itself (``cls``), not an instance.
    """
    return cls


class ZenodoImporter(RepositoryImporter):
"""Zenodo importer."""
Expand Down

0 comments on commit b68a8bb

Please sign in to comment.