/
models.py
152 lines (122 loc) · 4.71 KB
/
models.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
# Copyright 2017-2022 - Swiss Data Science Center (SDSC)
# A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and
# Eidgenössische Technische Hochschule Zürich (ETHZ).
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Models for providers."""
import os
from enum import Enum, auto
from pathlib import Path
from typing import Any, List, NamedTuple, Optional, Type
from marshmallow import EXCLUDE
from renku.command.schema.dataset import DatasetSchema
from renku.domain_model.dataset import Dataset, DatasetTag, RemoteEntity
class DatasetAddAction(Enum):
    """Types of action when adding a file to a dataset."""

    # Member values are generated by ``auto()``; compare members rather than relying on the raw values.
    COPY = auto()
    MOVE = auto()
    SYMLINK = auto()
    # Sentinel member; ``DatasetAddMetadata.has_action`` is false only for this one.
    NONE = auto()
class DatasetAddMetadata(NamedTuple):
    """Metadata for a new file that will be added to a dataset.

    ``based_on`` and ``gitignored`` are optional and default to ``None`` and ``False`` respectively;
    all other fields must be supplied.
    """

    entity_path: Path
    url: str
    action: DatasetAddAction
    source: Path
    destination: Path
    based_on: Optional["RemoteEntity"] = None
    gitignored: bool = False

    @property
    def has_action(self) -> bool:
        """Tell whether ``action`` is anything other than ``DatasetAddAction.NONE``."""
        return self.action is not DatasetAddAction.NONE

    def get_absolute_commit_path(self, client_path: Path) -> str:
        """Return path of the file in the repository.

        Args:
            client_path: Base path that ``entity_path`` is joined onto.

        Returns:
            ``client_path`` and ``entity_path`` combined with ``os.path.join``.
        """
        joined_path = os.path.join(client_path, self.entity_path)
        return joined_path
class ProviderParameter(NamedTuple):
    """Provider-specific parameters.

    Only ``name`` is required; every other field has a default.
    """

    # NOTE(review): the fields presumably mirror the settings of a command-line option -- confirm against
    # the code that consumes these tuples.
    name: str
    default: Any = None
    # NOTE(review): this list is created once, when the class is defined, so every instance built without
    # an explicit ``flags`` shares the very same list object -- treat it as read-only.
    flags: List[str] = []
    help: str = ""
    is_flag: bool = False
    multiple: bool = False
    type: Optional[Type] = None
class ProviderDataset(Dataset):
    """A Dataset that is imported from a provider.

    On top of ``Dataset`` this class keeps its own ``dataset_files`` list (reset to empty on construction),
    an optional ``tag``, and makes the ``files`` property raise ``NotImplementedError``.
    """

    def __init__(self, **kwargs):
        """Initialize the dataset by forwarding all keyword arguments to ``Dataset``.

        Args:
            **kwargs: Keyword arguments passed on to ``Dataset.__init__``. When ``initial_identifier`` is
                not among them, the placeholder ``"invalid-initial-id"`` is used.
        """
        # ``setdefault`` only fills the key when it is missing; an explicitly passed value is kept.
        kwargs.setdefault("initial_identifier", "invalid-initial-id")
        super().__init__(**kwargs)
        # Assigned after the base initializer, so the list always starts out empty here.
        self.dataset_files = []  # TODO Make this a property
        self._tag: Optional["DatasetTag"] = None

    @classmethod
    def from_jsonld(cls, data, schema_class=None) -> "ProviderDataset":
        """Create an instance from JSON-LD data.

        Args:
            data: JSON-LD content, given as a ``dict`` or a ``list``.
            schema_class: Schema class used for loading; ``DatasetSchema`` is used when omitted.

        Returns:
            The object produced by ``schema_class(flattened=True).load(data)``.

        Raises:
            AssertionError: If ``data`` is neither a ``dict`` nor a ``list`` (only while assertions are enabled).
        """
        assert isinstance(data, (dict, list)), f"Invalid data type: {data}"

        # NOTE(review): the fallback is ``DatasetSchema`` and ``cls`` is not consulted, so the type of the
        # loaded object is decided by the schema alone -- confirm that this is intended.
        schema_class = schema_class or DatasetSchema
        return schema_class(flattened=True).load(data)

    @classmethod
    def from_dataset(cls, dataset: "Dataset") -> "ProviderDataset":
        """Create an instance from a Dataset.

        The metadata attributes listed below are taken over from ``dataset``; ``dataset_files`` is passed
        as an empty list.

        Args:
            dataset: The dataset whose metadata is copied.

        Returns:
            A new instance of ``cls`` carrying the copied metadata.
        """
        # Build through ``cls`` so that a subclass calling this constructor gets an instance of itself.
        return cls(
            annotations=dataset.annotations,
            creators=dataset.creators,
            dataset_files=[],
            date_created=dataset.date_created,
            date_published=dataset.date_published,
            date_removed=dataset.date_removed,
            derived_from=dataset.derived_from,
            description=dataset.description,
            id=dataset.id,
            identifier=dataset.identifier,
            images=dataset.images,
            in_language=dataset.in_language,
            initial_identifier=dataset.initial_identifier,
            keywords=dataset.keywords,
            license=dataset.license,
            name=dataset.name,
            project_id=dataset.project_id,
            same_as=dataset.same_as,
            title=dataset.title,
            version=dataset.version,
        )

    @property
    def files(self):
        """Return list of existing files.

        Raises:
            NotImplementedError: Always; this property is not supported on ``ProviderDataset``.
        """
        raise NotImplementedError("ProviderDataset has no files.")

    @property
    def tag(self) -> Optional["DatasetTag"]:
        """Return dataset's tag (``None`` until one has been set)."""
        return self._tag

    @tag.setter
    def tag(self, value):
        """Set dataset's tag."""
        self._tag = value
class ProviderDatasetFile:
    """Store metadata for dataset files that will be downloaded from a provider."""

    def __init__(
        self,
        source: Optional[str],
        filename: str,
        checksum: str,
        size_in_mb: Optional[float],
        filetype: str,
        path: str,
    ):
        """Keep every argument on the instance under an attribute of the same name.

        Args:
            source: Stored as ``self.source``; may be ``None``.
            filename: Stored as ``self.filename``.
            checksum: Stored as ``self.checksum``.
            size_in_mb: Stored as ``self.size_in_mb``; may be ``None``.
            filetype: Stored as ``self.filetype``.
            path: Stored as ``self.path``.
        """
        # Plain value holder: no validation or conversion is applied to any argument.
        self.checksum: str = checksum
        self.filename: str = filename
        self.filetype: str = filetype
        self.path: str = path
        self.size_in_mb: Optional[float] = size_in_mb
        self.source: Optional[str] = source
class ProviderDatasetSchema(DatasetSchema):
    """ProviderDataset schema.

    A ``DatasetSchema`` subclass whose ``Meta`` names ``ProviderDataset`` as the model.
    """

    class Meta:
        """Meta class."""

        model = ProviderDataset
        # ``EXCLUDE`` is marshmallow's setting for dropping unknown input fields on load instead of raising.
        unknown = EXCLUDE