-
Notifications
You must be signed in to change notification settings - Fork 43
/
Copy pathitem.py
118 lines (96 loc) · 3.97 KB
/
item.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
from dataclasses import dataclass
from typing import Any, Dict, List, Optional
from pydantic import BaseModel
from darwin.path_utils import construct_full_path
# NOTE(review): stacking ``@dataclass`` on a pydantic ``BaseModel`` is unusual; which
# ``__init__`` ends up in effect (and whether validation runs) should be confirmed.
@dataclass(frozen=True, eq=True)
class DatasetItem(BaseModel):
    """
    DatasetItem represents files that can be images or videos which belong to a dataset.
    """

    #: The id of this ``DatasetItem``.
    id: int

    #: The filename of this ``DatasetItem``.
    filename: str

    #: The status of this ``DatasetItem``. It can be ``"archived"``, ``"error"``, ``"uploading"``,
    #: ``"processing"``, ``"new"``, ``"annotate"``, ``"review"`` or ``"complete"``.
    status: str

    #: Whether or not this item was soft deleted.
    archived: bool

    #: The size of this ``DatasetItem``'s file in bytes.
    filesize: int

    #: The id of the ``Dataset`` this ``DatasetItem`` belongs to.
    dataset_id: int

    #: The slugified name of the ``Dataset`` this ``DatasetItem`` belongs to.
    dataset_slug: str

    #: The sequential value of this ``DatasetItem`` in relation to the ``Dataset`` it belongs to.
    #: This allows us to know which items were added first and is used mostly for sorting purposes.
    #: ``parse`` sets this to ``None`` for payloads that carry a ``"slots"`` key.
    seq: Optional[int]

    #: The id of this ``DatasetItem``'s workflow. A ``None`` value means this ``DatasetItem`` is
    #: new and was never worked on, or was reset to the new state.
    current_workflow_id: Optional[int] = None

    #: The darwin path to this ``DatasetItem``.
    path: str

    #: The names of each slot in the item, most items have a single slot corresponding to the file itself.
    #: only used for v2 dataset items
    slots: List[Any]

    #: Information about the slot layout of the item including type, version, and slot names.
    #: ``None`` when the parsed payload carries no layout information.
    layout: Optional[Dict]

    #: Metadata of this ``DatasetItem``'s workflow. A ``None`` value means this ``DatasetItem`` is
    #: new and was never worked on, or was reset to the new state.
    current_workflow: Optional[Dict[str, Any]] = None

    @property
    def full_path(self) -> str:
        """
        The full POSIX relative path of this ``DatasetItem``.
        """
        return construct_full_path(self.path, self.filename)

    @classmethod
    def parse(cls, raw: Dict[str, Any], dataset_slug: str = "n/a") -> "DatasetItem":
        """
        Parses the given dictionary into a ``DatasetItem``.

        Two payload shapes are handled: dictionaries that contain a ``"slots"`` key, and
        dictionaries that do not (which use ``"filename"``, ``"file_size"`` and ``"seq"``).

        Parameters
        ----------
        raw : Dict[str, Any]
            The dictionary to parse.
        dataset_slug : str, default: "n/a"
            The slug stored on the resulting item as ``dataset_slug``.

        Returns
        -------
        DatasetItem
            A dataset item with the parsed information.

        Raises
        ------
        KeyError
            If a mandatory key is missing from the given dictionary.
        ValidationError
            If any of the keys from the given dictionary do not have the correct format or are
            missing.
        """
        if "slots" in raw:
            # ``workflow_data`` may be absent or explicitly ``None``; both mean "no workflow".
            workflow_data = raw.get("workflow_data")
            data = {
                "id": raw["id"],
                "filename": raw["name"],
                "path": raw["path"],
                "status": raw["status"],
                "archived": raw["archived"],
                # The item size is the sum of its slots' sizes; slots without a size count as 0.
                "filesize": sum(file.get("size_bytes", 0) for file in raw["slots"]),
                "dataset_id": raw["dataset_id"],
                "dataset_slug": dataset_slug,
                # This payload shape does not provide a sequence number.
                "seq": None,
                "current_workflow_id": (workflow_data or {}).get("workflow_id"),
                "current_workflow": workflow_data,
                "slots": raw["slots"],
                "layout": raw.get("layout"),
            }
        else:
            data = {
                "id": raw["id"],
                "filename": raw["filename"],
                "status": raw["status"],
                "archived": raw["archived"],
                "filesize": raw["file_size"],
                "dataset_id": raw["dataset_id"],
                "dataset_slug": dataset_slug,
                "seq": raw["seq"],
                "current_workflow_id": raw.get("current_workflow_id"),
                "current_workflow": raw.get("current_workflow"),
                "path": raw["path"],
                "slots": [],
                # This payload shape has no layout; pass it explicitly so every field is supplied.
                "layout": None,
            }

        # Build via ``cls`` so subclasses using this alternate constructor get their own type.
        return cls(**data)