Skip to content

Commit

Permalink
[deviantart] fix 'metadata' & 'folders' for shared deviations
Browse files Browse the repository at this point in the history
* refactor '_metadata'
* add ability to process embedded deviations
* deduplicate and condense shared/embedded deviations
  • Loading branch information
ClosedPort22 committed Feb 19, 2023
1 parent 4f029ab commit 8a10e61
Show file tree
Hide file tree
Showing 2 changed files with 86 additions and 21 deletions.
93 changes: 72 additions & 21 deletions gallery_dl/extractor/deviantart.py
Original file line number Diff line number Diff line change
Expand Up @@ -781,10 +781,17 @@ class DeviantartStatusExtractor(DeviantartExtractor):
}),
# shared deviation
("https://www.deviantart.com/justgalym/posts/statuses", {
"options": (("journals", "none"),),
"options": (("journals", "none"), ("metadata", 1), ("folders", 1)),
"count": 1,
"pattern": r"https://images-wixmp-\w+\.wixmp\.com/f"
r"/[^/]+/[^.]+\.jpg\?token=",
"keyword": {
"description": str,
"folders": list,
"is_watching": bool,
"license": str,
"tags": list,
}
}),
# shared sta.sh item
("https://www.deviantart.com/vanillaghosties/posts/statuses", {
Expand Down Expand Up @@ -1490,20 +1497,25 @@ def _pagination(self, endpoint, params,
"Private deviations detected! Run 'gallery-dl "
"oauth:deviantart' and follow the instructions to "
"be able to access them.")
# "statusid" cannot be used instead
if results and "deviationid" in results[0]:

# 'statusid' cannot be used in place of 'deviationid'
deviations = results if \
results and "deviationid" in results[0] else None
nested_deviations = self._embedded_shared_content(results)
for dev in nested_deviations:
if not dev["is_deleted"]:
continue
patch = self._call(
"/deviation/" + dev["deviationid"], fatal=False)
if patch:
dev.update(patch)
for devs in (deviations, nested_deviations):
if not devs:
continue
if self.metadata:
self._metadata(results)
self._metadata(devs)
if self.folders:
self._folders(results)
else: # attempt to fix "deleted" deviations
for dev in self._shared_content(results):
if not dev["is_deleted"]:
continue
patch = self._call(
"/deviation/" + dev["deviationid"], fatal=False)
if patch:
dev.update(patch)
self._folders(devs)

yield from results

Expand All @@ -1523,12 +1535,50 @@ def _pagination(self, endpoint, params,
params["offset"] = int(params["offset"]) + len(results)

@staticmethod
def _embedded_shared_content(results):
    """Deduplicate and return an iterable of embedded/shared deviations
    in 'results'

    Every deviation object found in 'results' is keyed by its
    'deviationid'. The first object seen for an ID becomes the canonical
    one, and every later occurrence is replaced in-place inside 'results'
    by that canonical object, so that updating one of the returned
    objects is visible everywhere the deviation is referenced.

    Returns a view of the canonical deviation objects
    in order of first appearance.
    """
    # canonical deviation object for each 'deviationid'
    items = {}

    for result in results:
        # shared content in statuses
        for item in result.get("items") or ():
            if "deviation" not in item:
                continue
            dev = item["deviation"]
            # setdefault() registers 'dev' on first sight and otherwise
            # hands back the object registered earlier
            item["deviation"] = items.setdefault(dev["deviationid"], dev)

        # embedded content in statuses and journals
        # (/user/profile/posts endpoint)
        try:
            entity_map = \
                result["text_content"]["body"]["markup"]["entityMap"]
        except KeyError:
            continue

        for entity in entity_map.values():
            if "data" not in entity["data"]:
                continue
            data = entity["data"]["data"]

            # a single embedded deviation
            if isinstance(data, dict) and "deviationid" in data:
                entity["data"]["data"] = items.setdefault(
                    data["deviationid"], data)
                continue

            if not isinstance(data, list):
                continue

            # a list of embedded deviations
            # enumerate() yields (index, value) pairs, in that order
            for idx, dev in enumerate(data):
                if not isinstance(dev, dict) or "deviationid" not in dev:
                    continue
                data[idx] = items.setdefault(dev["deviationid"], dev)

    return items.values()

def _pagination_list(self, endpoint, params, key="results"):
result = []
Expand All @@ -1537,10 +1587,11 @@ def _pagination_list(self, endpoint, params, key="results"):

def _metadata(self, deviations):
    """Add extended metadata to each deviation object

    'deviations' is processed in batches of at most 50 objects; each
    batch is passed to self.deviation_metadata() and the returned
    entries are merged into the deviation objects in-place.
    """
    # NOTE(review): 50 is presumably the per-request limit of the
    # metadata endpoint -- confirm against the API documentation
    for batch in util.batched(deviations, 50):
        # assumes deviation_metadata() returns its entries in the same
        # order as 'batch'; zip() pairs them up purely by position
        for deviation, metadata in zip(
                batch, self.deviation_metadata(batch)):
            deviation.update(metadata)
            # reduce the tag objects to a plain list of tag names
            deviation["tags"] = [t["tag_name"] for t in deviation["tags"]]

def _folders(self, deviations):
"""Add a list of all containing folders to each deviation object"""
Expand Down
14 changes: 14 additions & 0 deletions gallery_dl/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,20 @@ def advance(iterable, num):
return iterator


def batched(iterable, n):
    """Batch 'iterable' into tuples of length 'n'

    The final tuple may be shorter than 'n'; an empty 'iterable'
    produces no tuples at all.

    Raises ValueError if 'n' is smaller than 1. The check runs when
    batched() is called, not when the result is first iterated.
    """
    # batched('ABCDEFG', 3) --> ABC DEF G
    # https://stackoverflow.com/questions/8991506
    if n < 1:
        raise ValueError("n must be at least one")
    it = iter(iterable)
    # iter(callable, sentinel) keeps calling until islice() comes back
    # empty, i.e. until 'it' is exhausted
    return iter(lambda: tuple(itertools.islice(it, n)), ())


def unique(iterable):
"""Yield unique elements from 'iterable' while preserving order"""
seen = set()
Expand Down

0 comments on commit 8a10e61

Please sign in to comment.