In [50]:
import logging
import os
from dataclasses import dataclass
from dataclasses import field
from typing import Dict
from typing import List
from typing import Optional
from typing import Type

import json5
import tornado.ioloop as ti
import tornado.web as tw

from dbc_data import lowell_mapping_functions as lmf
from dbc_pyutils import BaseHandler
from dbc_pyutils import JSONFormatter
from dbc_pyutils import Stat
from dbc_pyutils import StaticHandler
from dbc_pyutils import Statistics
from dbc_pyutils import StatusHandler
from dbc_pyutils import build_info
from dbc_pyutils import create_instance_id
from dbc_pyutils import create_post_examples_from_dir


In [51]:
@dataclass
class Work:
    """A single work, keyed by ``workid``, with its series and universe links."""

    # Identifier of the work (the "workId" value of the source JSON objects).
    workid: str
    # Value of "canBeReadIndependently" in the source JSON (False when absent).
    can_be_read_independently: bool
    # Title of the universe the work belongs to, or None.  read_json_file
    # stores the universe *title* here, not a Universe instance.
    universe: Optional[str]
    # Series title -> positions of the work in that series (None when the
    # source object carried no "numberInSeries").
    series_memberships: Dict[str, Optional[List[int]]] = field(default_factory=dict)

@dataclass
class Series:
    """A named series of works, optionally placed inside a universe."""

    # Title of the series; also the key used for it in the series lookup dict.
    series_title: str
    # Free-text description, or None when the source JSON has none.
    series_description: Optional[str]
    # Position of the series within its universe, or None when not given.
    number_in_universe: Optional[int]
    # The owning Universe instance, or None for a series outside any universe.
    universe: Optional["Universe"]
    # Work ids (Work.workid values) of the works that belong to this series.
    included_works: set = field(default_factory=set)
    # Alternative titles of the series (list of strings).
    series_alternative_title: list = field(default_factory=list)

@dataclass
class Universe:
    """A universe: a grouping that can contain both series and individual works."""

    # Title of the universe; also the key used for it in the universe lookup dict.
    universe_title: str
    # Free-text description, or None when the source JSON has none.
    universe_description: Optional[str]
    # Alternative titles of the universe (list of strings).
    universe_alternative_title: list = field(default_factory=list)
    # Titles (Series.series_title strings) of the series in this universe.
    included_series: set = field(default_factory=set)
    # Work ids (Work.workid values) of the works in this universe.
    included_works: set = field(default_factory=set)


In [52]:
def _parse_number_in_series(raw):
    """Normalize a raw "numberInSeries" value to a list of ints, or None if falsy."""
    if not raw:
        return None
    # A bare string or int is a single position; iterating a string such as
    # "19" character by character would wrongly yield [1, 9].
    if isinstance(raw, (str, int)):
        raw = [raw]
    return [int(part) for part in raw]


def read_json_file(path, filename, input_works_dict, input_series_dict, input_universe_dict):
    """Merge the objects of one JSON5 file into the works/series/universe lookups.

    The file must contain a list of objects.  It is processed in three passes
    so that every universe exists before the series that reference it, and
    every series before the works that reference it:

    1. each not-yet-known "universeTitle" creates a ``Universe``;
    2. each not-yet-known "seriesTitle" creates a ``Series`` and is registered
       in its universe (if the object names one);
    3. each object with a "workId" creates a ``Work`` or, for an already known
       work, records the additional series membership.

    Universes and series that already exist in the lookups are left untouched
    (the first definition wins).

    Args:
        path: Directory containing the file.
        filename: Name of the JSON5 file inside ``path``.
        input_works_dict: Work id -> ``Work``; updated in place.
        input_series_dict: Series title -> ``Series``; updated in place.
        input_universe_dict: Universe title -> ``Universe``; updated in place.

    Returns:
        The tuple ``(works_dict, series_dict, universe_dict)`` - the same three
        dict objects that were passed in.
    """
    works_dict: dict = input_works_dict
    series_dict: dict = input_series_dict
    universe_dict: dict = input_universe_dict

    # Explicit encoding so parsing does not depend on the platform default.
    with open(os.path.join(path, filename), encoding="utf-8") as fp:
        obj_list = json5.load(fp)

    # Pass 1: universes.
    for obj in obj_list:
        if "universeTitle" not in obj or obj["universeTitle"] in universe_dict:
            continue
        universe_dict[obj["universeTitle"]] = Universe(
            universe_title=obj["universeTitle"],
            universe_description=obj.get("universeDescription", None),
            # Fall back to an empty list (the field's declared default) rather
            # than None.  A present value is stored as found in the JSON.
            universe_alternative_title=obj.get("universeAlternativeTitle", None) or [],
        )

    # Pass 2: series.
    for obj in obj_list:
        if "seriesTitle" not in obj or obj["seriesTitle"] in series_dict:
            continue
        raw_number_in_universe = obj.get("numberInUniverse", None)
        # Pass 1 guarantees that every universe title of this file is known.
        universe = universe_dict[obj["universeTitle"]] if "universeTitle" in obj else None
        series = Series(
            series_title=obj["seriesTitle"],
            series_description=obj.get("seriesDescription", None),
            number_in_universe=int(raw_number_in_universe) if raw_number_in_universe else None,
            universe=universe,
            series_alternative_title=obj.get("seriesAlternativeTitle", None) or [],
        )
        series_dict[obj["seriesTitle"]] = series
        if universe is not None:
            universe.included_series.add(series.series_title)

    # Pass 3: works.
    for obj in obj_list:
        if "workId" not in obj:
            continue
        work_id = obj["workId"]
        series = series_dict.get(obj["seriesTitle"], None) if "seriesTitle" in obj else None
        universe = universe_dict.get(obj["universeTitle"], None) if "universeTitle" in obj else None
        number_in_series = _parse_number_in_series(obj.get("numberInSeries", None))

        if work_id not in works_dict:  # this is a work we have not seen before
            work = Work(
                workid=work_id,
                series_memberships={series.series_title: number_in_series} if series is not None else {},
                can_be_read_independently=obj.get("canBeReadIndependently", False),
                # Work.universe holds the universe title, not the Universe object.
                universe=universe.universe_title if universe is not None else None,
            )
            works_dict[work_id] = work
        else:
            work = works_dict[work_id]
            # Record the membership even when the work had none so far; testing
            # the (possibly empty) membership dict for truthiness would drop it
            # while the work is still added to series.included_works below.
            if series is not None:
                work.series_memberships[series.series_title] = number_in_series
            # NOTE(review): work.universe and can_be_read_independently of an
            # already known work are not updated here - confirm this is intended.

        if series is not None:
            series.included_works.add(work.workid)
        if universe is not None:
            universe.included_works.add(work.workid)

    return works_dict, series_dict, universe_dict


In [53]:
# Build the three lookup tables by merging every JSON file of the data
# directory, one file at a time, into the same dicts.
works_dict: dict = {}
series_dict: dict = {}
universe_dict: dict = {}
data_dir = '../data-files'
# os.listdir returns entries in arbitrary order, and read_json_file keeps the
# first definition it sees of a series/universe, so sort the file names to
# make the result reproducible across runs and machines.
json_files = sorted(json_file for json_file in os.listdir(data_dir) if json_file.endswith('.json'))
for jf in json_files:
    works_dict, series_dict, universe_dict = read_json_file(data_dir, jf, works_dict, series_dict, universe_dict)


In [49]:
# Display the loaded works for inspection.
# NOTE(review): the output below carries execution count 49, i.e. it was
# produced before the cells above (50-53) were last run - re-run to refresh.
works_dict

{'870970-basis:39014009': Work(workid='870970-basis:39014009', can_be_read_independently=False, universe=None, series_memberships={}),
 '870970-basis:52623138': Work(workid='870970-basis:52623138', can_be_read_independently=False, universe=None, series_memberships={}),
 '870970-basis:51998464': Work(workid='870970-basis:51998464', can_be_read_independently=False, universe=None, series_memberships={}),
 '870970-basis:51026349': Work(workid='870970-basis:51026349', can_be_read_independently=False, universe=None, series_memberships={}),
 '870970-basis:48960596': Work(workid='870970-basis:48960596', can_be_read_independently=False, universe=None, series_memberships={'De 5': [19]}),
 '870970-basis:47917786': Work(workid='870970-basis:47917786', can_be_read_independently=False, universe=None, series_memberships={'De 5': [2]}),
 '870970-basis:51663535': Work(workid='870970-basis:51663535', can_be_read_independently=False, universe=None, series_memberships={'De 5 (samling)': [6], 'De 5': [16, 