In [1]:
import sys

sys.path.append('..')

In [2]:
from yaml import load, Loader
from dltc_backcatalog.logic_types import Author, Article, JournalIssue, author_full_name, journal_date, TMonth, TYear, MONTH_STR_INT, TGivenName, TFamilyName
from aletk.utils import remove_extra_whitespace
from typing import Dict, Tuple

from faker import Faker
fake = Faker()

In [3]:

dummy_authors = [
    Author(
        given_name=fake.name(),
        family_name=fake.name(),
        correspondence=fake.boolean(),
        email=fake.email(),
        institute=fake.company(),
        orcid=fake.uuid4()
    ) for _ in range(10)
]

In [4]:
dummy_articles = [
    Article(
        title=fake.sentence(),
        subtitle=fake.sentence(),
        abstract=remove_extra_whitespace(fake.text()),
        authors=[fake.random_element(dummy_authors) for _ in range(3)],
        first_page=fake.random_int(),
        last_page=fake.random_int(),
        doi=fake.uuid4(),
        galleys=fake.file_path(),
        keywords=[fake.word() for _ in range(5)],
    ) for _ in range(10)
]

In [5]:
dummy_journal_issue = JournalIssue(
    journal=fake.company(),
    volume=fake.random_int(),
    issue=fake.random_int(),
    month=fake.random_int(1, 12),
    year=fake.random_int(1947, 2020),
    first_page=fake.random_int(),
    doi=fake.uuid4(),
    issnprint=fake.uuid4(),
    title=fake.sentence(),
    editors=[fake.random_element(dummy_authors) for _ in range(2)],
    articles=dummy_articles
)

TypeError: JournalIssue.__init__() missing 1 required positional argument: 'issn'

In [6]:
dummy_journal_issue

JournalIssue(journal='Reese, Grimes and Pacheco', volume=235, issue=9920, month=9, year=1984, first_page=4470, doi='ba7d69e7-7eb1-407e-9027-2a130c24bee5', issnprint='2ba4a3ce-b4e8-4cff-a555-bc6eb0751765', title='Together plant difficult answer fill budget.', editors=[Author(given_name='Andrew Greene', family_name='Heather Martinez', correspondence=True, email='pperez@example.com', institute='Nichols Inc', orcid='1220106c-fc80-4804-be6f-0e7d8fc65f70'), Author(given_name='Tyler Vincent', family_name='Dean Tanner', correspondence=True, email='achen@example.com', institute='Schmidt Ltd', orcid='ce164e11-697f-44c3-a043-f1460957e2d8')], articles=[Article(title='Officer marriage bring throughout purpose.', subtitle='Her health certainly community field.', abstract='Drop focus natural record. Season service month until. Enjoy pick health.', authors=[Author(given_name='Ivan Rhodes', family_name='Richard Logan', correspondence=True, email='kmeza@example.net', institute='Baker-Ford', orcid='3ee7e

In [7]:
sorted_articles = sorted(dummy_articles, key=lambda x: x.first_page)

In [8]:
def safe_yaml_str(s: str) -> str:
    yaml_special_non_quote_chars = [':', '#', '|', '>', '&', '*', '{', '}', '[', ']', '%', '@', '`'] 
    if any(c in s for c in yaml_special_non_quote_chars):
        if '"' in s:
            return f"'{s}'"
        else:
            return f"\"{s}\""
    else:
        return s

def article_author_to_yaml_str(author: Author) -> str:
    name_s = f"  - name: {safe_yaml_str(author.given_name)} {author.family_name}"
    email_s = f"    email: {safe_yaml_str(author.email)}" if author.email else ""
    institute_s = f"    institute:\n    - {safe_yaml_str(author.institute)}" if author.institute else ""
    orcid_s = f"    ORCID: {safe_yaml_str(author.orcid)}" if author.orcid else ""
    correspondence_s = f"    correspondence: {str(author.correspondence).lower()}"

    l = (name_s, email_s, institute_s, orcid_s, correspondence_s)
    l_filtered = (x for x in l if x != "")
    s = "\n".join(l_filtered)

    return s

def keyword_to_yaml_str(keyword: str) -> str:
    s = f"- {safe_yaml_str(keyword)}"

    return s

def article_to_yaml_str(article: Article) -> str:
    title_s = f"- title: {safe_yaml_str(article.title)}"
    subtitle_s = f"  subtitle: {safe_yaml_str(article.subtitle)}" if article.subtitle else ""
    first_page_s = f"  first-page: {article.first_page}"
    last_page_s = f"  last-page: {article.last_page}"
    doi_s = f"  doi: {safe_yaml_str(article.doi)}"
    abstract_s = f"  abstract: |\n    {article.abstract}" if article.abstract else ""
    author_s = f"  author:\n{"\n".join((article_author_to_yaml_str(author)) for author in article.authors)}"
    galleys_s = f"  galleys:\n  - {safe_yaml_str(article.galleys)}" if article.galleys else ""
    keywords_s = f"  keywords:\n  {"\n  ".join((keyword_to_yaml_str(keyword)) for keyword in article.keywords)}" if article.keywords else ""

    l = (title_s, subtitle_s, first_page_s, last_page_s, doi_s, abstract_s, author_s, galleys_s, keywords_s)
    l_filtered = (x for x in l if x != "")
    s = "\n".join(l_filtered)

    return s


def journal_issue_editor_to_yaml_str(editor: Author) -> str:
    s = f"- {author_full_name(editor)}"

    return s

def journal_issue_to_yaml_str(journal_issue: JournalIssue) -> str:
    volume_s = f"volume: {journal_issue.volume}"
    issue_s = f"issue: {journal_issue.issue}"
    date_s = f"date: {journal_date(journal_issue)}"
    first_page_s = f"first-page: {journal_issue.first_page}"
    doi_s = f"doi: {safe_yaml_str(journal_issue.doi)}"
    issnprint_s = f"issnprint: {safe_yaml_str(journal_issue.issnprint)}"
    issuetitle_s = f"issuetitle: {safe_yaml_str(journal_issue.title)}"
    issueeditor_s = f"issueeditor:\n{"\n".join((journal_issue_editor_to_yaml_str(editor)) for editor in journal_issue.editors)}"
    articles_s = f"articles:\n{"\n".join((article_to_yaml_str(article)) for article in journal_issue.articles)}"

    l = (volume_s, issue_s, date_s, first_page_s, doi_s, issnprint_s, issuetitle_s, issueeditor_s, articles_s)
    l_filtered = (x for x in l if x != "")
    s = "\n".join(l_filtered)

    return s



In [9]:
print(journal_issue_to_yaml_str(dummy_journal_issue))

volume: 235
issue: 9920
date: September 1984
first-page: 4470
doi: ba7d69e7-7eb1-407e-9027-2a130c24bee5
issnprint: 2ba4a3ce-b4e8-4cff-a555-bc6eb0751765
issuetitle: Together plant difficult answer fill budget.
issueeditor:
- Heather Martinez, Andrew Greene
- Dean Tanner, Tyler Vincent
articles:
- title: Officer marriage bring throughout purpose.
  subtitle: Her health certainly community field.
  first-page: 2071
  last-page: 9454
  doi: 80fb7ef2-1a36-454b-819f-00347b034a3e
  abstract: |
    Drop focus natural record. Season service month until. Enjoy pick health.
  author:
  - name: Ivan Rhodes Richard Logan
    email: "kmeza@example.net"
    institute:
    - Baker-Ford
    ORCID: 3ee7e250-afef-4abe-a5c7-1bb749145b7b
    correspondence: true
  - name: Tyler Vincent Dean Tanner
    email: "achen@example.com"
    institute:
    - Schmidt Ltd
    ORCID: ce164e11-697f-44c3-a043-f1460957e2d8
    correspondence: true
  - name: Elizabeth Walker Tamara Lee
    email: "mariamiller@example.com"


In [10]:
with open('journal_issue.yaml', 'w') as f:
    f.write(journal_issue_to_yaml_str(dummy_journal_issue))

In [11]:
if []:
    print("empty list is true")
else:
    print("hey")


hey


In [12]:

def parse_month_year(month_year_raw: str) -> Tuple[TMonth, TYear]:
    if month_year_raw == "":
        raise ValueError(f"Could not parse '{month_year_raw}'. Please input a month-year date of the form 'January 2025'.")
    stripped = remove_extra_whitespace(month_year_raw)
    month_s, year_s = tuple(stripped.split(" "))
    month = MONTH_STR_INT[month_s]
    year = int(year_s)
    return month, year

def parse_editor_name(author_name_raw: str) -> Tuple[TGivenName, TFamilyName]:

    if author_name_raw == "":
        return "", ""

    split = author_name_raw.split(",")
    stripped = [remove_extra_whitespace(x) for x in split]

    if len(stripped) == 2:
        return stripped[1], stripped[0]
    if len(stripped) == 1:
        print(f"Warning: author name in YAML '{author_name_raw}' contains only one part")
        return stripped[0], ""
    else:
        print(f"Warning: could not parse author name '{author_name_raw}'. Skipping.")
        return "", ""

def read_journal_issue_from_yaml(filename: str, journal: str) -> JournalIssue:

    yaml: Dict[str, str | Dict[str, str | Dict[str, str]]] = load(open(filename, 'r'), Loader=Loader)

    month, year = parse_month_year(yaml.get('date', ""))

    journal_issue = JournalIssue(
        journal=journal,
        volume=yaml.get('volume', ""),
        issue=yaml.get('issue', ""),
        month=month,
        year=year,
        first_page=int(yaml.get('first-page', "")),
        doi=yaml.get('doi', ""),
        issn=yaml.get('issn', ""),
        issnprint=yaml.get('issnprint', ""),
        title=yaml.get('issuetitle', ""),

        editors=[Author(
            given_name=parse_editor_name(editor)[0],
            family_name=parse_editor_name(editor)[1],
            correspondence=False,
            email='',
            institute='',
            orcid=''
        ) for editor in yaml.get('issueeditor', "")],

        articles=[Article(
            title=article.get('title', ""),
            subtitle=article.get('subtitle', ""),
            abstract=remove_extra_whitespace(article.get('abstract', "")),
            authors=[Author(
                given_name=(author.get('name', "")),
                family_name="",
                correspondence=author.get('correspondence', ""),
                email=author.get('email', ""),
                institute=author.get('institute', [""])[0],
                orcid=author.get('ORCID', "")
            ) for author in article['author']],

            first_page=article.get('first-page', ""),
            last_page=article.get('last-page', ""),
            doi=article.get('doi', ""),
            galleys=article.get('galleys', [""])[0],
            keywords=[keyword for keyword in article.get('keywords', [])]

        ) for article in yaml['articles']]
    )

    return journal_issue


In [13]:
read_ji = read_journal_issue_from_yaml('journal_issue.yaml', "Test")

In [14]:
print(read_ji)

JournalIssue(journal='Test', volume=235, issue=9920, month=9, year=1984, first_page=4470, doi='ba7d69e7-7eb1-407e-9027-2a130c24bee5', issnprint='2ba4a3ce-b4e8-4cff-a555-bc6eb0751765', title='Together plant difficult answer fill budget.', editors=[Author(given_name='Andrew Greene', family_name='Heather Martinez', correspondence=False, email='', institute='', orcid=''), Author(given_name='Tyler Vincent', family_name='Dean Tanner', correspondence=False, email='', institute='', orcid='')], articles=[Article(title='Officer marriage bring throughout purpose.', subtitle='Her health certainly community field.', abstract='Drop focus natural record. Season service month until. Enjoy pick health.', authors=[Author(given_name='Ivan Rhodes Richard Logan', family_name='', correspondence=True, email='kmeza@example.net', institute='Baker-Ford', orcid='3ee7e250-afef-4abe-a5c7-1bb749145b7b'), Author(given_name='Tyler Vincent Dean Tanner', family_name='', correspondence=True, email='achen@example.com', i

In [15]:
dltc_read = read_journal_issue_from_yaml('dltc-backcatalog-model.yaml', "Dialectica")

In [16]:
with open('dltc-reconstructed.yaml', 'w') as file:
    file.write(journal_issue_to_yaml_str(dltc_read))