In [108]:
import sys
sys.path.append('../..')


In [109]:
import csv
from typing import Generator, Literal, Tuple, NamedTuple
from src.sdk.utils import remove_extra_whitespace

In [110]:
# Key identifying a Bibentry (the type of its `bibkey` field).
type TBibkey = str
# Parsed, comma-separated bibkey lists: the types of the `further_references_good`
# and `depends_on_good` fields of Bibentry.
type TFurtherReferences = Tuple[TBibkey, ...]
type TDependsOn = Tuple[TBibkey, ...]
# The "bad" counterparts are kept as raw, unparsed strings; validate_bibentry
# only accepts entries where they are empty.
type TFurtherReferencesBad = str
type TDependsOnBad = str

class Bibentry(NamedTuple):
    """
    One row of the bootstrapped bibentries CSV, as built by
    load_processed_bibentries_csv.
    """
    # Key of this entry; reflexive_bibentry looks for it inside the two *_good tuples.
    bibkey: TBibkey
    # The next four fields are copied verbatim from the CSV columns of the same name.
    title: str
    notes: str
    crossref: str
    further_note: str
    # Bibkeys parsed from the comma-separated `further_references_good` column.
    further_references_good: TFurtherReferences
    # Raw `further_references_bad` column; validate_bibentry requires it to be empty.
    further_references_bad: TFurtherReferencesBad
    # Bibkeys parsed from the comma-separated `depends_on_good` column.
    depends_on_good: TDependsOn
    # Raw `depends_on_bad` column; validate_bibentry requires it to be empty.
    depends_on_bad: TDependsOnBad
    # Read from the input CSV's `status` and `error_message` columns respectively
    # (presumably the outcome of an earlier bootstrap step -- confirm against its producer).
    bootstrap_status: str
    bootstrap_error_message: str


# Outcome label produced by process_bibentry:
#   "success"   - the entry validated and is not reflexive
#   "error"     - validation raised ValueError, or the entry is reflexive
#   "exception" - any other exception was raised while processing
type TStatus = Literal["success", "error", "exception"]
# Human-readable detail accompanying a status ("" on success).
type TMessage = str

# (entry, status, message) triple: what process_bibentry returns and what
# write_reports_to_csv serialises as one CSV row.
type TReport = Tuple[
    Bibentry,
    TStatus,
    TMessage
]


In [111]:
def _split_bibkeys(raw: str | None) -> Tuple[str, ...]:
    """Split a comma-separated CSV cell into a tuple of stripped, non-empty bibkeys."""
    # DictReader yields None for cells missing from a short row; treat that like "".
    if not raw:
        return ()
    # remove_extra_whitespace is a project helper (presumably collapses whitespace
    # runs -- TODO confirm); the per-item strip below does not rely on it.
    pieces = (piece.strip() for piece in remove_extra_whitespace(raw).split(','))
    # Drop empty pieces: "".split(',') returns [''], which would otherwise become
    # a bogus one-element tuple ('',) for an empty cell.
    return tuple(piece for piece in pieces if piece)


def load_processed_bibentries_csv(filename: str, encoding: str) -> Generator[Bibentry, None, None]:
    """
    Lazily read the bootstrapped bibentries CSV and yield one Bibentry per row.

    The `further_references_good` and `depends_on_good` columns are parsed as
    comma-separated bibkey lists. Each key is stripped of surrounding whitespace
    (so both "a,b" and "a, b" give ('a', 'b')) and empty items are discarded
    (so an empty cell gives ()). All other columns are copied verbatim; the
    `status` / `error_message` columns populate `bootstrap_status` /
    `bootstrap_error_message`.

    Args:
        filename: path of the CSV file; its header row must contain the columns
            read below.
        encoding: text encoding used to open the file.

    Yields:
        Bibentry: one per data row, in file order.

    Raises:
        OSError: if the file cannot be opened (raised on first iteration).
        KeyError: if an expected column is missing from the header.
    """
    # newline='' is what the csv module asks for: it lets the reader handle line
    # endings itself so quoted fields containing newlines are read back intact.
    with open(filename, 'r', encoding=encoding, newline='') as csvfile:
        for row in csv.DictReader(csvfile):
            yield Bibentry(
                bibkey=row['bibkey'],
                title=row['title'],
                notes=row['notes'],
                crossref=row['crossref'],
                further_note=row['further_note'],
                further_references_good=_split_bibkeys(row['further_references_good']),
                further_references_bad=row['further_references_bad'],
                depends_on_good=_split_bibkeys(row['depends_on_good']),
                depends_on_bad=row['depends_on_bad'],
                bootstrap_status=row['status'],
                bootstrap_error_message=row['error_message']
            )



In [112]:
def validate_bibentry(bibentry: Bibentry) -> None:
    """
    Check that a bibentry is fit for further processing.

    Rules:
      - `bibkey` is a string with at least one non-whitespace character;
      - `further_references_bad` is "" or None;
      - `depends_on_bad` is "" or None.

    Args:
        bibentry: the entry to check.

    Returns:
        None when every rule holds.

    Raises:
        ValueError: if any rule fails. The message contains one
            "<field>: <problem>\\n" line per failed rule, in the order above.
    """
    bibkey = bibentry.bibkey

    validations = {
        "bibkey": (
            # isinstance already excludes None; the strip() check is what actually
            # enforces the "non-empty" part of the rule.
            isinstance(bibkey, str) and bibkey.strip() != "",
            # Show the offending value: the bare type is uninformative when the
            # key is an empty or blank string.
            f"bibkey should be a non-empty string, got {bibkey!r}",
        ),
        "further_references_bad": (
            bibentry.further_references_bad == "" or bibentry.further_references_bad is None,
            f"further_references_bad should be empty, got {bibentry.further_references_bad}",
        ),
        "depends_on_bad": (
            bibentry.depends_on_bad == "" or bibentry.depends_on_bad is None,
            f"depends_on_bad should be empty, got {bibentry.depends_on_bad}",
        ),
    }

    # Report every failed rule at once rather than stopping at the first one.
    failures = [
        f"{key}: {message}\n"
        for key, (valid, message) in validations.items()
        if not valid
    ]

    if failures:
        raise ValueError("".join(failures))

    return None

In [113]:
def reflexive_bibentry(bibentry: Bibentry) -> bool:
    """
    Tell whether a bibentry refers to itself.

    An entry is reflexive when its own bibkey is listed among its
    further_references_good or among its depends_on_good.
    """
    own_key = bibentry.bibkey
    key_collections = (bibentry.further_references_good, bibentry.depends_on_good)

    # any() stops at the first collection containing the key, mirroring `or`.
    return any(own_key in keys for keys in key_collections)

In [114]:
def process_bibentry(bibentry: Bibentry) -> TReport:
    """
    Validate one bibentry, check it for reflexivity and wrap the outcome in a report.

    Never raises: a ValueError from validation becomes an "error" report carrying
    the validation message, a reflexive entry becomes an "error" report with the
    message "Reflexive bibentry", and any other exception becomes an "exception"
    report carrying its text. Otherwise the report is ("success", "").
    """
    try:
        validate_bibentry(bibentry)
        # Evaluated inside the try so failures of the reflexivity check are reported too.
        is_reflexive = bool(reflexive_bibentry(bibentry))
    except ValueError as validation_error:
        status, message = "error", str(validation_error)
    except Exception as unexpected_error:
        status, message = "exception", str(unexpected_error)
    else:
        if is_reflexive:
            status, message = "error", "Reflexive bibentry"
        else:
            status, message = "success", ""

    return (bibentry, status, message)

In [115]:
def write_reports_to_csv(reports: Generator[TReport, None, None], filename: str, encoding: str) -> None:
    """
    Write reports to a CSV file: a header row, then one row per report.

    Each row holds the eleven Bibentry fields followed by the report's status
    and message. The two *_good tuples are serialised by joining their items
    with ", ".

    Args:
        reports: iterable of (bibentry, status, message) triples; it is consumed
            lazily, one report at a time.
        filename: path of the file to create or overwrite.
        encoding: text encoding used to open the file.

    Raises:
        OSError: if the file cannot be opened for writing.
    """
    header = [
        "bibkey",
        "title",
        "notes",
        "crossref",
        "further_note",
        "further_references_good",
        "further_references_bad",
        "depends_on_good",
        "depends_on_bad",
        "bootstrap_status",
        "bootstrap_error_message",
        "status",
        "message",
    ]

    # newline='' is required by the csv module: the writer emits "\r\n" itself,
    # and without it platforms that translate "\n" (Windows) would write
    # "\r\r\n", which shows up as a blank line after every row.
    with open(filename, 'w', encoding=encoding, newline='') as csvfile:
        csvwriter = csv.writer(csvfile)
        csvwriter.writerow(header)

        for bibentry, status, message in reports:
            csvwriter.writerow([
                bibentry.bibkey,
                bibentry.title,
                bibentry.notes,
                bibentry.crossref,
                bibentry.further_note,
                ", ".join(bibentry.further_references_good),
                bibentry.further_references_bad,
                ", ".join(bibentry.depends_on_good),
                bibentry.depends_on_bad,
                bibentry.bootstrap_status,
                bibentry.bootstrap_error_message,
                status,
                message,
            ])

In [116]:
# Input: CSV of bootstrapped bibentries. Output: the same rows plus a
# status/message pair describing the validation and reflexivity check.
bootstrap_filename = "../../data/bootstrapped_bibentities.csv"
report_filename = "../../data/bibkey_reflexivity_report.csv"

# Both stages below are lazy: rows are read, processed and written one at a
# time, and nothing happens until write_reports_to_csv starts iterating.
bibentries = load_processed_bibentries_csv(filename=bootstrap_filename, encoding='utf-8')

reports = (process_bibentry(entry) for entry in bibentries)

write_reports_to_csv(reports=reports, filename=report_filename, encoding='utf-8')