In [1]:
from itertools import batched

import rispy
from dotenv import load_dotenv
from tqdm.notebook import tqdm

from bigger_picker.airtable import AirtableManager
from bigger_picker.asana import AsanaManager
from bigger_picker.batchtracker import BatchTracker
from bigger_picker.integration import IntegrationManager
from bigger_picker.openai import OpenAIManager
from bigger_picker.rayyan import RayyanManager

# Load variables from a local .env file into the process environment.
# Presumably the manager classes constructed later read their credentials
# from these variables — TODO confirm against the bigger_picker package.
load_dotenv()

True

In [3]:
# Build one manager object per external service, then hand them all to the
# IntegrationManager, which is the object the rest of the notebook works with.
airtable = AirtableManager()
asana = AsanaManager()
# NOTE(review): this variable shares its name with the `openai` SDK package;
# importing that package later in this kernel would rebind the name.
openai = OpenAIManager()
# The path is relative to the notebook's working directory.
# Presumably this JSON file holds the Rayyan auth tokens — TODO confirm.
rayyan = RayyanManager("../rayyan_tokens.json")
batchtracker = BatchTracker()
# NOTE(review): debug=True is hard-coded here; what it toggles is defined in
# IntegrationManager — confirm before running against live data.
integration = IntegrationManager(
    asana_manager=asana,
    airtable_manager=airtable,
    openai_manager=openai,
    rayyan_manager=rayyan,
    batch_tracker=batchtracker,
    debug=True,
)

In [6]:
# The Mental Health search was exported from Scopus in two parts; read both
# and write them back out as one combined RIS file for the cells below.
mh_entries = []
for part_name in ("Scopus Mental Health.ris", "Scopus Mental Health_1.ris"):
    with open(part_name) as part_file:
        mh_entries.extend(rispy.load(part_file))

with open("Scopus Mental Health Combined.ris", "w") as combined_file:
    rispy.dump(mh_entries, combined_file)

In [None]:
# Topic label -> Scopus RIS export containing that topic's search results.
ris_files = {
    "Academic achievement": "Scopus Academic.ris",
    "Cognition": "Scopus Cognition.ris",
    "Mental health": "Scopus Mental Health Combined.ris",
    "Wellbeing": "Scopus Wellbeing.ris",
}

# Resolver / scheme prefixes that may precede the bare DOI in an export.
DOI_PREFIXES = (
    "https://doi.org/",
    "http://doi.org/",
    "https://dx.doi.org/",
    "http://dx.doi.org/",
    "doi:",
)


def normalise_doi(raw_doi):
    """Return ``raw_doi`` as a bare, lower-cased DOI.

    Args:
        raw_doi: The DOI string as found in the RIS entry, or ``None``.

    Returns:
        The DOI with surrounding whitespace and any known resolver prefix
        removed, lower-cased. An empty string if nothing usable remains.
    """
    # Trim and lower-case *before* removing the prefix: a value such as
    # " HTTPS://DOI.ORG/10.1/X" would otherwise keep its prefix and end up as
    # a different key from "10.1/x".
    doi = (raw_doi or "").strip().lower()
    for prefix in DOI_PREFIXES:
        if doi.startswith(prefix):
            doi = doi.removeprefix(prefix).strip()
            break
    return doi


results = {}  # normalised DOI -> labels whose export contains it (no repeats)
no_match = {}  # label -> entries that have no usable DOI

for label, filename in ris_files.items():
    with open(filename) as risfile:
        entries = rispy.load(risfile)

    for entry in entries:
        article_doi = normalise_doi(entry.get("doi"))
        if not article_doi:
            # Missing, blank, or prefix-only DOI: cannot be matched by DOI.
            no_match.setdefault(label, []).append(entry)
            continue
        # An export can list the same article more than once (the Mental
        # Health file is two exports concatenated); record each label once so
        # the label combination for a DOI stays meaningful.
        doi_labels = results.setdefault(article_doi, [])
        if label not in doi_labels:
            doi_labels.append(label)

In [None]:
# Report, per topic label, how many entries could not be indexed by DOI.
for label, doi_less in no_match.items():
    doi_less_count = len(doi_less)
    print(f"{label}: {doi_less_count} entries without DOI")

In [None]:
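# TODO: reconcile `results` with Rayyan.
# 1. Iterate through `results` and check whether each DOI is already in Rayyan.
# 2. If it is, update the Rayyan record with the new labels and update Airtable
#    (open question: should Asana be updated as well?).
# 3. If it is not, add it to `no_match`, grouped by the combination of labels it
#    has (the labels must be sorted first so each combination has one key).
# 4. Create a new RIS file for each combination of labels, for manual upload.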

In [None]:
# Write the DOI-less entries to disk, one RIS file per topic label.
for label, doi_less in no_match.items():
    out_name = f"{label} unmatches.ris"
    with open(out_name, "w") as out_file:
        rispy.dump(doi_less, out_file)

Academic achievement: 1267 entries without DOI
Cognition: 546 entries without DOI
Mental health: 655 entries without DOI
Wellbeing: 482 entries without DOI
