# Importing new entries from Zenodo communities
This notebook imports entries from Zenodo communities into our collection. Entries that are already in our database are not imported again.

In [1]:
import os
import requests
import bia_bob
import shutil
import pandas as pd

In [2]:
# Workaround: until our utilities are packaged as a Python library, the helper
# script is copied next to this notebook so that it can be imported from here.
shutil.copy('../scripts/generate_link_lists.py', './generate_link_lists.py')

'./generate_link_lists.py'

In [3]:
from generate_link_lists import load_dataframe, update_yaml_file, complete_zenodo_data

In [4]:
# The API key is read from the environment so it never appears in the notebook.
# If the variable is unset, `token` is None and requests leaves the parameter
# out of the query string.
token = os.getenv('ZENODO_API_KEY')
community = 'nfdi4bioimage'

# Ask Zenodo for the records that belong to the community.
# The timeout keeps the cell from hanging forever on a stalled connection.
response = requests.get('https://zenodo.org/api/records',
                        params={'communities': community,
                                'access_token': token},
                        timeout=60)

# Stop right here on an HTTP error (bad token, rate limit, server problem)
# instead of failing later with a confusing JSON or key error.
response.raise_for_status()

## That's what's listed in the community

In [5]:
online_data = response.json()
hits = list(online_data["hits"]["hits"])

# A single response only carries one page of results. Follow the "next" links
# advertised by the API until there are none left, so that records beyond the
# first page are not silently ignored.
next_page_url = online_data.get("links", {}).get("next")
while next_page_url:
    page_response = requests.get(next_page_url,
                                 params={'access_token': token},
                                 timeout=60)
    page_response.raise_for_status()
    page_data = page_response.json()

    page_hits = page_data["hits"]["hits"]
    if not page_hits:
        # Defensive stop: an empty page means there is nothing more to collect.
        break

    hits.extend(page_hits)
    next_page_url = page_data.get("links", {}).get("next")

In [6]:
# Number of community records collected from the API response.
len(hits)

25

In [7]:
# Pick the "self_html" link of every record returned for the community.
urls = [record["links"]["self_html"] for record in hits]
urls

['https://zenodo.org/records/14014252',
 'https://zenodo.org/records/14013026',
 'https://zenodo.org/records/14001388',
 'https://zenodo.org/records/13991322',
 'https://zenodo.org/records/13837146',
 'https://zenodo.org/records/13831274',
 'https://zenodo.org/records/11503289',
 'https://zenodo.org/records/13684187',
 'https://zenodo.org/records/13640979',
 'https://zenodo.org/records/13380289',
 'https://zenodo.org/records/13168693',
 'https://zenodo.org/records/12699637',
 'https://zenodo.org/records/11548617',
 'https://zenodo.org/records/11501662',
 'https://zenodo.org/records/11350689',
 'https://zenodo.org/records/11235513',
 'https://zenodo.org/records/11146807',
 'https://zenodo.org/records/11109616',
 'https://zenodo.org/records/11031747',
 'https://zenodo.org/records/10939520',
 'https://zenodo.org/records/10886750',
 'https://zenodo.org/records/10808486',
 'https://zenodo.org/records/10793700',
 'https://zenodo.org/records/10730424',
 'https://zenodo.org/records/10687659']

In [8]:
# Run the helper on the first record only, to inspect what it returns
# for a single URL before processing the whole list.
url = urls[0]

data = complete_zenodo_data(url)
data


https://zenodo.org/api/records/14014252


{'url': ['https://zenodo.org/records/14014252',
  'https://doi.org/10.5281/zenodo.14014252'],
 'name': 'Excel template for adding Key-Value Pairs to images',
 'publication_date': '2024-10-30',
 'description': 'This Excel Workbook contains some simple Macros to help with the generation of a .csv in the necessary format for Key-Value pair annotations of images in OMERO.\nThe format is tailored for the OMERO.web script&nbsp;"KeyVal_from_csv.py"&nbsp; (from the version &lt;=5.8.3 of the core omero-scripts).\nAttached is also a video of Thomas Zobel, the head of the imaging core facility Uni M&uuml;nster, showcasing the use of the Excel workbook.The video uses a slightly older version of the workbook and OMERO, but the core functionality remains unchanged.\nPlease keep in mind, that the OMERO.web script(s) to handle Key-Value Pairs from/to .csv files will undergo a major change very soon.This might break the compatibility with the format used now for the generated .csv from the workbook.',


## Checking what we already have

In [9]:
# Load what we already have; presumably this gathers the entries stored in the
# files under ../resources/ into one dataframe (verify in generate_link_lists.py).
df = load_dataframe("../resources/")

# Turn the whole "url" column into ONE string.
# NOTE: an `in` check against this string is a substring test, not an exact
# comparison of complete URLs.
all_urls = str(df["url"].tolist())
all_urls

Adding nfdi4bioimage.yml


"['https://focalplane.biologists.com/2023/07/26/sharing-your-poster-on-figshare/', 'https://biapol.github.io/blog/marcelo_zoccoler/omero_scripts/readme.html', 'https://biapol.github.io/blog/robert_haase/browsing_idr/readme.html', 'https://biapol.github.io/blog/mara_lampert/getting_started_with_mambaforge_and_python/readme.html', 'https://datamanagement.hms.harvard.edu/news/promoting-data-management-nikon-imaging-center-and-cell-biology-microscopy-facility', 'https://blog.delmic.com/data-handling-in-large-scale-electron-microscopy', 'https://focalplane.biologists.com/2023/06/01/tracking-in-napari/', 'https://focalplane.biologists.com/2023/05/03/feature-extraction-in-napari/', 'https://focalplane.biologists.com/2023/03/30/annotating-3d-images-in-napari/', 'https://focalplane.biologists.com/2022/12/08/managing-scientific-python-environments-using-conda-mamba-and-friends/', 'https://focalplane.biologists.com/2023/04/13/quality-assurance-of-segmentation-results/', 'https://focalplane.biolog

## Identifying entries we are still missing

In [10]:
# Build a set of every URL already stored in the database so that URLs are
# compared as a whole. A substring test on a stringified list would wrongly
# report e.g. ".../records/1401" as present when ".../records/14014252" is stored.
# A "url" cell is expected to hold either one string or a list of strings
# (assumption based on how entries are handled below); anything else is skipped.
known_urls = set()
for entry in df["url"].tolist():
    candidates = [entry] if isinstance(entry, str) else entry
    if isinstance(candidates, (list, tuple)):
        # Trailing slashes are ignored so that both spellings of a URL match.
        known_urls.update(str(candidate).rstrip("/") for candidate in candidates)

new_data = []
for url in urls:
    print(url)
    data = complete_zenodo_data(url)

    # Normalise to a list so single-URL and multi-URL entries are treated alike.
    if isinstance(data["url"], str):
        data["url"] = [data["url"]]

    # An entry counts as already imported as soon as ANY of its URLs
    # (record page or DOI) is known.
    not_in_data_yet = True
    for u in data["url"]:
        if u.rstrip("/") in known_urls:
            print("Yes")
            not_in_data_yet = False
        else:
            print("No")

    if not_in_data_yet:
        new_data.append(data)

https://zenodo.org/records/14014252
https://zenodo.org/api/records/14014252
Yes
Yes
https://zenodo.org/records/14013026
https://zenodo.org/api/records/14013026
Yes
Yes
https://zenodo.org/records/14001388
https://zenodo.org/api/records/14001388
Yes
Yes
https://zenodo.org/records/13991322
https://zenodo.org/api/records/13991322
Yes
Yes
https://zenodo.org/records/13837146
https://zenodo.org/api/records/13837146
No
No
https://zenodo.org/records/13831274
https://zenodo.org/api/records/13831274
Yes
Yes
https://zenodo.org/records/11503289
https://zenodo.org/api/records/11503289
No
No
https://zenodo.org/records/13684187
https://zenodo.org/api/records/13684187
No
No
https://zenodo.org/records/13640979
https://zenodo.org/api/records/13640979
No
No
https://zenodo.org/records/13380289
https://zenodo.org/api/records/13380289
No
No
https://zenodo.org/records/13168693
https://zenodo.org/api/records/13168693
Yes
Yes
https://zenodo.org/records/12699637
https://zenodo.org/api/records/12699637
No
No
http

## Saving new entries

In [11]:
import textwrap
import yaml

if new_data:
    # Indent EVERY line of the dump by two spaces so that all appended list
    # items line up (this assumes the entries in the target file are indented
    # by two spaces - TODO confirm). Replacing "\n" with "\n  " instead would
    # leave the first entry unindented and add trailing spaces after the last.
    zenodo_yml = textwrap.indent(yaml.dump(new_data), "  ")
    print(zenodo_yml)

    # Append to the existing collection; the leading newline separates the new
    # block from whatever the file currently ends with.
    # NOTE: running this cell again appends the same entries a second time.
    with open('../resources/nfdi4bioimage.yml', 'a', encoding='utf-8') as file:
        file.write("\n")
        file.write(zenodo_yml)
else:
    # Dumping an empty list would append a literal "[]" and corrupt the file.
    print("No new entries to save.")

- authors: "J\xFCngst, Christian, Zentis, Peter"
    description: Raw microscopy image from the NFDI4Bioimage calendar October 2024
    license: cc-by-4.0
    name: NFDI4Bioimage Calendar 2024 October; original image
    num_downloads: 8
    publication_date: '2024-09-25'
    url:
    - https://zenodo.org/records/13837146
    - https://doi.org/10.5281/zenodo.13837146
  - authors: Dvoretskii, Stefan
    license: cc-by-4.0
    name: Insights from Acquiring Open Medical Imaging  Datasets for Foundation Model
      Development
    num_downloads: 41
    publication_date: '2024-04-10'
    url:
    - https://zenodo.org/records/11503289
    - https://doi.org/10.5281/zenodo.11503289
  - authors: Wetzker, Cornelia, Schlierf, Michael
    description: The poster is part of the work of the German consortium NFDI4BIOIMAGE
      funded by the Deutsche Forschungsgemeinschaft (DFG grant number NFDI 46/1, project
      number 501864659).
    license: cc-by-4.0
    name: RESEARCH DATA MANAGEMENT on Campu