# Zotero API

Zotero API Params

In [None]:
import os

# Zotero API key.
# SECURITY: a credential must not be hard-coded in the notebook, because it is
# saved and shared together with the file. The key is read from the
# ZOTERO_API_KEY environment variable instead (empty string when unset, in
# which case authenticated requests will fail). Any key that was previously
# stored in this cell should be revoked at https://www.zotero.org/settings/keys.
API_KEY = os.environ.get("ZOTERO_API_KEY", "")
# Zotero numeric user ID (this is the ID, not the user name)
USER_ID = "13352140"
LIBRARY_ID = USER_ID        # is USER_ID for personal library
LIBRARY_TYPE = "user"       # "group"

In [None]:
# Name of the Zotero collection whose items are read by get_item_folder().
# NOTE(review): the trailing "None" looks like the alternative setting for
# "no collection filter" — confirm before relying on it.
COLLECTION_NAME = "Collection1"         # None

In [None]:
"""
%pip install pyzotero
%pip install bibtexparser
"""

In [None]:
from pyzotero import zotero
import json
import bibtexparser


def get_item_folder(library_id, library_type, api_key, collection_name=None):
    """Map Zotero item keys to the href of their attachment link.

    Args:
        library_id: Zotero library ID (the user ID for a personal library).
        library_type: ``"user"`` or ``"group"``.
        api_key: Zotero API key used to authenticate.
        collection_name: Name of the collection to read. When ``None`` the
            items of the whole library are read instead.

    Returns:
        dict: ``{item_key: href}`` with one entry per returned item whose
        ``links`` section contains an ``attachment`` entry. ``href`` is the
        value of ``item["links"]["attachment"]["href"]``.

    Raises:
        ValueError: If ``collection_name`` is given but the library has no
            collection with that name.
    """
    # Initialize Pyzotero client
    zot = zotero.Zotero(library_id, library_type, api_key)

    # NOTE(review): each pyzotero call presumably returns a single page of
    # results; wrap the calls in zot.everything(...) if the library or the
    # collection can be larger than one page — confirm.
    if collection_name is None:
        # No collection requested: read the items of the whole library
        items = zot.items()
    else:
        # Resolve the collection name (the function argument, not the
        # module-level COLLECTION_NAME) to its key
        collection_key = next(
            (
                i_collection["data"]["key"]
                for i_collection in zot.collections()
                if i_collection["data"]["name"] == collection_name
            ),
            None,
        )
        if collection_key is None:
            raise ValueError(f"Collection not found: {collection_name!r}")
        items = zot.collection_items(collection_key)

    # Collect the result as dict: {key: attachment href}
    item_key_folder = {}
    for i_item in items:
        # Only items that expose an attachment link are of interest
        attachment = i_item.get("links", {}).get("attachment")
        if attachment is None:
            continue

        print(json.dumps(i_item, indent=4))
        print("***********************")
        # Add to the mapping; rebinding the dict here would drop every
        # previously collected item
        item_key_folder[i_item["key"]] = attachment["href"]

    return item_key_folder

    # for i in items:
    #     pretty_json = json.dumps(i, indent=4)
    #     print(pretty_json)


    # # The items are now formatted as BibTeX entries
    # # You can directly write them to a file
    # with open('exported_library.bib', 'w', encoding='utf-8') as bibfile:
    #     for item in items:
    #         bibfile.write(item['content'] + '\n')

    # print("Export completed.")


    # with open('from_collection.bib', 'w', encoding='utf-8') as bibtex_file:
    #     bibtexparser.dump(items, bibtex_file)

# Build the item-key -> attachment-href mapping for the configured collection
res = get_item_folder(LIBRARY_ID, LIBRARY_TYPE, API_KEY, collection_name=COLLECTION_NAME)
# Bare expression so that the notebook displays the mapping as the cell output
res

Table Explanation

- items
    + itemID:
    + itemTypeID
    + dateAdded:
    + dateModified:
    + clientDateModified:
    + libraryID:
    + key:
    + version:
    + synced: 
- itemTypes:
    + itemTypeID: 3=attachment
    + typeName: document type, like annotation, artwork, attachment, bill, etc.
    + templateItemTypeID:
    + display:
- itemDataValue: metadata id and value
    + valueID: 
    + value: metadata value
- itemData: items and their corresponding field id and value
    + itemID:
    + fieldID:
    + valueID:
- fields
    + fieldID
    + fieldName: field name
    + fieldFormatID: Useless
- fieldsCombined
    + fieldID
    + fieldName: field name
    + custom: 0 for not custom
- itemAttachments: the attachments of the items
    + itemID:
    + parentItemID:
    + path: file path
    + storageHash: the hash of the subfolders (probably an MD5 hash)
- collections
    + collectionID: collection ID, like 1, 2, 3
    + collectionName: collection name, like Ismail, WDS, Ashley
    + parentCollectionID: parent collection ID
    + key: like EJRKKRU3
    + libraryID: like 1



# Export from Zotero

Use API to get metadata information from server and dump as biblatex

- [pyzotero API](https://github.com/urschrei/pyzotero)
- Append the file path in the biblatex

Solution1: Using bibtexparser

- It can't parse the non-standard entry, like article, etc
    + The following solution might work but NOT RECOMMENDED.
    + Could replace all ENTRYTYPE into misc for simplicity, since this might not be that important and accurate. 
    + Since ENTRYTYPE won't show up anyways, if we manually copy paste the content.
- But it can parse ID(@) and ENTRYTYPE


In [None]:
import bibtexparser
from bibtexparser.bparser import BibTexParser
from bibtexparser.customization import convert_to_unicode
import csv

def _load_bibtex(bib_file_path):
    """Load and parse a BibTeX file with bibtexparser.

    Args:
        bib_file_path: Path of the UTF-8 encoded ``.bib`` file.

    Returns:
        The database object returned by ``bibtexparser.load``; its
        ``entries`` attribute holds the parsed records.
    """
    with open(bib_file_path, encoding='utf-8') as bibtex_file:
        # ignore_nonstandard_types=False keeps entries whose type is not a
        # classic BibTeX type (for example biblatex types); with the default
        # of True the parser silently drops them.
        parser = BibTexParser(common_strings=True, ignore_nonstandard_types=False)
        parser.customization = convert_to_unicode
        bib_database = bibtexparser.load(bibtex_file, parser=parser)
    return bib_database

def _get_bibtex_fieldnames(bib_database):
    """Return every distinct field name used by the entries.

    The names are listed in the order in which they are first encountered
    while walking ``bib_database.entries``.
    """
    # A dict keeps insertion order and ignores keys it already holds, so it
    # serves as an ordered set here.
    seen = {}
    for record in bib_database.entries:
        seen.update(dict.fromkeys(record.keys()))
    return list(seen)

def _write_bibtex2csv(bib_database, csv_file_path, fieldnames):
    """Write the entries of the database to a CSV file.

    The header row is ``fieldnames``; each entry becomes one row, and a
    field that an entry does not have is written as an empty string.
    """
    # Lazily project every entry onto the requested columns
    rows = (
        {name: record.get(name, '') for name in fieldnames}
        for record in bib_database.entries
    )
    with open(csv_file_path, 'w', newline='', encoding='utf-8') as out:
        csv_writer = csv.DictWriter(out, fieldnames=fieldnames)
        csv_writer.writeheader()
        csv_writer.writerows(rows)

def convert_bibtex_to_csv(bib_file_path, csv_file_path):
    """Convert a BibTeX file to CSV using the bibtexparser-based helpers.

    Loads ``bib_file_path``, collects the field names used by its entries
    and writes one CSV row per entry to ``csv_file_path``.
    """
    database = _load_bibtex(bib_file_path)
    columns = _get_bibtex_fieldnames(database)
    _write_bibtex2csv(database, csv_file_path, columns)


Solution 2: Using pybtex

- It treats every value as raw text, without changing any format, which leads to the " and {{}} problems


In [None]:
from pybtex.database import parse_file
import csv

def _load_biblatex(bib_file_path):
    """Parse the bibliography file with pybtex and return the parsed data.

    The file is read with pybtex's ``bibtex`` format.
    """
    bib_data = parse_file(bib_file_path, bib_format="bibtex")
    return bib_data

def _get_biblatex_fieldnames(bib_database):
    """Return every column name needed to export the database.

    The result starts with ``"ID"`` and ``"ENTRYTYPE"``, followed by the
    field names and person roles (authors, editors, ...) of the entries in
    the order in which they are first encountered. The order is therefore
    the same on every run, which keeps the CSV column layout stable (a
    plain ``set`` of strings has no reproducible order between runs).

    Args:
        bib_database: Object whose ``entries`` mapping holds entries with
            ``fields`` and ``persons`` mappings.

    Returns:
        list: The unique column names.
    """
    # A dict is used as an ordered set: insertion order is kept and keys
    # that are already present are not moved.
    ordered = dict.fromkeys(("ID", "ENTRYTYPE"))
    for entry in bib_database.entries.values():
        ordered.update(dict.fromkeys(entry.fields.keys()))
        # Include persons (authors, editors) as a possible field
        ordered.update(dict.fromkeys(entry.persons.keys()))

    return list(ordered)

def _write_biblatex2csv(bib_database, csv_file_path, fieldnames):
    """Write the pybtex database to a CSV file.

    Args:
        bib_database: Object whose ``entries`` mapping goes from citation
            key to an entry with ``type``, ``fields`` and ``persons``.
        csv_file_path: Path of the CSV file to create (UTF-8).
        fieldnames: Column names. ``'ENTRYTYPE'`` and ``'ID'`` are appended
            to the written header when missing; the sequence passed in is
            left unmodified.
    """
    # Work on a copy so the caller's list is not changed as a side effect
    columns = list(fieldnames)
    for required in ('ENTRYTYPE', 'ID'):
        if required not in columns:
            columns.append(required)

    with open(csv_file_path, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=columns)
        writer.writeheader()
        for key, entry in bib_database.entries.items():
            # Initialize ID and ENTRYTYPE
            row = {'ID': key,
                   'ENTRYTYPE': entry.type}

            # Add the fields this entry actually has; the other columns are
            # left out of the row and written as empty cells by DictWriter
            for field in columns:
                if field in entry.fields:
                    row[field] = entry.fields[field]

            # Add persons information, like authors/editors
            for role in entry.persons:
                if role in columns:
                    row[role] = ' and '.join(str(person) for person in entry.persons[role])
            writer.writerow(row)

def convert_biblatex_to_csv(bib_file_path, csv_file_path):
    """Convert a bibliography file to CSV using the pybtex-based helpers.

    Loads ``bib_file_path``, collects the column names needed by its
    entries and writes one CSV row per entry to ``csv_file_path``.
    """
    database = _load_biblatex(bib_file_path)
    columns = _get_biblatex_fieldnames(database)
    _write_biblatex2csv(database, csv_file_path, columns)


In [None]:
import os

# Known file extensions (lower case, without the leading dot), grouped by kind.
# Text and document formats
__TXT = ['txt', 'doc', 'docx', 'pdf', 'rtf', 'html', 'htm', 'xml', 'md', 'epub', 'mobi', 'azw']
# Images
__PIC = ['jpg', 'jpeg', 'png', 'gif', 'bmp', 'tiff', 'svg', 'webp']
# Web assets ('html' and 'htm' are also listed in __TXT)
__WEB = ['html', 'htm', 'css', 'js']
# Audio
__AUDIO = ['mp3', 'wav', 'aac', 'flac', 'alac', 'ogg', 'm4a']
# Video
__VIDEO = ['mp4', 'avi', 'mov', 'wmv', 'flv', 'mkv', 'webm']
# Archives
__ARCHIVE = ['zip', 'rar', '7z', 'tar', 'gz', 'bz2', 'xz']
# Executables and scripts
__EXE = ['exe', 'msi', 'bin', 'sh', 'bat']
# Presentations
__PPT = ['ppt', 'pptx', 'odp']
# Spreadsheets
__EXCEL = ['xls', 'xlsx', 'ods', 'csv']

# Concatenation of all groups; a name whose extension is in this list is
# treated as already having an extension. It contains 'html' and 'htm' twice,
# which does not affect the membership tests it is used for.
_EXT_LIST = __TXT + __PIC + __WEB + __AUDIO + __VIDEO + __ARCHIVE + __EXE + __PPT + __EXCEL

# Usually don't need to run it because the files are normally named.
def add_extension_file(directory, ext="pdf", print_change=True):
    """Append an extension to files on disk that lack a known one.

    Walks ``directory`` recursively. Hidden files (name starting with a dot)
    are skipped. A file whose extension is not in ``_EXT_LIST`` is renamed
    to ``<name without trailing blanks>.<ext>``.

    Args:
        directory: Root directory to walk.
        ext: Extension to append, without the leading dot.
        print_change: When true, print a line for every renamed file.

    Returns:
        None. The number of renamed files is printed at the end.
    """
    known_exts = set(_EXT_LIST)

    # Count how many files have been changed
    change_cnt = 0

    if print_change:
        print("Now change the file extension.")

    # Walk through the directory
    for root, _dirs, files in os.walk(directory):
        for file in files:
            # Skip hidden files
            if file.startswith('.'):
                continue

            # The weird case that some files without ext might have blankspaces
            # in the end, but no blankspace in bib file path. The blanks are
            # removed before the check so that "a.pdf " is still recognised
            # as a pdf instead of becoming "a.pdf.pdf".
            clean_name = file.rstrip()

            # splitext yields '' for a name without a dot, so a file that is
            # literally called "pdf" is not mistaken for one with an extension
            file_extension = os.path.splitext(clean_name)[1][1:].lower()
            if file_extension in known_exts:
                continue

            old_path = os.path.join(root, file)
            # Only the file name is cleaned; blanks in the directory part of
            # the path are left untouched
            new_path = os.path.join(root, clean_name + "." + ext)

            # Never overwrite a file that already has the target name
            if os.path.exists(new_path):
                print(f'Skipped "{old_path}": "{new_path}" already exists')
                continue

            os.rename(old_path, new_path)

            # Count +1
            change_cnt += 1

            if print_change:
                print(f'Renamed "{old_path}" to "{new_path}"')

    print(f"File extension changed number: {change_cnt}")

def add_extension(df, col="file", ext="pdf"):
    """Append an extension to the file names of a DataFrame column.

    Args:
        df: DataFrame holding the records; it is modified in place.
        col: Name of the column with the file names or paths.
        ext: Extension to append, without the leading dot.

    Returns:
        The same DataFrame. It is returned unchanged when ``col`` is not one
        of its columns.
    """
    known_exts = set(_EXT_LIST)

    # Append "." + ext to a single value if necessary
    def _append_ext(filename):
        # Leave empty and non-string cells (e.g. NaN for a missing value) as
        # they are instead of failing on them
        if not isinstance(filename, str) or filename == "":
            return filename

        # The weird case that some files without ext might have blankspaces
        # in the end, but no blankspace in bib file path. The blanks are
        # removed before the check so that "a.pdf " is still recognised as
        # a pdf instead of becoming "a.pdf.pdf".
        clean_name = filename.strip()

        # splitext only looks at the last path component and yields '' when
        # that component has no dot
        file_extension = os.path.splitext(clean_name)[1][1:].lower()
        if file_extension in known_exts:
            return filename

        return clean_name + "." + ext

    if col not in df.columns:
        return df

    df[col] = df[col].apply(_append_ext)

    return df
