In [9]:
import os
import json
import numpy as np
from dotenv import load_dotenv, dotenv_values

## Functions

These functions are also in the conabio module

In [3]:
def login_alfresco(api_key):
    """
    Creates a session in Alfresco

    The returned session sends the api key in the ``x-api-key`` header of
    every request made through it.

    Parameters
    ----------
    api_key : string
        Api key that can come from the .env credentials

    Return
    ------
    session : requests.Session or None
        The configured session, or None when the session could not be
        created (the cause is printed).
    """
    # Imported after the docstring: a string that follows another statement
    # is not a docstring, so help() and __doc__ would otherwise be empty.
    import requests

    try:
        session = requests.Session()
        session.headers.update({'x-api-key': api_key})
    except Exception as e:
        print("Login failed: ", e)
        # Explicit so callers know they must check for None.
        return None

    return session


In [30]:
def save_json(dictionary, file_path_name, overwrite=True):
    """
    Save a dictionary as an indented JSON file

    Parameters
    ----------
    dictionary : dict
        Content to write; it must be JSON serializable

    file_path_name : string
        Complete file path with the name of the file

    overwrite : boolean
        Default is True. When False an existing file is left untouched
        and FileExistsError is raised instead

    Raises
    ------
    FileExistsError
        If overwrite is False and file_path_name already exists
    """
    # Serialize before opening the file, so a serialization error cannot
    # leave an empty or truncated file behind.
    json_object = json.dumps(dictionary, indent=4)

    # "w" truncates an existing file. "x" creates the file and fails when it
    # already exists ("w+" also truncates, so it cannot protect a file).
    mode = "w" if overwrite else "x"

    with open(file_path_name, mode) as outfile:
        outfile.write(json_object)

### Load the environment set at the root (.env)

In [2]:
# Load the variables declared in the .env file into the process environment.
load_dotenv()
# Keep the parsed .env values in a dict as well; the search cell reads
# ALFRESCO_API_ENDPOINT and ALFRESCO_API_KEY from it.
CONFIG = dotenv_values()

### Parameters

In [28]:
# Cumulus to search; interpolated into the sipecam:CumulusName clause of the query.
CUMULUS = 92
# Deployment date to match; interpolated into the sipecam:DateDeployment clause.
DATE = "2021-07-29T00:00:00.000+0000"

# Directory where every page of search results is saved as a JSON file.
OUTPUT_PATH = "../../results/search/"

### Constants

In [34]:
# Page size sent as "maxItems" in each request, and the step added to the
# skip count after every page.
MAX_ITEMS = 5000
# Suffix of the node type searched for: the query matches TYPE "sipecam:<FILE_TYPE>".
FILE_TYPE = "Audio"

### Create your query

In this query we search for the audio files in cumulus 92 that were deployed on 29-07-2021 (the value of `DATE`).

In [25]:
# Search query, one clause per line: node type, cumulus name and
# deployment date, all joined with AND.
query = (
    f'+TYPE: "sipecam:{FILE_TYPE}"'
    f' AND (sipecam:CumulusName:"{CUMULUS}")'
    f' AND (sipecam:DateDeployment:"{DATE}")'
)

### Call Action

In [None]:
# Read the connection settings once; both must be defined in the .env file.
api_endpoint = CONFIG.get("ALFRESCO_API_ENDPOINT")
api_key = CONFIG.get("ALFRESCO_API_KEY")

if api_endpoint is None or api_key is None:
    raise Exception("Keys not detected")

# One session is enough for all pages; it carries the api key header.
session = login_alfresco(api_key)
if session is None:
    # login_alfresco prints the cause and returns None when it fails.
    raise Exception("Login failed")

# Make sure the output directory exists before the first page is written.
if OUTPUT_PATH:
    os.makedirs(OUTPUT_PATH, exist_ok=True)

skipcount = 0
end_of_pagination = False
saved_files = []

# A cumulus can have more than the MAX_ITEMS allowed in the pagination, so
# a loop is necessary.
while not end_of_pagination:
    req = session.post(
        api_endpoint,
        # json= serializes the body and declares it as application/json.
        json={
            "query": {
                "query": query,
                "language": "afts"
            },
            "include": ["properties", "path"],
            # "ascending" is a boolean flag, not the string "false".
            "sort": [{"type": "FIELD", "field": "cm:name", "ascending": False}],
            "paging": {
                "maxItems": MAX_ITEMS,
                "skipCount": skipcount
            }
        },
    )

    result = req.json()

    # Failures are expected under an "error" key; stop before saving anything.
    if result.get("error"):
        raise Exception(result["error"])

    # Without pagination data the loop could never end, so fail loudly
    # instead of requesting (and saving) the same page forever.
    try:
        has_more_items = result["list"]["pagination"]["hasMoreItems"]
    except (KeyError, TypeError) as e:
        raise Exception("Unexpected search response: pagination information is missing") from e

    # os.path.join avoids the double slash when OUTPUT_PATH ends with "/".
    file_name = os.path.join(
        OUTPUT_PATH, f"search_result_{FILE_TYPE}_{CUMULUS}_{skipcount}.json"
    )
    # Every pagination will be saved as a json
    save_json(result, file_name)
    saved_files.append(file_name)
    skipcount += MAX_ITEMS

    end_of_pagination = not has_more_items