# Pre-requisites - IMPORTANT!!!

To run this notebook properly, you need to:

1. Have a `.env` file at the root of your project (you can use `.env.example`, with the values in it, as a starting point)
2. Run `export PYTHONPATH=$PWD` in the root of the project

In [19]:
import os
import json
import numpy as np
from dotenv import load_dotenv, dotenv_values

## Functions

These functions are also in the conabio module

In [20]:
def login_alfresco(api_key):
    """
    Creates a session in Alfresco

    The session only carries the API key as the ``x-api-key`` header; no
    request is sent to the server here.

    Parameters
    ----------
    api_key : string
        Api key that can come from the .env credentials

    Returns
    -------
    session : requests.Session or None
        The configured session, or None when the session could not be
        created (the error is printed in that case).
    """
    # requests is not imported at the top of the notebook, so it is imported
    # here. The import must come after the docstring, otherwise the string
    # above is just a discarded expression and help() shows nothing.
    import requests

    try:
        session = requests.Session()
        session.headers.update({'x-api-key': api_key})

        return session
    except Exception as e:
        print("Login failed: ", e)
        return None


In [21]:
def save_json(dictionary, file_path_name, overwrite=True):
    """
    Save a dictionary as an indented JSON file

    Parameters
    ----------
    dictionary : dict
        Content to serialize. It must be JSON serializable.

    file_path_name : string
        Complete file path with the name of the file

    overwrite : boolean
        Default is True. When False, an existing file is left untouched and
        the call fails instead of replacing it.

    Raises
    ------
    FileExistsError
        If ``overwrite`` is False and ``file_path_name`` already exists.
    """
    # Serialize first so a serialization error cannot leave an empty file.
    json_object = json.dumps(dictionary, indent=4)

    # "w" truncates an existing file; "x" creates the file and refuses to
    # touch one that is already there. ("w+" also truncates, so it never
    # protected existing files.)
    write_mode = "w" if overwrite else "x"

    with open(file_path_name, write_mode, encoding="utf-8") as outfile:
        outfile.write(json_object)

In [None]:
def read_json(path):
    """
    Read a JSON file

    Parameters
    ----------
    path : string
        Path of the JSON file to read

    Returns
    -------
    json_file : dict or list
        The parsed content of the file
    """
    # The context manager closes the file even if parsing fails.
    with open(path) as f:
        json_file = json.load(f)

    return json_file

In [None]:
def save_list_as_csv(path, header, list_path, overwrite=True):
    """
    Save a list of rows as a CSV file

    Parameters
    ----------
    path : string
        Complete file path with the name of the CSV file

    header : list
        Column names, written as the first row

    list_path : list of lists
        Rows to write; every inner list is one row

    overwrite : boolean
        Default is True. When False, an existing file is left untouched and
        the call fails instead of replacing it.

    Raises
    ------
    FileExistsError
        If ``overwrite`` is False and ``path`` already exists.
    """
    import csv

    # "x" refuses to replace an existing file; "w+" truncated it just like "w".
    write_mode = "w" if overwrite else "x"

    # newline="" lets the csv module control line endings; without it extra
    # blank rows appear on platforms that translate "\n" to "\r\n".
    with open(path, write_mode, newline="") as f:
        writer = csv.writer(f)
        writer.writerow(header)
        writer.writerows(list_path)

### Load the environment set at the root (.env)

In [22]:
# Export the variables found in the .env file to the process environment.
load_dotenv()
# Keep the .env values in a dictionary as well; the search cell reads
# ALFRESCO_API_ENDPOINT and ALFRESCO_API_KEY from it.
CONFIG = dotenv_values()

### Parameters

- The cumulus you want to extract videos from

- Date intervals you want the videos from 

- The output path where the results are going to be saved


In [23]:
# Cumulus identifier; it is used to name the output files.
CUMULUS = 92
# Bounds of the deployment-date range used in the search query.
MIN_DATE = "2021-11-01" 
MAX_DATE = "2021-12-01" 

# Folder that receives the JSON pages and the CSV with the paths.
OUTPUT_PATH = "../../../results/search"

### Constants

In [28]:
# Page size requested from the search API; also the step used for skipCount.
MAX_ITEMS = 5000
# Node type searched for (used as "sipecam:<FILE_TYPE>") and in file names.
FILE_TYPE = "Video"
# S3 bucket queried in the last section of the notebook.
BUCKET_NAME = "sipecam-open-data"

### Create your query

In this query we search for the videos from cumulus 92 that were deployed in November 2021.

In [29]:
# Build the AFTS query from the parameters defined above. The cumulus comes
# from CUMULUS (instead of a hard-coded 92) so the query always matches the
# cumulus used to name the output files.
query = f"+TYPE: \"sipecam:{FILE_TYPE}\" AND +(sipecam:DateDeployment: [{MIN_DATE} TO {MAX_DATE}]) AND +(sipecam:CumulusName:{CUMULUS})"

### Call Action

In [30]:
# Run the search page by page, store every page as JSON and finally write a
# CSV with the path of every file found.
if CONFIG.get("ALFRESCO_API_ENDPOINT") is None or CONFIG.get("ALFRESCO_API_KEY") is None:
    raise Exception("Keys not detected")
else:
    # The JSON pages and the CSV are written here; create it if it is missing.
    os.makedirs(OUTPUT_PATH, exist_ok=True)

    # The session only stores the API key header, so one is enough for all pages.
    session = login_alfresco(CONFIG.get("ALFRESCO_API_KEY"))
    if session is None:
        raise Exception("Login failed")

    skipcount = 0
    end_of_pagination = False
    saved_files = []

    # A cumulus can have more than the MAX_ITEMS allowed in the pagination, so
    # a loop is necessary.
    while not end_of_pagination:

        req = session.post(CONFIG.get("ALFRESCO_API_ENDPOINT"),
                           data=json.dumps({
                               "query": {
                                   "query": query,
                                   "language": "afts"
                               },
                               "include": ["properties", "path"],
                               "sort": [{"type": "FIELD", "field": "cm:name", "ascending": "false"}],
                               "paging": {
                                   "maxItems": MAX_ITEMS,
                                   "skipCount": skipcount
                               }
                           })
                        )

        result = req.json()

        # A response without pagination data cannot be a result page: stop
        # here and show the error (or the whole payload when there is no
        # "error" key) instead of saving it and requesting another page.
        pagination = result.get("list", {}).get("pagination")
        if pagination is None:
            raise Exception(result.get("error", result))

        end_of_pagination = not pagination.get("hasMoreItems", False)

        file_name = f"{OUTPUT_PATH}/search_result_{FILE_TYPE}_{CUMULUS}_{skipcount}.json"
        # Every pagination will be saved as a json
        save_json(result, file_name)
        saved_files.append(file_name)
        skipcount += MAX_ITEMS

    path_list = []

    # Afterward we only want to extract the path in order to find easily the files
    for json_file in saved_files:
        page = read_json(json_file)

        for entry in page["list"]["entries"]:
            complete_path = f'{entry["entry"]["path"]["name"]}/{entry["entry"]["name"]}'

            # This replacement name will depend on your mounting path.
            complete_path = complete_path.replace("/Company Home/Sites/sipecam/documentLibrary/", "data/")
            # One-element rows: the CSV has a single "item" column.
            path_list.append([complete_path])

    path_csv = f"{OUTPUT_PATH}/{FILE_TYPE}_path_{CUMULUS}.csv"
    save_list_as_csv(path_csv, ["item"], path_list)

## Access to one object

In [None]:
import boto3

In [None]:
# S3 client used by the lookup below. No credentials or region are passed
# here, so boto3 has to resolve them from its own configuration.
s3_client = boto3.client('s3')

## See if the video exists on the bucket

In [None]:
# `path` is not defined anywhere earlier in the notebook, so the first path
# found by the search is used as the object to look up.
# NOTE(review): assumes the S3 object key is the same as the path written to
# the CSV (with the "data/" prefix) - confirm against the bucket layout.
if not path_list:
    raise ValueError("The search returned no paths to look up in the bucket.")
path = path_list[0][0]

result = s3_client.list_objects_v2(Bucket=BUCKET_NAME, Prefix=path)

# Prefix matching also returns keys that merely start with `path`, so only an
# exact key match counts; otherwise get_object below could fail.
key_exists = any(item["Key"] == path for item in result.get("Contents", []))

if key_exists:
    print("Key exists in the bucket.")
    obj = s3_client.get_object(Bucket=BUCKET_NAME, Key=path)
    # Reads the whole object into memory.
    object_stream = obj['Body'].read()
else:
    print("Key doesn't exist in the bucket.")