# Notebook to update hrefs in particular collections

In [106]:
import json
import os

import boto3

In [107]:
# Credentials are read from the environment so that real secrets are never
# typed into (and accidentally committed with) the notebook. Export
# AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY / AWS_SESSION_TOKEN before starting
# the kernel. Unset or empty variables become None, in which case boto3 falls
# back to its default credential chain (shared config, SSO, instance role, ...).
AWS_ACCESS_KEY_ID = os.environ.get("AWS_ACCESS_KEY_ID") or None
AWS_SECRET_ACCESS_KEY = os.environ.get("AWS_SECRET_ACCESS_KEY") or None
AWS_SESSION_TOKEN = os.environ.get("AWS_SESSION_TOKEN") or None

In [108]:
# Shared S3 client used by the cells below, built from the credential
# constants defined in the configuration cell.
s3_client = boto3.client(
    service_name="s3",
    aws_session_token=AWS_SESSION_TOKEN,
    aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
    aws_access_key_id=AWS_ACCESS_KEY_ID,
)

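The `update_json_href` function takes a `bucket_name`, a `collection_name` (used as the S3 key prefix `collection_name/`), an `old_href_substring`, and a `new_href_substring`. For every `.json` object under that prefix, it replaces `old_href_substring` with `new_href_substring` in each asset's `href` and writes the file back to S3.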

In [109]:
def update_json_href(
    bucket_name,
    collection_name,
    old_href_substring,
    new_href_substring,
    s3=None,
    dry_run=False,
):
    """Rewrite asset hrefs in every JSON file of a collection stored in S3.

    Every object under ``<collection_name>/`` whose key ends in ``.json`` is
    downloaded and, for each entry of its top-level ``"assets"`` mapping,
    ``old_href_substring`` is replaced by ``new_href_substring`` in the
    ``"href"`` value. Files that actually changed are written back to the
    same key; unchanged files are left untouched, so re-running is harmless.

    Keyword arguments:
    bucket_name -- the s3 bucket name
    collection_name -- the collection name; ``f"{collection_name}/"`` is used
        as the S3 key prefix
    old_href_substring -- the string to replace in href (must be non-empty)
    new_href_substring -- the new href substring
    s3 -- optional S3 client to use; defaults to the notebook-level
        ``s3_client``
    dry_run -- when True, only report which keys would be rewritten and do
        not upload anything

    Raises:
    ValueError -- if ``old_href_substring`` is empty
    """
    if not old_href_substring:
        # str.replace("", x) inserts x between every character, which would
        # corrupt every href in the collection.
        raise ValueError("old_href_substring must be a non-empty string")

    if s3 is None:
        s3 = s3_client
    s3_prefix = f"{collection_name}/"

    # A single list_objects_v2 call returns at most 1000 keys, so paginate to
    # see the whole collection. Pages for an empty prefix carry no "Contents".
    paginator = s3.get_paginator("list_objects_v2")
    json_keys = [
        item["Key"]
        for page in paginator.paginate(Bucket=bucket_name, Prefix=s3_prefix)
        for item in page.get("Contents", [])
        if item["Key"].endswith(".json")
    ]

    for key in json_keys:
        response = s3.get_object(Bucket=bucket_name, Key=key)
        data = json.loads(response["Body"].read().decode("utf-8"))

        # Not every JSON file under the prefix necessarily has an "assets"
        # mapping; skip those instead of failing mid-run.
        assets = data.get("assets") if isinstance(data, dict) else None
        if not isinstance(assets, dict):
            continue

        changed = False
        for asset in assets.values():
            href = asset.get("href") if isinstance(asset, dict) else None
            if not isinstance(href, str):
                continue
            new_href = href.replace(old_href_substring, new_href_substring)
            if new_href != href:
                asset["href"] = new_href
                changed = True

        if not changed:
            continue

        if dry_run:
            print(f"Would update {key}")
            continue

        # put_object replaces the whole object, so carry the original
        # Content-Type over rather than letting it reset to the S3 default.
        s3.put_object(
            Bucket=bucket_name,
            Key=key,
            Body=json.dumps(data),
            ContentType=response.get("ContentType") or "application/json",
        )
        print(f"Updated {key}")

The next two cells call `update_json_href` to update the `hlsl30-ej-reprocessed` and `hlss30-ej-reprocessed` collections in the `veda-data-store` bucket. Specifically, they replace every occurrence of "covid-eo-data" in the asset hrefs with "veda-data-store".

In [110]:
# Point the HLS Landsat (L30) collection's asset hrefs at the new bucket.
update_json_href(
    bucket_name="veda-data-store",
    collection_name="hlsl30-ej-reprocessed",
    old_href_substring="covid-eo-data",
    new_href_substring="veda-data-store",
)

Updated hlsl30-ej-reprocessed/2017/19QHA/HLS.L30.T19QHA.2017157T144341.v2.0/HLS.L30.T19QHA.2017157T144341.v2.0_stac-ej-reprocessed.json
Updated hlsl30-ej-reprocessed/2017/19QHA/HLS.L30.T19QHA.2017173T144347.v2.0/HLS.L30.T19QHA.2017173T144347.v2.0_stac-ej-reprocessed.json
Updated hlsl30-ej-reprocessed/2017/19QHA/HLS.L30.T19QHA.2017205T144356.v2.0/HLS.L30.T19QHA.2017205T144356.v2.0_stac-ej-reprocessed.json
Updated hlsl30-ej-reprocessed/2017/19QHA/HLS.L30.T19QHA.2017221T144403.v2.0/HLS.L30.T19QHA.2017221T144403.v2.0_stac-ej-reprocessed.json
Updated hlsl30-ej-reprocessed/2017/19QHA/HLS.L30.T19QHA.2017237T144407.v2.0/HLS.L30.T19QHA.2017237T144407.v2.0_stac-ej-reprocessed.json
Updated hlsl30-ej-reprocessed/2017/19QHA/HLS.L30.T19QHA.2017269T144414.v2.0/HLS.L30.T19QHA.2017269T144414.v2.0_stac-ej-reprocessed.json
Updated hlsl30-ej-reprocessed/2017/19QHA/HLS.L30.T19QHA.2017285T144419.v2.0/HLS.L30.T19QHA.2017285T144419.v2.0_stac-ej-reprocessed.json
Updated hlsl30-ej-reprocessed/2017/19QHA/HLS.L30

In [111]:
# Point the HLS Sentinel (S30) collection's asset hrefs at the new bucket.
update_json_href(
    bucket_name="veda-data-store",
    collection_name="hlss30-ej-reprocessed",
    old_href_substring="covid-eo-data",
    new_href_substring="veda-data-store",
)

Updated hlss30-ej-reprocessed/2017/19QHA/HLS.S30.T19QHA.2017193T150719.v2.0/HLS.S30.T19QHA.2017193T150719.v2.0_stac-ej-reprocessed.json
Updated hlss30-ej-reprocessed/2017/19QHA/HLS.S30.T19QHA.2017218T150721.v2.0/HLS.S30.T19QHA.2017218T150721.v2.0_stac-ej-reprocessed.json
Updated hlss30-ej-reprocessed/2017/19QHA/HLS.S30.T19QHA.2017233T150719.v2.0/HLS.S30.T19QHA.2017233T150719.v2.0_stac-ej-reprocessed.json
Updated hlss30-ej-reprocessed/2017/19QHA/HLS.S30.T19QHA.2017278T150721.v2.0/HLS.S30.T19QHA.2017278T150721.v2.0_stac-ej-reprocessed.json
Updated hlss30-ej-reprocessed/2017/19QHA/HLS.S30.T19QHA.2017293T150709.v2.0/HLS.S30.T19QHA.2017293T150709.v2.0_stac-ej-reprocessed.json
Updated hlss30-ej-reprocessed/2017/19QHA/HLS.S30.T19QHA.2017318T150721.v2.0/HLS.S30.T19QHA.2017318T150721.v2.0_stac-ej-reprocessed.json
Updated hlss30-ej-reprocessed/2017/19QHA/HLS.S30.T19QHA.2017323T150709.v2.0/HLS.S30.T19QHA.2017323T150709.v2.0_stac-ej-reprocessed.json
Updated hlss30-ej-reprocessed/2017/19QHA/HLS.S30