Please follow the setup instructions in [DEVELOPER.md](../../DEVELOPER.md) before running this notebook.

In [None]:
import json
import numpy as np
import pyarrow as pa
import uuid

from blob_access_basic import upload_blob, download_blob_as_file, download_blob_with_pandas
from data_access import list_objects, upload_object, download_object_by_name
from notebook_helpers import format_objects
import tools.example_instances

# Listing objects
- [GET https://<host>/<org_id>/objects?path=<path>]()
- returns a list of items:
  - folders have a `name` and in `links.list` a URL to list that folder
  - files have a `name`, `schema`, `version`, and in `links.download` a URL to download that object
  - filtering, pagination, history: TBD

In [None]:
# Hand the `list_objects` function and the root path '/' to the notebook helper.
# NOTE(review): presumably the helper calls `list_objects(path='/')` and renders
# the returned folders/files for display — confirm in notebook_helpers.
format_objects(list_objects, path='/')

# Uploading objects

### 1. Upload blobs to Azure
- write tables to Parquet files and calculate SHA-256 hashes
  - if the table is generated on-the-fly (streamed), use a UUID instead of the hash. Note that this disables automatic deduplication, because identical data then no longer maps to the same identifier.
- request upload URLs: [PUT https://<host>/<org_id>/data]() with the list of hashes/UUIDs
- for each hash/UUID this returns an `exists` flag and, if the blob does not exist yet, an upload URL
- upload the Parquet files to the upload URLs (Azure Blob Storage)

In [None]:
# Load the topography points from CSV, skipping the single header row.
# ndmin=2 keeps the result two-dimensional (rows x columns) even when the file
# contains only one data row; without it np.loadtxt returns a 1-D array, so
# len(imported_points) would count columns instead of points and the per-axis
# bounding-box computation further down would fail.
imported_points = np.loadtxt("d:\\Topo.csv", skiprows=1, delimiter=',', ndmin=2)

# Column layout of the table: three float64 columns named x, y and z.
schema = pa.schema([("x", pa.float64()), ("y", pa.float64()), ("z", pa.float64())])

# Pass the points and their schema to the project helper `upload_blob`.
# NOTE(review): presumably this writes the Parquet file, hashes it and uploads
# it as described in the steps above — confirm in blob_access_basic.
upload_result = upload_blob(imported_points, schema)
# Last expression of the cell, so the notebook displays the upload result.
upload_result

### 2. Build the object
- follow the schema to build a JSON object
- add the hashes/UUIDs from the previous step

In [None]:
# Column-wise extents of the imported points (columns are x, y, z).
bbox_min = imported_points.min(axis=0)
bbox_max = imported_points.max(axis=0)

# Description of the coordinate table; `data` carries the identifier that the
# blob upload step returned for the uploaded table.
coordinates = {
    "width": 3,
    "data_type": "float64",
    "length": len(imported_points),
    "data": upload_result.hash,
}

# Axis-aligned bounding box, listed as min/max pairs per axis.
bounding_box = {
    "min_x": bbox_min[0],
    "max_x": bbox_max[0],
    "min_y": bbox_min[1],
    "max_y": bbox_max[1],
    "min_z": bbox_min[2],
    "max_z": bbox_max[2],
}

# Assemble the pointset object; every instance gets a freshly generated UUID.
point_set = {
    "$schema": "/objects/pointset/1.0.1/pointset.schema.json",
    "id": str(uuid.uuid4()),
    "name": "Topography",
    "description": "Our imported topography points",
    "locations": {"coordinates": coordinates},
    "bounding_box": bounding_box,
    "coordinate_reference_system": {"epsg_code": 2048},
    "attributes": [],
}

# Serialize once and keep the string: the upload cell sends exactly this JSON.
point_set_json = json.dumps(point_set, indent=4)
print(point_set_json)

### 3. Optional: validate the object
- we use [jsonschema](https://pypi.org/project/jsonschema/) for validation
- because of the use of composition, validation errors are usually not very helpful

In [None]:
# Obtain a schema dict for the object from the project helper, then validate
# the object against it.
# NOTE(review): presumably `example_schema_dict` resolves the schema from the
# object's "$schema" entry — confirm in tools.example_instances.
# NOTE(review): this rebinds the notebook-global `schema`, which the blob
# upload cell uses for the pyarrow schema; that cell reassigns it when re-run.
schema = tools.example_instances.example_schema_dict(point_set)
tools.example_instances.validate_data(point_set, schema)

### 4. Upload the object to the Geoscience Object API
- [POST https://<host>/<org_id>/objects/<path>/<name>]()
- returns the object, version, and download URL

In [None]:
# Send the serialized pointset JSON to the API as "pointset_1.json" under
# '/examples/pointsets'. As the last expression of the cell, the helper's
# return value is what the notebook displays.
upload_object(point_set_json, name="pointset_1.json", path='/examples/pointsets')

# Downloading objects

### 1. Download the object from the Geoscience Object API
- use the download URL returned by the List Objects request or build it from a known path+name
- [GET https://<host>/<org_id>/objects/<path>/<name>]()

In [None]:
# Fetch the object by the same name and path that were used for the upload.
downloaded_object = download_object_by_name(name="pointset_1.json", path='/examples/pointsets')
# Keep the blob identifier stored under locations.coordinates.data; the blob
# download cell uses it to fetch the coordinate table.
# NOTE(review): attribute access implies the helper returns an attribute-style
# object rather than a plain dict — confirm in data_access.
coordinates_hash = downloaded_object.locations.coordinates.data
# Last expression of the cell, so the notebook displays the downloaded object.
downloaded_object

### 2. Download blobs from Azure
- collect all blob-hashes/UUIDs from the object
- request download URLs: [POST https://<host>/<org_id>/data]() with the list of hashes/UUIDs
- for each hash/UUID this returns an `exists` flag and, if the blob exists, a download URL
- download the blob

In [None]:
# Variant 1: download the coordinate blob with pandas.
# NOTE(review): a notebook only displays the value of a cell's last
# expression, so anything this call returns is not shown unless the helper
# prints it itself — confirm in blob_access_basic.
download_blob_with_pandas(coordinates_hash)

# Variant 2: download the same blob as a file with azure-storage-blob.
download_blob_as_file(coordinates_hash)