# Notebook for testing downhole collection construction

This notebook tests the creation of a `DownholeCollection` (an intermediary object) and its conversion to an Evo Downhole Collection.

A cell at the end of this notebook is available to publish the result if desired.

This notebook's main purpose is testing each part of the GEF conversion and adding in an IntervalTable as part of the final payload.

In [None]:
import os
from evo.notebooks import ServiceManagerWidget

# Credentials can be provided from .env or filled into second params below.
# Use `uv run --env-file .env` to load environment variables from .env file.
client_id = os.getenv("EVO_CLIENT_ID", "")
base_uri = os.getenv("EVO_BASE_URI", "")
discovery_url = os.getenv("EVO_DISCOVERY_URL", "")

manager = await ServiceManagerWidget.with_auth_code(
    client_id=client_id, base_uri=base_uri, discovery_url=discovery_url
).login()

In [None]:
from evo.data_converters.common import create_evo_object_service_and_data_client

# Create the object-service client and the data client from the logged-in service manager widget.
evo_clients = create_evo_object_service_and_data_client(service_manager_widget=manager)
object_service_client, data_client = evo_clients

In [None]:
import glob
import pandas as pd
import random
from pprint import pp
from evo.data_converters.gef.converter import create_from_parsed_gef_cpts, parse_gef_files
from evo.data_converters.common.objects import DownholeCollectionToGeoscienceObject
from evo.data_converters.common.objects.downhole_collection import ColumnMapping

# Data files in data/input have been synthetically generated from a python script.
# glob returns matches in a filesystem-dependent order, so sort them to keep the
# order in which the files are parsed the same on every machine and every run.
gef_files = sorted(glob.glob("data/input/*.gef"))
if not gef_files:
    # Fail early with a clear message, e.g. when the kernel was started from another working directory.
    raise FileNotFoundError(
        "No .gef files matched 'data/input/*.gef' (relative to the current working directory)"
    )

parsed_cpt_files = parse_gef_files(gef_files)
# To inspect the parse result, uncomment: pp(parsed_cpt_files)

downhole_collection = create_from_parsed_gef_cpts(parsed_cpt_files)

# Fixed seed so that "Restart Kernel -> Run All" regenerates exactly the same
# synthetic interval table on every run.
SEED = 42
# Number of holes to generate intervals for (hole indices 1..HOLE_COUNT).
HOLE_COUNT = 4
# Each hole has intervals from 0 to 20m (based on sample data in ./data/input).
MAX_DEPTH = 20.0

lithologies: list[str] = ["sandstone", "limestone", "shale", "granite", "basalt", "mudstone", "conglomerate"]

# Dedicated generator: reproducible without touching the global `random` state.
rng = random.Random(SEED)

# Generate a somewhat complex interval table
interval_table_data = {
    # Hole index will come from LOCA_ID and mapped to the index of the hole_id in the collars df using 1-based arrays
    "hole_index": [],
    "GEOL_TOP": [],
    "GEOL_BASE": [],
    "GEOL_DENSITY": [],
    "GEOL_LITHOLOGY": [],
}

for hole_id in range(1, HOLE_COUNT + 1):
    cur_depth = 0.0

    # Stack contiguous intervals of random thickness until the hole reaches MAX_DEPTH.
    while cur_depth < MAX_DEPTH:
        # Clamp the last interval so that it ends exactly at MAX_DEPTH.
        base_depth = min(cur_depth + rng.uniform(1.5, 4.0), MAX_DEPTH)

        interval_table_data["hole_index"].append(hole_id)
        interval_table_data["GEOL_TOP"].append(cur_depth)
        interval_table_data["GEOL_BASE"].append(base_depth)
        interval_table_data["GEOL_DENSITY"].append(rng.uniform(1.2, 2.6))
        interval_table_data["GEOL_LITHOLOGY"].append(rng.choice(lithologies))

        # The next interval starts where this one ends.
        cur_depth = base_depth

interval_table = pd.DataFrame(interval_table_data)

# create categorical attribute
interval_table["GEOL_LITHOLOGY"] = interval_table["GEOL_LITHOLOGY"].astype("category")

# Show the generated interval table before attaching it to the collection.
display(interval_table)

# GEOL_TOP / GEOL_BASE are passed as the from/to columns of the measurement table.
depth_columns = ColumnMapping(FROM_COLUMNS=["GEOL_TOP"], TO_COLUMNS=["GEOL_BASE"])
downhole_collection.add_measurement_table(input=interval_table, column_mapping=depth_columns)

# Convert the intermediary collection into a geoscience object and print the result.
converter = DownholeCollectionToGeoscienceObject(
    dhc=downhole_collection,
    data_client=data_client,
)
geoscience_object = converter.convert()

pp(geoscience_object)

Optionally, you can publish the constructed downhole collection by running the following cell.

In [None]:
from evo.data_converters.common import publish_geoscience_objects

# Publish the converted object under the "gef-notebook" path prefix and print what comes back.
# NOTE(review): overwrite_existing_objects=True presumably replaces an object published
# by an earlier run of this cell — confirm against the publish helper.
objects_to_publish = [geoscience_object]

result = publish_geoscience_objects(
    object_models=objects_to_publish,
    object_service_client=object_service_client,
    data_client=data_client,
    path_prefix="gef-notebook",
    overwrite_existing_objects=True,
)

pp(result)