## Publish a Drilling Campaign object

This example shows how to convert drilling campaign data in CSV format into an Evo geoscience object using the Evo Python SDK.

### Requirements

You must have a Seequent account with the Evo entitlement to use this notebook.

The following parameters must be provided:

- The client ID of your Evo application.
- The callback/redirect URL of your Evo application.

To obtain these app credentials, refer to the [Apps and tokens guide](https://developer.seequent.com/docs/guides/getting-started/apps-and-tokens) in the Seequent Developer Portal.

In [None]:
import uuid

import pandas as pd
from evo_schemas.components import (
    BoundingBox_V1_0_1,
    CategoryAttribute_V1_1_0,
    CategoryData_V1_0_1,
    Crs_V1_0_1_EpsgCode,
    HoleChunks_V1_0_0,
    HoleCollars_V1_0_0,
    NanCategorical_V1_0_1,
    StringAttribute_V1_1_0,
)
from evo_schemas.elements import (
    FloatArray3_V1_0_1,
    FloatArray6_V1_0_1,
    IntegerArray1_V1_0_1,
    LookupTable_V1_0_1,
    StringArray_V1_0_1,
)
from evo_schemas.objects import (
    DrillingCampaign_V1_0_0,
    DrillingCampaign_V1_0_0_Planned,
    DrillingCampaign_V1_0_0_Planned_Path_NaturalDeviation,
)

from evo.notebooks import FeedbackWidget, ServiceManagerWidget
from evo.objects import ObjectAPIClient

# Local directory that is passed to the service manager as its cache location and that
# also serves as the root of the input data folder below.
cache_location = "data"
# Folder from which the input CSV files are read later in the notebook.
input_path = f"{cache_location}/input"

# Evo app credentials
# Replace both placeholders with the values of your own Evo application before running.
client_id = "<your-client-id>"
redirect_url = "<your-redirect-url>"

# Create the service manager widget and log in.
# NOTE: top-level `await` requires an async-aware environment such as a Jupyter notebook.
manager = await ServiceManagerWidget.with_auth_code(
    discovery_url="https://discover.api.seequent.com",
    redirect_url=redirect_url,
    client_id=client_id,
    cache_location=cache_location,
).login()

### Use the Evo Python SDK to create an object client and a data client

In [None]:
# The object client will manage your auth token and Geoscience Object API requests.
# It is built from the environment and connector of the logged-in service manager.
object_client = ObjectAPIClient(manager.get_environment(), manager.get_connector())

# The data client will manage saving your data as Parquet and publishing your data to Evo storage.
# It is given the service manager's cache (presumably backed by `cache_location` - confirm).
data_client = object_client.get_data_client(manager.cache)

### Define helper functions

These functions assist with assembling the elements and components of geoscience objects and for viewing the new object in the Evo portal.

In [None]:
import numpy as np


def create_hole_id_mapping(hole_id_table, value_list):
    """
    Create a hole ID mapping table based on the hole ID table and the value list.

    Each contiguous run of identical hole IDs in `value_list["data"]` is recorded against
    the matching row of the mapping as an `offset` (0-based position of the first row of
    the run within `value_list`) and a `count` (number of rows in the run).

    Rows whose hole ID is not present in `hole_id_table["value"]` are reported and left out
    of the mapping. They still occupy positions in `value_list`, so the offsets of the holes
    that follow them keep pointing at the correct rows. Hole IDs that never appear in
    `value_list` keep an offset and count of 0. If the same hole ID occurs in more than one
    run, the last run wins, so `value_list` should be grouped (e.g. sorted) by hole ID.

    Args:
        hole_id_table (pd.DataFrame): The hole ID lookup table, with a `key` column and a
            `value` column holding the hole IDs.
        value_list (pd.DataFrame): The value list to create the mapping from, with the hole
            ID of each row in a `data` column.

    Returns:
        mapping_df (pd.DataFrame): The hole ID mapping table, one row per row of
            `hole_id_table`, with the columns `hole_index` (int32, equal to `key - 1`),
            `offset` (uint64) and `count` (uint64).
    """

    num_keys = len(hole_id_table.index)

    mapping_df = pd.DataFrame(list())
    mapping_df["hole_index"] = hole_id_table["key"] - 1
    mapping_df["offset"] = [0] * num_keys
    mapping_df["count"] = [0] * num_keys

    print(mapping_df.head())

    mapping_df["hole_index"] = mapping_df["hole_index"].astype("int32")
    mapping_df["offset"] = mapping_df["offset"].astype("uint64")
    mapping_df["count"] = mapping_df["count"].astype("uint64")

    print(mapping_df.head())

    hole_ids = value_list["data"].tolist()
    num_rows = len(hole_ids)

    run_start = 0
    while run_start < num_rows:
        hole_id = hole_ids[run_start]

        # Find the end (exclusive) of the run of rows that share this hole ID.
        run_end = run_start + 1
        while run_end < num_rows and hole_ids[run_end] == hole_id:
            run_end += 1

        matching_keys = hole_id_table.loc[hole_id_table["value"] == hole_id, "key"]
        if matching_keys.empty:
            # Unknown hole ID: skip the run, but do not touch any other hole's entry.
            print("Ignoring this hole ID")
        else:
            # The first matching key is used, and `hole_index` is `key - 1`.
            row_mask = mapping_df["hole_index"] == matching_keys.iloc[0] - 1
            mapping_df.loc[row_mask, "count"] = run_end - run_start
            # The offset is the real row position, so skipped runs are accounted for.
            mapping_df.loc[row_mask, "offset"] = run_start

        run_start = run_end

    return mapping_df


def create_category_lookup_and_values(attribute):
    """
    Create a category lookup table and the associated column of mapped key values.

    Missing values in `attribute["data"]` are treated as the empty-string category. The
    distinct values are sorted and numbered from 1, and every row of the input is replaced
    by the key of its value. The input dataframe is not modified.

    Args:
        attribute (pd.DataFrame): An attribute of a geoscience object, with its values in a
            `data` column.

    Returns:
        table_df (pd.DataFrame): The category lookup table, with a `key` column (1..n) and a
            `value` column (the sorted distinct values).
        values_df (pd.DataFrame): The associated column with mapped key values, in a `data`
            column that keeps the row order and index of the input.
    """

    # `fillna` returns a new series, so the caller's dataframe is left untouched.
    data = attribute["data"].fillna("")

    categories = sorted(set(data))

    # Create lookup table
    table_df = pd.DataFrame([])
    table_df["key"] = list(range(1, len(categories) + 1))
    table_df["value"] = categories

    # Create data column
    key_by_value = table_df.set_index("value")["key"]
    values_df = pd.DataFrame({"data": data.map(key_by_value)})
    return table_df, values_df

### Define object metadata

Geoscience object data must conform to a specific object schema. The `evo-schemas` package provides Pydantic models that make it easy to work with the equivalent JSON schemas. 
For this example we'll use v1.0.0 of the drilling-campaign schema, via the relevant Pydantic model.

Enter values for these parameters that are required by the drilling-campaign schema.
- `object_hole_id`: The column name that represents your hole ID. This value should be the same across all input files.
- `object_name`: The name of the object.
- `object_path`: The file path where the object will be found.
- `object_epsg_code`: (Optional) The EPSG region code that matches the location of your data. Leave as `None` if not required.
- `object_tags`: (Optional) A dictionary of additional tags to be assigned to the object. Leave as `None` if not required.

In [None]:
# Set the name of the hole ID parameter in the data.
# This column name is used to sort the collar data and to read hole IDs from the planned data.
object_hole_id = "hole_id"

# Set other object properties.
object_name = "Drilling_campaign_SDK_demo"
object_path = "Jupyter_Example"
object_epsg_code = 32650
object_tags = {"Source": "Jupyter Notebook"}

# Define a coordinate reference system (CRS) for the object.
# NOTE(review): `object_epsg_code` is passed unconditionally, so leaving it as `None` relies on
# `Crs_V1_0_1_EpsgCode` accepting `None` - confirm before doing so.
coordinate_reference_system = Crs_V1_0_1_EpsgCode(epsg_code=object_epsg_code)

# Create an empty list to store geoscience object collections.
# NOTE(review): this list is not referenced by any other cell visible in this notebook.
collections = []

# Define the object path.
full_obj_path = f"{object_path}/{object_name}.json"

### Define object attributes and keys

In [None]:
# List all of the attributes to be included in the object. Every attribute must have a unique key associated with it.
# Keys must be unique across the entire object, and we recommend saving a reference to the keys for later use.
# Each entry has:
#   - "name": the column of the collar data that holds the attribute values.
#   - "key": a freshly generated UUID string.
#   - "type": "string" or "category"; other values are reported as unsupported and skipped
#     when the attributes are built.
collar_attributes = [
    {
        "name": "Comments",
        "key": str(uuid.uuid4()),
        "type": "string",
    }
]

## Collar data

In [None]:
# Define the input file path.
input_file_path = f"{input_path}/collar.csv"

# Load the collar file, count the number of hole IDs and sort the data based on the hole ID.
input_df = pd.read_csv(input_file_path)
print(input_df.head())
# The number of hole IDs is the number of rows in the collar file (one row per hole is assumed).
num_hole_ids = len(input_df.index)
# `sorted_collar_df` gets a fresh 0..n-1 index; `input_df` itself keeps the order of the file.
sorted_collar_df = input_df.sort_values([object_hole_id]).reset_index(drop=True)

### Hole ID table

Components in the drilling campaign object will reference the hole IDs defined in the collar table.

This means that we must create a 2-column dataframe that maps a unique `key` to a `hole ID`.

The `planned` and `interim` sections of the drilling campaign object make use of this mapping, so we provide a 1-column dataframe that lists the keys in the order they are displayed in the input file.

In [None]:
# Build the hole ID lookup table in one step: keys 1..n paired with the sorted hole IDs.
hole_id_table_df = pd.DataFrame(
    {
        "key": list(range(1, num_hole_ids + 1)),
        "value": sorted_collar_df[object_hole_id],
    }
)

# Save the table through the data client and build the lookup table element from the result.
saved_hole_id_table = data_client.save_dataframe(hole_id_table_df)
hole_id_table_component = LookupTable_V1_0_1.from_dict(saved_hole_id_table)
print(hole_id_table_df.head())

### Hole ID values

In [None]:
# One key per hole, numbered 1 to `n`, where `n` is the number of hole IDs.
# The keys refer to the rows of the hole ID lookup table created in the previous step.
hole_id_values_df = pd.DataFrame({"data": list(range(1, num_hole_ids + 1))})

# Save the keys through the data client and build the integer array element from the result.
saved_hole_id_values = data_client.save_dataframe(hole_id_values_df)
hole_id_values_component = IntegerArray1_V1_0_1.from_dict(saved_hole_id_values)

# Pair the lookup table with its values to form the hole ID category data.
hole_id_component = CategoryData_V1_0_1(table=hole_id_table_component, values=hole_id_values_component)

print(hole_id_component)

### Coordinates

In [None]:
# Create a dataframe and copy the required columns.
# The rows are taken from `sorted_collar_df` rather than `input_df` so that row `i` of the
# coordinates belongs to the hole with key `i + 1` in the hole ID table, which is built from
# the sorted collar data. Using the unsorted file order would pair coordinates with the wrong
# hole IDs whenever the collar file is not already sorted by hole ID.
coordinates_df = sorted_collar_df[["x", "y", "z"]]

# Create a `BoundingBox_V1_0_1` component for the bounding box.
# The extents do not depend on the row order.
bounding_box = BoundingBox_V1_0_1(
    min_x=coordinates_df["x"].min(),
    max_x=coordinates_df["x"].max(),
    min_y=coordinates_df["y"].min(),
    max_y=coordinates_df["y"].max(),
    min_z=coordinates_df["z"].min(),
    max_z=coordinates_df["z"].max(),
)

# Create a `FloatArray3_V1_0_1` element for the coordinates.
coordinates_component = FloatArray3_V1_0_1.from_dict(data_client.save_dataframe(coordinates_df))

### Distances

In [None]:
# Create a distances dataframe and copy the required columns.
# NOTE: The drilling campaign object requires 3 columns: `final`, `target` and `current`.
# The rows are taken from `sorted_collar_df` rather than `input_df` so that row `i` of the
# distances belongs to the hole with key `i + 1` in the hole ID table, which is built from
# the sorted collar data.
distances_df = sorted_collar_df[["final", "target", "current"]]

# Create a `FloatArray3_V1_0_1` element for the distances.
distances_component = FloatArray3_V1_0_1.from_dict(data_client.save_dataframe(distances_df))

### Attributes

In [None]:
# Build one attribute component per entry of `collar_attributes`.
attributes = []

for collar_attribute in collar_attributes:
    name = collar_attribute["name"]
    key = collar_attribute["key"]
    # Named `attribute_type` so that the builtin `type` stays usable in the rest of the notebook.
    attribute_type = collar_attribute["type"]

    # Read the values from `sorted_collar_df` rather than `input_df` so that row `i` belongs to
    # the hole with key `i + 1` in the hole ID table, which is built from the sorted collar data.
    attribute_df = pd.DataFrame()
    attribute_df["data"] = sorted_collar_df[name]

    if attribute_type == "string":
        # NOTE(review): `astype(str)` turns missing values into the literal text "nan" - confirm
        # that this is the intended representation of empty cells.
        attribute_df = attribute_df.astype(str).reset_index(drop=True)
        values = StringArray_V1_0_1.from_dict(data_client.save_dataframe(attribute_df))
        attribute = StringAttribute_V1_1_0(name=name, key=key, values=values)
    elif attribute_type == "category":
        attribute_df = attribute_df.astype(str).reset_index(drop=True)
        table_df, values_df = create_category_lookup_and_values(attribute_df)

        table = LookupTable_V1_0_1.from_dict(data_client.save_dataframe(table_df))
        values = IntegerArray1_V1_0_1.from_dict(data_client.save_dataframe(values_df))

        attribute = CategoryAttribute_V1_1_0(
            name=name, key=key, nan_description=NanCategorical_V1_0_1(values=[]), table=table, values=values
        )
    else:
        # Unknown types are reported and left out of the object.
        print(f"Unsupported attribute type: {attribute_type}")
        continue

    attributes.append(attribute)

print(attributes)

## Planned data

In [None]:
# Define the input file path.
input_file_path = f"{input_path}/planned.csv"

# Load the file.
df = pd.read_csv(input_file_path)
# Sort by hole ID so that all rows of a hole are contiguous, then renumber the rows from 0.
df = df.sort_values([object_hole_id]).reset_index(drop=True)
df.head()

### Path

Natural deviation

In [None]:
# Single-column dataframe holding the hole ID of every row of the planned data.
planned_hole_values_df = pd.DataFrame({"data": df[object_hole_id]})

print(planned_hole_values_df.head())

In [None]:
# Build the per-hole `hole_index` / `offset` / `count` table from the hole IDs of the planned rows.
holes_df = create_hole_id_mapping(hole_id_table=hole_id_table_df, value_list=planned_hole_values_df)
print(holes_df.head())

# Save the mapping through the data client and build the hole chunks component from the result.
hole_chunks_component = HoleChunks_V1_0_0.from_dict(data_client.save_dataframe(holes_df))

# print(hole_chunks_component)

In [None]:
# Combine the collar coordinates, distances, hole chunks and collar attributes built in the
# previous cells into a single hole collars component.
hole_collars_component = HoleCollars_V1_0_0(
    coordinates=coordinates_component, distances=distances_component, holes=hole_chunks_component, attributes=attributes
)
print(hole_collars_component)

In [None]:
# Select the six natural deviation columns from the planned data, in this order.
natural_dev_df = df[["distance", "azimuth", "dip", "lift rate", "drift rate", "deviation rate distance"]]
print(natural_dev_df.head())

# Save the columns through the data client and build a 6-column float array element.
# NOTE(review): `natural_dev_component` is not referenced by any later cell visible in this
# notebook; the path component is built from a separate save of `natural_dev_df`.
natural_dev_component = FloatArray6_V1_0_1.from_dict(data_client.save_dataframe(natural_dev_df))

print(natural_dev_component)

In [None]:
# Build the natural deviation path component directly from the saved natural deviation columns.
# NOTE(review): this saves `natural_dev_df` again instead of reusing an earlier result -
# presumably harmless, but confirm.
path_component = DrillingCampaign_V1_0_0_Planned_Path_NaturalDeviation.from_dict(
    data_client.save_dataframe(natural_dev_df)
)

print(path_component)

### Create the planned component

In [None]:
# The planned section of the drilling campaign consists of the hole collars and the planned path.
planned_component = DrillingCampaign_V1_0_0_Planned(
    collar=hole_collars_component,
    path=path_component,
)

print(planned_component)

### Create the drilling campaign object

In [None]:
# Assemble the drilling campaign object from the metadata and components defined above.
dc = DrillingCampaign_V1_0_0(
    name=object_name,
    bounding_box=bounding_box,
    coordinate_reference_system=coordinate_reference_system,
    # No UUID is supplied here (presumably one is assigned when the object is created - confirm).
    uuid=None,
    hole_id=hole_id_component,
    planned=planned_component,
    tags=object_tags,
)

print(dc)

In [None]:
# Upload the data referenced by the object, reporting progress in a feedback widget.
# NOTE: top-level `await` requires an async-aware environment such as a Jupyter notebook.
await data_client.upload_referenced_data(dc.as_dict(), FeedbackWidget("Uploading data"))
# Create the geoscience object at `full_obj_path` and keep the returned metadata.
new_drilling_campaign_metadata = await object_client.create_geoscience_object(full_obj_path, dc.as_dict())