# Train Test Split with FCP
This notebook demonstrates running multi-dataset Generalized Compute with the Rhino Health Python SDK.

#### Prerequisites 
1. Have an input dataset in FCP
2. Have a container image pushed to your ECR repo with the train test split code (the code is available in the examples under generalized-compute/train-test-split)

### Initialization and Login

In [1]:
from getpass import getpass
import rhino_health as rh
from rhino_health.lib.endpoints.code_object.code_object_dataclass import (
    CodeObjectCreateInput,
    CodeTypes,
    CodeObjectRunInput,
)

In [5]:
# Log in to the Rhino Health platform and define the container-image settings
# (ECR repo, image tag, registry base URI) that the code-object cell uses to
# build its container image URI.
print("Logging In")

# --- Account / API endpoint ---
rhino_url = "https://staging.rhinohealth.com/api/"
my_username = "daniel.david@rhinohealth.com" # Replace this with the email you use to log into Rhino Health

# --- Container image location ---
my_workgroup_ecr_repo = "rhino-gc-workgroup-rhino-health-staging"  # Replace this with your workgroup's ECR repo
my_image_name = "train-test-split-dd-v3"  # The name (tag) of the container image you pushed to your ECR repo
# NOTE(review): the API URL above targets staging while the ECR base URI is
# PROD_URL - confirm this pairing is intended.
ecr_base_uri = rh.lib.constants.ECRService.PROD_URL

# getpass() prompts interactively for the password instead of hard-coding it.
session = rh.login(
    rhino_api_url=rhino_url,
    username=my_username,
    password=getpass(),
)
print("Logged In")

# print("Logging In")
# my_username = "my_email@example.com" # Replace this with the email you use to log into Rhino Health
# my_workgroup_ecr_repo = "rhino-gc-workgroup-XXXXXXXXXXXXXX"  # Replace this with your workgroup's ECR repo
# my_image_name = "train-test-split"  # The name (tag) of the container image you pushed to your ECR repo
# ecr_base_uri = rh.lib.constants.ECRService.PROD_URL
# session = rh.login(username=my_username, password=getpass())
# print("Logged In")

Logging In
Logged In


### Create the Code object

In [6]:
# Register a Generalized Compute code object that points at the container
# image pushed to ECR. One input schema and two output schemas are declared,
# all using the same data schema UID.
project_uid = "a1e2d73b-6858-4853-8a8c-30b126fb49f0"  # Replace with project UID
data_schema_uid = "c185a489-1a3f-4184-b195-ee25bfacb2fb"  # Replace with data schema UID

creation_params = CodeObjectCreateInput(
    project_uid=project_uid,
    code_type=CodeTypes.GENERALIZED_COMPUTE,
    name="Train Test Split",
    description="Train Test Split",
    input_data_schema_uids=[data_schema_uid],
    # Two outputs with the same schema as the input (the run cell names them
    # "... - Train" and "... - Test").
    output_data_schema_uids=[
        data_schema_uid,
        data_schema_uid,
    ],
    # Full image reference: <registry base URI>/<workgroup repo>:<image tag>
    config=dict(
        container_image_uri=f"{ecr_base_uri}/{my_workgroup_ecr_repo}:{my_image_name}",
    ),
)
code = session.code_object.create_code_object(creation_params)
# Last expression in the cell, so the notebook displays the created object.
code

# project_uid = "XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX"  # Replace with project UID
# data_schema_uid = "XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX"  # Replace with data schema UID

# creation_params = CodeObjectCreateInput(
#     name="Train Test Split",
#     description="Train Test Split",
#     code_type=CodeTypes.GENERALIZED_COMPUTE,
#     config={"container_image_uri": f"{ecr_base_uri}/{my_workgroup_ecr_repo}:{my_image_name}"},
#     project_uid=project_uid,
#     input_data_schema_uids=[data_schema_uid],
#     output_data_schema_uids=[data_schema_uid, data_schema_uid],
# )
# code = session.code_object.create_code_object(creation_params)
# code

CodeObject(session=<rhino_health.lib.rhino_session.RhinoSession object at 0x1074ba9d0>, creator_uid='3c465cd8-a25d-43b2-949c-4db81cf6dcd8', created_at='2025-04-18T16:12:54.893034Z', name='Train Test Split', description='Train Test Split', input_data_schema_uids=<property object at 0x108ea98a0>, output_data_schema_uids=<property object at 0x108ea97b0>, inputs_config=[CodeObjectInputConfig(data_schema_uid='c185a489-1a3f-4184-b195-ee25bfacb2fb', is_optional=False, is_multiple=False)], outputs_config=[CodeObjectOutputConfig(data_schema_uid='c185a489-1a3f-4184-b195-ee25bfacb2fb', is_optional=False, is_multiple=False), CodeObjectOutputConfig(data_schema_uid='c185a489-1a3f-4184-b195-ee25bfacb2fb', is_optional=False, is_multiple=False)], project_uid='a1e2d73b-6858-4853-8a8c-30b126fb49f0', code_type='Generalized Compute', base_version_uid='f5fa388f-18eb-4898-a6a0-3504c2d90b60', config={'code_location': 'single_non_binary_file', 'code_execution_mode': 'snippet', 'container_base_type': 'python_cu

In [10]:
# Debugging aid: list the attributes available on the run result object.
# `run_result` is assigned by the "Run the code" cell further down, so that
# cell must be executed first; running the notebook top to bottom would raise
# NameError here.
dir(run_result)


['__abstractmethods__',
 '__annotations__',
 '__class__',
 '__class_getitem__',
 '__class_vars__',
 '__copy__',
 '__deepcopy__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__fields__',
 '__fields_set__',
 '__format__',
 '__ge__',
 '__get_pydantic_core_schema__',
 '__get_pydantic_json_schema__',
 '__getattr__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__hidden__',
 '__init__',
 '__init_subclass__',
 '__iter__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__pretty__',
 '__private_attributes__',
 '__pydantic_complete__',
 '__pydantic_core_schema__',
 '__pydantic_custom_init__',
 '__pydantic_decorators__',
 '__pydantic_extra__',
 '__pydantic_fields_set__',
 '__pydantic_generic_metadata__',
 '__pydantic_init_subclass__',
 '__pydantic_parent_namespace__',
 '__pydantic_post_init__',
 '__pydantic_private__',
 '__pydantic_root_model__',
 '__pydantic_serializer__',
 '__pydantic_validator__',
 '__reduce__',
 '__reduce_ex__',
 '__repr

### Run the code on the input dataset and print the results

In [13]:
# Run the Train Test Split code object on one input dataset, wait for the run
# to finish, and report its status. Two output naming templates are supplied,
# one per declared output ("... - Train" and "... - Test").
input_dataset_uid = "f67b05af-5ccd-45b8-993b-d6dc38030940"  # Replace with dataset UID
run_params = CodeObjectRunInput(
    code_object_uid=code.uid,
    input_dataset_uids=[[input_dataset_uid]],
    output_dataset_naming_templates=['{{ input_dataset_names.0 }} - Train', '{{ input_dataset_names.0 }} - Test'],
    timeout_seconds=300,
)
print("Starting to run train_test_split")
code_run = session.code_object.run_code_object(run_params)
run_result = code_run.wait_for_completion()
print("Finished running train_test_split")
print(f"Result status is '{run_result.status.value}'")

# A bare status such as 'Failed' gives no hint about the cause, so print the
# error details when the run reports any. results_info may be empty/None,
# hence the guard before .get().
run_errors = run_result.results_info.get('errors') if run_result.results_info else None
if run_errors:
    print(f"Errors: {run_errors}")

# input_dataset_uid = "XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX"  # Replace with dataset UID
# run_params = CodeObjectRunInput(
#     code_object_uid= code.uid,
#     input_dataset_uids=[[input_dataset_uid]],
#     output_dataset_naming_templates=['{{ input_dataset_names.0 }} - Train', '{{ input_dataset_names.0 }} - Test'],
#     timeout_seconds=300,
# )
# print("Starting to run train_test_split")
# code_run = session.code_object.run_code_object(run_params)
# run_result = code_run.wait_for_completion()
# print("Finished running train_test_split")
# print(f"Result status is '{run_result.status.value}', errors={run_result.results_info.get('errors') if run_result.results_info else None}")


Starting to run train_test_split
Waiting for code run to complete (0 hours 0 minutes and 0 seconds)
Done.
Finished running train_test_split
Result status is 'Failed'
