# Train Test Split with FCP
Demonstrates how to run Generalized Compute code with the Rhino Health Python SDK to split one input dataset into two output datasets (train and test).

#### Prerequisites 
1. Have an input dataset in FCP
2. Have a container image pushed to your ECR repo with the train test split code (the code is available in the examples under generalized-compute/train-test-split)

### Initialization and Login

In [None]:
from getpass import getpass
import rhino_health as rh
from rhino_health.lib.endpoints.code_object.code_object_dataclass import (
    CodeObjectCreateInput,
    CodeTypes,
    CodeObjectRunInput,
)

In [None]:
# Log in to Rhino Health and collect the settings used to locate the container image.
# Edit the placeholder values below before running this cell.
print("Logging In")
my_username = "my_email@example.com"  # Replace this with the email you use to log into Rhino Health
my_workgroup_ecr_repo = "rhino-gc-workgroup-XXXXXXXXXXXXXX"  # Replace this with your workgroup's ECR repo
my_image_name = "train-test-split"  # The name (tag) of the container image you pushed to your ECR repo
ecr_base_uri = rh.lib.constants.ECRService.PROD_URL
# `getpass` is imported as a function (`from getpass import getpass`), so it is called
# directly; `getpass.getpass()` would raise AttributeError. The password is prompted
# for interactively rather than being stored in the notebook.
session = rh.login(username=my_username, password=getpass())
print("Logged In")

### Create the Code object

In [None]:
# Identifiers of the project the Code Object is created in and of the data schema
# it uses for its input and outputs.
project_uid = "XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX"  # Replace with project UID
data_schema_uid = "XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX"  # Replace with data schema UID

# Full image reference in the form <ECR base URI>/<workgroup repo>:<image tag>.
container_image_uri = f"{ecr_base_uri}/{my_workgroup_ecr_repo}:{my_image_name}"

# One input schema and two output schemas; the same schema is listed twice
# (presumably one entry per output dataset, i.e. train and test).
creation_params = CodeObjectCreateInput(
    project_uid=project_uid,
    name="Train Test Split",
    description="Train Test Split",
    code_type=CodeTypes.GENERALIZED_COMPUTE,
    config={"container_image_uri": container_image_uri},
    input_data_schema_uids=[data_schema_uid],
    output_data_schema_uids=[data_schema_uid, data_schema_uid],
)
code = session.code_object.create_code_object(creation_params)
code  # Last expression of the cell, so the notebook displays the created object

### Run the code on the input dataset and print the results

In [None]:
input_dataset_uid = "XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX"  # Replace with dataset UID

# Naming templates for the two output datasets; both refer to the first input
# dataset's name through the `input_dataset_names.0` placeholder.
output_name_templates = [
    "{{ input_dataset_names.0 }} - Train",
    "{{ input_dataset_names.0 }} - Test",
]

run_params = CodeObjectRunInput(
    code_object_uid=code.uid,
    input_dataset_uids=[[input_dataset_uid]],
    output_dataset_naming_templates=output_name_templates,
    timeout_seconds=300,
)

print("Starting to run train_test_split")
code_run = session.code_object.run_code_object(run_params)
run_result = code_run.wait_for_completion()
print("Finished running train_test_split")

# Report the final status, plus any errors recorded in the results info
# (None when no results info is available).
status_value = run_result.status.value
if run_result.results_info:
    run_errors = run_result.results_info.get('errors')
else:
    run_errors = None
print(f"Result status is '{status_value}', errors={run_errors}")
