# Test the initial installation of a Rhino client

#### Prerequisites
1. Have an active SSO login
2. Have an active Rhino Cloud user with the appropriate access
3. Have the path to the external data store (e.g. S3, GCS)
4. Copy the accompanying dataset.csv file over to the external data store

### 1. Imports

In [2]:
from getpass import getpass
import rhino_health as rh

### 2. Initialize the input values

In [62]:
# --- Site-specific settings: review and edit these values before running the notebook ---

# Rhino Cloud account used for the username/password login.
USERNAME = "dan@rhinohealth.com"

# Directory on the external data store that holds dataset.csv and the file_data/ folder.
# DATASTORE_PATH = "/rhino_data/external/s3/hc/encounters.csv"
DATASTORE_PATH = "/rhino_data/validation/"

# Local directory containing validation_schema.csv (uploaded when the data schema is created).
# NOTE: this is a machine-specific absolute path and must be changed on any other machine.
CURRENT_WORKING_DIR = "/Users/danielrunt/PycharmProjects/user-resources/sandbox/site-testing"

# Rhino API endpoints.
DEV_URL = "https://dev.rhinohealth.com/api"
DEV3_URL = "https://dev3.rhinohealth.com/api"

# ECR settings; the key values are placeholders to be filled in from the Rhino Cloud UI.
ECR_REPO_NAME = "rhino-gc-workgroup-rhino-health-dev"
ECR_ACCESS_KEY_ID = "<get from Rhino Cloud UI>"
# Backward-compatible alias for the original, misspelled constant name.
ECR_ACCESs_KEY_ID = ECR_ACCESS_KEY_ID
ECR_ACCESS_SECRET_KEY = "<get from Rhino Cloud UI>"

### 3 Log in

In [27]:
# Authenticate against the Rhino API at DEV_URL and keep the resulting session for later cells.
print("Logging In")
# getpass() prompts for the password interactively, so it is never written into the notebook.
session = rh.login(username=USERNAME, password=getpass(), rhino_api_url=DEV_URL)
# Alternative SSO login against DEV3_URL, left disabled. TOKEN is not defined in the cells shown
# here, so it must be set before this line is enabled.
#session = rh.login(authentication_details={"sso_access_token": TOKEN, "sso_provider": "google"}, rhino_api_url=DEV3_URL)
print("Logged In")

Logging In
Logged In


### 4 Create a project

In [12]:
from rhino_health.lib.endpoints.project.project_dataclass import ProjectCreateInput
# Look the project up by name first so that re-running this cell reuses an existing project.
user = session.current_user
project = session.project.get_project_by_name("Validation Project")

if project is None:
    # No project with that name was found: create it under the current user's primary workgroup.
    project = session.project.add_project(
        ProjectCreateInput(
            name="Validation Project",
            description="Project for Site Installation Validation",
            type="Validation",
            primary_workgroup_uid=user.primary_workgroup_uid,
        )
    )

### 5 Create a dataset and schema using external datasource access

In [74]:
from rhino_health.lib.endpoints.dataset.dataset_dataclass import DatasetCreateInput
from rhino_health.lib.endpoints.data_schema.data_schema_dataclass import DataSchemaCreateInput

# Reuse the data schema if it already exists; otherwise create it from the local
# validation_schema.csv file found in CURRENT_WORKING_DIR.
data_schema = session.data_schema.get_data_schema_by_name("Validation Dataset schema")
if data_schema is None:
    data_schema_input = DataSchemaCreateInput(
        name="Validation Dataset schema",
        description="",
        project_uid=project.uid,
        primary_workgroup=project.primary_workgroup_uid,
        file_path=f"{CURRENT_WORKING_DIR}/validation_schema.csv",
    )
    data_schema = session.data_schema.create_data_schema(data_schema_input)

# Normalise the configured data store path so the joins below are correct whether or not
# DATASTORE_PATH was written with a trailing slash.
datastore_root = DATASTORE_PATH.rstrip("/")

# Reuse the dataset if it already exists; otherwise register it from the external data store.
dataset = session.dataset.get_dataset_by_name("Validation Dataset")
if dataset is None:
    dataset_input = DatasetCreateInput(
        name="Validation Dataset",
        description="",
        project_uid=project.uid,
        workgroup_uid=project.primary_workgroup_uid,
        data_schema_uid=data_schema.uid,
        csv_filesystem_location=f"{datastore_root}/dataset.csv",
        file_base_path=f"{datastore_root}/file_data/",
        method="filesystem",
        is_data_deidentified=True,
    )
    dataset = session.dataset.add_dataset(dataset_input)

### 6 Run Generalized Compute Code

In [75]:
# Code snippet passed to dataset.run_code: adds a BMI column computed from the Weight and
# Height columns of the dataframe named `df`.
bmi_calculation = "df['BMI'] = df['Weight'] / df['Height']**2"

# The output reuses the validation schema, looked up by name.
output_schema_uid = session.data_schema.get_data_schema_by_name("Validation Dataset schema").uid

output_dataset, run_results = dataset.run_code(
    bmi_calculation,
    output_data_schema_uid=output_schema_uid,
    output_dataset_names_suffix=" with BMI",
)
print("Finished running Generalized Compute code")

# Keep this as the last expression so the notebook displays the run results; in the middle
# of the cell the value was computed and silently discarded.
run_results.dict()

Waiting for code run to complete (0 hours 0 minutes and a second)
Done.
Finished Creating Code Object


### 7 Use NVFlare to Train a Model With a Code Object

In [80]:
from rhino_health.lib.endpoints.code_object.code_object_dataclass import (
    CodeObject,
    CodeObjectCreateInput,
    CodeObjectRunInput,
    ModelTrainInput,
    CodeTypes,
    CodeLocation,
    CodeExecutionMode,
    RequirementMode,
)

# Fetch the validation schema and any previously registered NVFlare code object by name.
data_schema = session.data_schema.get_data_schema_by_name("Validation Dataset schema")
code_object = session.code_object.get_code_object_by_name("NVFlare Validation Code")

if code_object is None:
    # Not registered yet: create the code object, then wait for its build.
    # The same schema is used for both input and output.
    code_object = session.code_object.create_code_object(
        CodeObjectCreateInput(
            name="NVFlare Validation Code",
            description="",
            input_data_schema_uids=[data_schema.uid],
            output_data_schema_uids=[data_schema.uid],
            project_uid=project.uid,
            code_type=CodeTypes.NVIDIA_FLARE_V2_4,
            config={"container_image_uri": "testing-model-v2.4.2"},
        ),
        return_existing=False,
        add_version_if_exists=True,
    )
    # 1200 is presumably the build timeout in seconds; confirm against the SDK.
    code_object.wait_for_build(1200)
    print("Finished Creating Code Object")

# Train on the single validation dataset; no separate validation datasets are supplied.
input_dataset_uids = [dataset.uid]
run_params = ModelTrainInput(
    code_object_uid=code_object.uid,
    input_dataset_uids=input_dataset_uids,
    one_fl_client_per_dataset=False,
    validation_dataset_uids=[],
    validation_datasets_inference_suffix="_validation",
    timeout_seconds=600,
    config_fed_client="",
    config_fed_server="",
    secrets_fed_client="",
    secrets_fed_server="",
    sync=False,
)

# The request is submitted with sync=False, then wait_for_completion() is called on the result.
print(f"Starting to run federated training of {code_object.name}")
model_train = session.code_object.train_model(run_params)
train_result = model_train.wait_for_completion()
print(f"Finished training of {code_object.name}")

Finished Creating Code Object
Starting to run federated training of NVFlare Validation Code
Waiting for code run to complete (0 hours 0 minutes and a second)
Waiting for code run to complete (0 hours 0 minutes and 11 seconds)
Waiting for code run to complete (0 hours 0 minutes and 22 seconds)
Done.
Finished training of NVFlare Validation Code
