# Test the initial installation of a Rhino client

#### Prerequisites
1. Have an active SSO login
2. Have an active Rhino Cloud user with the appropriate access
3. Have the path to the external data store (e.g. S3, GCS)
4. Copy the accompanying data directory (containing `dataset.csv` and `file_data/`) to the external data store

### 1. Imports

In [19]:
import os
from getpass import getpass

import rhino_health as rh
from rhino_health.lib.endpoints.code_object import CodeExecutionMode

### 2. Initialize the input values

In [20]:
# Notebook configuration.
#
# Every value can be overridden with an environment variable so the notebook
# can be run by another user or against another site without editing this
# cell. When a variable is unset (or empty) the original value is used.

# Rhino Cloud account used to log in.
USERNAME = os.environ.get("RHINO_USERNAME") or "dan@rhinohealth.com"

# Directory on the external data store that holds the validation data.
# Previous value, kept for reference:
# DATASTORE_PATH = "/rhino_data/external/s3/hc/encounters.csv"
DATASTORE_PATH = os.environ.get("RHINO_DATASTORE_PATH") or "/rhino_data/validation/"
# Later cells build file locations by plain string concatenation
# (DATASTORE_PATH + "dataset.csv"), so guarantee exactly one trailing slash.
DATASTORE_PATH = DATASTORE_PATH.rstrip("/") + "/"

# Local directory that contains validation_schema.csv.
CURRENT_WORKING_DIR = (
    os.environ.get("RHINO_WORKING_DIR")
    or "/Users/danielrunt/PycharmProjects/user-resources/sandbox/site-testing"
)

# Base URL of the Rhino API to connect to.
DEV_URL = os.environ.get("RHINO_API_URL") or "https://dev.rhinohealth.com/api"

### 3. Log in

In [21]:
# Authenticate against the Rhino API. The password is prompted for
# interactively via getpass() rather than being hard-coded in the notebook.
print("Logging In")
session = rh.login(
    rhino_api_url=DEV_URL,
    username=USERNAME,
    password=getpass(),
)
# SSO alternative. NOTE(review): TOKEN and DEV3_URL are not defined in the
# cells above and would have to be provided before enabling this line.
#session = rh.login(authentication_details={"sso_access_token": TOKEN, "sso_provider": "google"}, rhino_api_url=DEV3_URL)
print("Logged In")

Logging In
Logged In


### 4. Create a project

In [30]:
from rhino_health.lib.endpoints.project.project_dataclass import ProjectCreateInput
# Look the validation project up by name first and only create it when the
# lookup returns None, so an existing project is reused rather than recreated.
PROJECT_NAME = "Validation Project"

user = session.current_user

project = session.project.get_project_by_name(PROJECT_NAME)
if project is None:
    # The new project is attached to the current user's primary workgroup.
    project = session.project.add_project(
        ProjectCreateInput(
            name=PROJECT_NAME,
            description="Project for Site Installation Validation",
            type="Validation",
            primary_workgroup_uid=user.primary_workgroup_uid,
        )
    )

### 5. Create a data schema and dataset using external data store access

In [34]:
from rhino_health.lib.endpoints.dataset.dataset_dataclass import DatasetCreateInput
from rhino_health.lib.endpoints.data_schema.data_schema_dataclass import DataSchemaCreateInput

SCHEMA_NAME = "Validation Dataset schema"
DATASET_NAME = "Validation Dataset"

# --- Data schema -----------------------------------------------------------
# Reuse the schema if one with this name is found; otherwise create it from
# the validation_schema.csv file in CURRENT_WORKING_DIR.
data_schema = session.data_schema.get_data_schema_by_name(SCHEMA_NAME)
if data_schema is None:
    data_schema = session.data_schema.create_data_schema(
        DataSchemaCreateInput(
            name=SCHEMA_NAME,
            description="",
            project_uid=project.uid,
            primary_workgroup=project.primary_workgroup_uid,
            file_path=CURRENT_WORKING_DIR + "/validation_schema.csv",
        )
    )
    print("Finished Creating Data Schema")

# --- Dataset ---------------------------------------------------------------
# Reuse the dataset if one with this name is found; otherwise register it from
# the files under DATASTORE_PATH. The paths are built by direct concatenation,
# so DATASTORE_PATH is expected to end with "/".
dataset = session.dataset.get_dataset_by_name(DATASET_NAME)
if dataset is None:
    dataset = session.dataset.add_dataset(
        DatasetCreateInput(
            name=DATASET_NAME,
            description="",
            project_uid=project.uid,
            workgroup_uid=project.primary_workgroup_uid,
            data_schema_uid=data_schema.uid,
            csv_filesystem_location=f"{DATASTORE_PATH}dataset.csv",
            file_base_path=f"{DATASTORE_PATH}file_data/",
            method="filesystem",
            is_data_deidentified=True,
        )
    )
    print("Finished Creating Dataset")

### 6. Run Generalized Compute Code

In [35]:
# Code snippet submitted to run against the validation dataset: adds a BMI
# column computed from the Weight and Height columns.
bmi_calculation = "df['BMI'] = df['Weight'] / df['Height']**2"

# The output dataset uses the same schema as the input, looked up by name.
validation_schema = session.data_schema.get_data_schema_by_name("Validation Dataset schema")
output_schema_uid = validation_schema.uid

output_dataset, run_results = dataset.run_code(
    bmi_calculation,
    output_data_schema_uid=output_schema_uid,
    output_dataset_names_suffix=" with BMI",
)
# Not the last expression of the cell, so the returned value is discarded
# rather than displayed.
run_results.dict()
print("Finished Creating Code Object")

Waiting for code run to complete (0 hours 0 minutes and a second)
Done.
Finished Creating Code Object


### 7. Build the NVFlare container and run federated training

In [36]:
from rhino_health.lib.endpoints.code_object.code_object_dataclass import (
    CodeObjectCreateInput,
    CodeTypes,
    CodeExecutionMode,
    RequirementMode,
    CodeLocation,
    ModelTrainInput
)

AUTOCONTAINER_NAME = "NVFlare Autocontainer"

# pip requirements installed into the auto-built container.
NVFLARE_REQUIREMENTS = [
    "nvflare == 2.4.0",
    "pandas ~= 1.4.1",
    "tensorboardX ~= 2.6.2",
    "torch >= 1.11, < 1.12",
    "torchvision >= 0.12, < 0.13",
    "scikit-learn ~= 1.3.0",
]

# --- Build -----------------------------------------------------------------
# Reuse the code object if one with this name is found; otherwise create it
# from the code under ./model/ and wait for the container build.
autocontainer = session.code_object.get_code_object_by_name(AUTOCONTAINER_NAME)
if autocontainer is None:
    autocontainer = session.code_object.create_code_object(
        CodeObjectCreateInput(
            name=AUTOCONTAINER_NAME,
            description="",
            input_data_schema_uids=[data_schema.uid],
            output_data_schema_uids=[data_schema.uid],
            project_uid=project.uid,
            code_type=CodeTypes.NVIDIA_FLARE_V2_4,
            config={
                "code_execution_mode": CodeExecutionMode.AUTO_CONTAINER_NVFLARE,
                "requirements": NVFLARE_REQUIREMENTS,
                "python_version": "3.8",
                "requirements_mode": RequirementMode.PYTHON_PIP,
                "code_location": CodeLocation.S3_MULTIPART_ZIP,
                "folder_path": "./model/",
            },
        )
    )
    # NOTE(review): 1200 is presumably the build timeout in seconds - confirm
    # against the SDK documentation.
    autocontainer.wait_for_build(1200)
    print("Finished Creating Autocontainer")

# --- Train -----------------------------------------------------------------
# Train on the validation dataset only: no separate validation datasets and
# empty federated client/server configs and secrets.
input_dataset_uids = [dataset.uid]
run_params = ModelTrainInput(
    code_object_uid=autocontainer.uid,
    input_dataset_uids=input_dataset_uids,
    one_fl_client_per_dataset=False,
    validation_dataset_uids=[],
    validation_datasets_inference_suffix="_validation",
    timeout_seconds=600,
    config_fed_client="",
    config_fed_server="",
    secrets_fed_client="",
    secrets_fed_server="",
    sync=False,
)
print(f"Starting to run federated training of {autocontainer.name}")
# The request is submitted with sync=False; wait_for_completion() is then
# called on the returned object before the final message is printed.
model_train = session.code_object.train_model(run_params)
train_result = model_train.wait_for_completion()
print(f"Finished training of {autocontainer.name}")

Waiting for code object build to complete (0 hours 0 minutes and a second)
Waiting for code object build to complete (0 hours 0 minutes and 31 seconds)
Waiting for code object build to complete (0 hours a minute and 2 seconds)
Waiting for code object build to complete (0 hours a minute and 33 seconds)
Waiting for code object build to complete (0 hours 2 minutes and 4 seconds)
Waiting for code object build to complete (0 hours 2 minutes and 34 seconds)
Done.
Finished Creating Autocontainer
Starting to run federated training of NVFlare Autocontainer
Waiting for code run to complete (0 hours 0 minutes and a second)
Waiting for code run to complete (0 hours 0 minutes and 11 seconds)
Waiting for code run to complete (0 hours 0 minutes and 22 seconds)
Waiting for code run to complete (0 hours 0 minutes and 33 seconds)
Waiting for code run to complete (0 hours 0 minutes and 43 seconds)
Waiting for code run to complete (0 hours 0 minutes and 54 seconds)
Waiting for code run to complete (0 hour