# Test the initial installation of a Rhino client

### Prerequisites
1. Have an active Rhino Cloud user with the appropriate access
2. Have the path to the Client-mounted storage (e.g. S3, GCS)
3. Copy the contents of the accompanying data directory (including `credit_risk_dataset.csv`) to the Client-mounted external data store

### 1. Install and import the Rhino Health SDK

In [None]:
pip install --upgrade rhino_health

In [None]:
from getpass import getpass
import rhino_health as rh


### 2. Initialize the input values

In [None]:
# Rhino Cloud username passed to rh.login(); replace the placeholder before running.
USERNAME = "<username>"
# Directory on the Client-mounted storage that holds the validation data; the dataset
# cell joins it with "credit_risk_dataset.csv". Replace the placeholder segment.
CLIENT_DATA_PATH = "/rhino_data/external/<path to your data>"

### 3. Log in

In [None]:
# NOTE(review): ApiEnvironment is imported but not referenced in this cell —
# presumably kept so rh.login can be pointed at a non-default environment; confirm.
from rhino_health import ApiEnvironment
print("Logging In")
# getpass() prompts for the password interactively, so it is never written into the notebook.
session = rh.login(username=USERNAME, password=getpass())
print("Logged In")

### 4. Create a project

In [None]:
from rhino_health.lib.endpoints.project.project_dataclass import ProjectCreateInput
# The current user's primary workgroup becomes the owner of the validation project.
user = session.current_user

# Single source of truth for the project name, shared by the lookup and the creation.
VALIDATION_PROJECT_NAME = "Validation Project"

# Look the project up first; it is only created when the lookup yields None,
# which keeps this cell safe to re-run.
project = session.project.get_project_by_name(VALIDATION_PROJECT_NAME)
if project is None:
    new_project = ProjectCreateInput(
        name=VALIDATION_PROJECT_NAME,
        description="Project for Site Installation Validation",
        type="Validation",
        primary_workgroup_uid=user.primary_workgroup_uid,
    )
    project = session.project.add_project(new_project)
    print("Finished Creating Project")

### 5. Create a dataset and schema using client-mounted data access

In [None]:
from rhino_health.lib.endpoints.dataset.dataset_dataclass import DatasetCreateInput
import os

# CLIENT_DATA_PATH is a POSIX path that is resolved on the Rhino client, not on the
# machine running this notebook, so it must be joined with '/' on every platform
# (os.path.join would insert a backslash when the notebook runs on Windows).
import posixpath

# Single source of truth for the dataset name, shared by the lookup and the creation.
VALIDATION_DATASET_NAME = "Validation Dataset"

# NOTE(review): the lookup is by name only and is not restricted to `project` here;
# if the SDK supports scoping the lookup to a project, consider doing so — confirm.
dataset = session.dataset.get_dataset_by_name(VALIDATION_DATASET_NAME)
if dataset is None:
    dataset_input = DatasetCreateInput(
        name=VALIDATION_DATASET_NAME,
        description="",
        project_uid=project.uid,
        workgroup_uid=project.primary_workgroup_uid,
        csv_filesystem_location=posixpath.join(CLIENT_DATA_PATH, "credit_risk_dataset.csv"),
        method="filesystem",
        # No schema is supplied; presumably the platform generates one from the CSV
        # (the section heading mentions creating a schema) — confirm.
        data_schema=None,
        is_data_deidentified=True,
    )
    dataset = session.dataset.add_dataset(dataset_input)
    print("Finished Creating Dataset")

### 6. Run Generalized Compute Code

In [None]:
# Code snippet handed to dataset.run_code(): it adds a `debt_ratio` column to a
# DataFrame named `df`, computed as loan_amnt divided by person_income.
debt_ratio_calculation = "df['debt_ratio'] = df['loan_amnt'] / df['person_income']"

# The input dataset's schema uid is reused as the output schema uid.
output_schema_uid = dataset.data_schema_uid

output_dataset, run_results = dataset.run_code(
    debt_ratio_calculation,
    output_data_schema_uid=output_schema_uid,
    output_dataset_names_suffix=" With Debt Ratio",
)

# Report only the status entry of the run response.
results = run_results.dict()["status"]
print(f"Finished Creating and Running Code object with result: {results}")

### 7. Build the NVFlare container and run federated training

In [None]:
from rhino_health.lib.endpoints.code_object.code_object_dataclass import (
    CodeObjectCreateInput,
    CodeTypes,
    CodeExecutionMode,
    RequirementMode,
    CodeLocation,
    ModelTrainInput
)

# Single source of truth for the code object name, shared by the lookup and the creation.
AUTOCONTAINER_NAME = "NVFlare Autocontainer"

# Reuse an existing code object with this name; it is only created (and built)
# when the lookup yields None, which keeps this cell safe to re-run.
autocontainer = session.code_object.get_code_object_by_name(AUTOCONTAINER_NAME)
if autocontainer is None:
    autocontainer_input = CodeObjectCreateInput(
        name=AUTOCONTAINER_NAME,
        description="",
        # output_schema_uid comes from the Generalized Compute cell; the same
        # schema is used for both the inputs and the outputs of this code object.
        input_data_schema_uids=[output_schema_uid],
        output_data_schema_uids=[output_schema_uid],
        project_uid=project.uid,
        code_type=CodeTypes.NVIDIA_FLARE_V2_5,
        config={
            "code_execution_mode": CodeExecutionMode.AUTO_CONTAINER_NVFLARE,
            "requirements": [
                "scikit-learn~=1.5.2",
                "pandas~=2.2.3",
                "xgboost~=2.1.2",
                "nvflare~=2.5.0",
            ],
            "python_version": "3.10",
            "requirements_mode": RequirementMode.PYTHON_PIP,
            "code_location": CodeLocation.S3_MULTIPART_ZIP,
            # Relative path: resolved against the notebook's working directory.
            "folder_path": "./model/",
        },
    )
    autocontainer = session.code_object.create_code_object(autocontainer_input)
    # Wait for the container build before continuing.
    # NOTE(review): 1200 is presumably a timeout in seconds — confirm against the SDK.
    autocontainer.wait_for_build(1200)
    print("Finished Creating Autocontainer")

# Train on the single validation dataset; no separate validation datasets are supplied.
input_dataset_uids = [dataset.uid]
run_params = ModelTrainInput(
    code_object_uid=autocontainer.uid,
    input_dataset_uids=input_dataset_uids,
    one_fl_client_per_dataset=False,
    validation_dataset_uids=[],
    validation_datasets_inference_suffix="_validation",
    timeout_seconds=600,
    # Federated client/server configs and secrets are all passed as empty strings.
    config_fed_client="",
    config_fed_server="",
    secrets_fed_client="",
    secrets_fed_server="",
    sync=False,
)

print(f"Starting to run federated training of {autocontainer.name}")
model_train = session.code_object.train_model(run_params)
# The run was submitted with sync=False; wait_for_completion() is then called
# to obtain the result that is printed below.
train_result = model_train.wait_for_completion()
print(f"Finished training of {autocontainer.name} with result: {train_result}")