# Test the initial installation of a Rhino client

### Prerequisites
1. Have an active Rhino FCP user with the appropriate access
2. Have the path to the Client-mounted storage (e.g. S3, GCS)
3. Copy the accompanying data file (`cyp3a4_all_no_test.csv`) to the Client-mounted storage so that it is available under the path from step 2

### 1. Install and import the Rhino SDK

In [None]:
pip install --upgrade rhino_health

In [1]:
from getpass import getpass
import rhino_health as rh


### 2. Initialize the input values

In [2]:
# Rhino FCP account used to log in below; replace with your own user's email.
USERNAME = "tony@rhinohealth.com"
# Path to the Client-mounted storage (see Prerequisites). The dataset CSV
# location passed to the platform is built by joining a file name onto it.
CLIENT_DATA_PATH = "/rhino_data/external/s3"

### 3. Log in

In [4]:
from rhino_health import ApiEnvironment
# Prompt for the password interactively (so it is not hard-coded in the
# notebook) and open a session against the staging AWS API environment.
print("Logging In")
session = rh.login(
    username=USERNAME,
    password=getpass(),
    rhino_api_url=ApiEnvironment.STAGING_AWS_URL,
)
print("Logged In")

Logging In
Logged In


### 4. Create a project

In [13]:
from rhino_health.lib.endpoints.project.project_dataclass import ProjectCreateInput
# Get-or-create the validation project.
# The same name must be used for the lookup and for the creation; otherwise a
# re-run never finds the project it created and tries to create it again.
# NOTE(review): the lookup previously used "Designing Chemprop Demo" — if that
# existing project is the one you intend to reuse, set PROJECT_NAME to it.
PROJECT_NAME = "Validation Project"

user = session.current_user

project = session.project.get_project_by_name(PROJECT_NAME)
if project is None:
    new_project = ProjectCreateInput(
        name=PROJECT_NAME,
        description="Project for Site Installation Validation",
        type="Validation",
        # The project is owned by the logged-in user's primary workgroup.
        primary_workgroup_uid=user.primary_workgroup_uid,
    )
    project = session.project.add_project(new_project)
    print("Finished Creating Project")

### 5. Create a dataset and schema using client-mounted data access

In [14]:
from rhino_health.lib.endpoints.dataset.dataset_dataclass import DatasetCreateInput
import os

# Reuse the project's "Validation Dataset" when it already exists; otherwise
# register the CSV found under the client-mounted path as a new dataset.
dataset = project.get_dataset_by_name("Validation Dataset")
if dataset is None:
    csv_location = os.path.join(CLIENT_DATA_PATH, "cyp3a4_all_no_test.csv")
    dataset = session.dataset.add_dataset(
        DatasetCreateInput(
            name="Validation Dataset",
            description="",
            project_uid=project.uid,
            workgroup_uid=project.primary_workgroup_uid,
            csv_filesystem_location=csv_location,
            method="filesystem",
            # No explicit schema is supplied here.
            # NOTE(review): presumably the platform derives one from the CSV — confirm.
            data_schema=None,
            is_data_deidentified=True,
        )
    )
    print("Finished Creating Dataset")


Finished Creating Dataset


### 6. Run Generalized Compute Code

In [None]:
from pathlib import Path
from textwrap import dedent
from rhino_health.lib.endpoints.code_object.code_object_dataclass import (
    CodeObjectCreateInput, 
    CodeObjectRunInput,
    CodeTypes, 
    CodeExecutionMode, 
    RequirementMode, 
    CodeLocation
)

# Get-or-create the "Molecule Weight Calculation" code object, then run it on
# the validation dataset and wait for the run to finish.
gen_comp_code = project.get_code_object_by_name("Molecule Weight Calculation")
if gen_comp_code is None:
    # Script executed inside the auto-built container: it reads the input CSV,
    # adds a "molecule_weight" column computed from the "smiles" column with
    # RDKit, and writes the result to the output CSV.
    molecule_weight_script = dedent(
        """
        import pandas as pd
        from rdkit import Chem
        from rdkit.Chem import Descriptors

        def calculate_molecule_weight(smiles):
            mol = Chem.MolFromSmiles(smiles)
            return Descriptors.MolWt(mol)

        df = pd.read_csv('/input/dataset.csv')
        df["molecule_weight"] = df["smiles"].apply(calculate_molecule_weight)
        df.to_csv('/output/dataset.csv', index=False)

        """
    )
    # Pinned pip requirements for the container image.
    requirements = ["pandas==2.2.3", "rdkit==2024.9.4"]
    container_config = {
        "code_execution_mode": CodeExecutionMode.AUTO_CONTAINER_FILE,
        "python_version": "3.11",
        "requirements_mode": RequirementMode.PYTHON_PIP,
        "requirements": requirements,
        "code_location": CodeLocation.DEFAULT,
        "code": molecule_weight_script,
    }
    gen_comp_code_input = CodeObjectCreateInput(
        name="Molecule Weight Calculation",
        description="",
        input_data_schema_uids=[dataset.data_schema_uid],
        output_data_schema_uids=[None],
        project_uid=project.uid,
        code_type=CodeTypes.PYTHON_CODE,
        config=container_config,
    )
    gen_comp_code = session.code_object.create_code_object(
        gen_comp_code_input,
        return_existing=False,
        add_version_if_exists=True,
    )
    # Wait up to 1200 seconds for the container build and keep the returned object.
    gen_comp_code = gen_comp_code.wait_for_build(1200)
    print("Finished Creating Code Object")

# Run the code object on the validation dataset; the output dataset name is
# derived from the input dataset name through the naming template.
gen_comp_code_run = session.code_object.run_code_object(
    CodeObjectRunInput(
        code_object_uid=gen_comp_code.uid,
        input_dataset_uids=[[dataset.uid]],
        output_dataset_naming_templates=["{{input_dataset_names.0}} - Molecule Weight"],
        timeout_seconds=1200,
    )
)
gen_comp_code_run = gen_comp_code_run.wait_for_completion(1200)

### 7. Build the NVFlare container and run federated training

In [None]:
from rhino_health.lib.endpoints.code_object.code_object_dataclass import (
    CodeObjectCreateInput,
    CodeTypes,
    CodeExecutionMode,
    RequirementMode,
    CodeLocation,
    ModelTrainInput
)

# The federated training consumes the dataset produced by the compute run in
# step 6, so take that run's first output dataset and reuse its schema.
output_datasets = gen_comp_code_run.output_datasets
if not output_datasets:
    # Fail with a clear message instead of a bare IndexError on [0].
    raise RuntimeError(
        "The Molecule Weight Calculation run produced no output datasets; "
        "check the run in step 6 before building the NVFlare container."
    )
output_dataset = output_datasets[0]

input_schema = output_dataset.data_schema_uid

# Get-or-create the NVFlare autocontainer code object.
nvflare_autocontainer = project.get_code_object_by_name("NVFlare Chemprop Autocontainer")

if nvflare_autocontainer is None:
    nvflare_autocontainer_input = CodeObjectCreateInput(
        name="NVFlare Chemprop Autocontainer",
        description="",
        input_data_schema_uids=[input_schema],
        project_uid=project.uid,
        code_type=CodeTypes.NVIDIA_FLARE_V2_6,
        config={
            "code_execution_mode": CodeExecutionMode.AUTO_CONTAINER_NVFLARE,
            "requirements": [
                "nvflare>=2.5.0",
                "chemprop==2.0.5",
                "tensorboard",
            ],
            "python_version": "3.11",
            "requirements_mode": RequirementMode.PYTHON_PIP,
            # The model code is taken from the local ./model/ folder.
            "code_location": CodeLocation.S3_MULTIPART_ZIP,
            "folder_path": "./model/",
        },
    )
    nvflare_autocontainer = session.code_object.create_code_object(nvflare_autocontainer_input)
    # Keep the object returned by wait_for_build, as step 6 does, so the
    # variable refers to the code object as it is after the build.
    nvflare_autocontainer = nvflare_autocontainer.wait_for_build(1200)
    print("Finished Creating Autocontainer")

# The same dataset is used for both training and validation in this
# installation check.
run_params = ModelTrainInput(
    code_object_uid=nvflare_autocontainer.uid,
    input_dataset_uids=[output_dataset.uid],
    one_fl_client_per_dataset=False,
    validation_dataset_uids=[output_dataset.uid],
    validation_datasets_inference_suffix=" - Results",
    timeout_seconds=1200,
    config_fed_client="",
    config_fed_server="",
    secrets_fed_client="",
    secrets_fed_server="",
    sync=False,
)
print(f"Starting to run federated training of {nvflare_autocontainer.name}")
model_train = session.code_object.train_model(run_params)
# sync=False above, so block here until the training run completes.
train_result = model_train.wait_for_completion(1200)
print(f"Finished training of {nvflare_autocontainer.name} with result: {train_result}")

82aefce1-eee9-4064-883b-bf698b0b01c0
