# Example: Metrics calculations using the rhino_health SDK

### Load all necessary libraries, including 'rhino_health'

In [21]:
from getpass import getpass
import rhino_health as rh
import pandas as pd
from rhino_health.lib.metrics import *
from rhino_health.lib.metrics.epidemiology.two_by_two_table_based_metrics import *
from rhino_health.lib.metrics.statistics_tests import ChiSquare, TTest, OneWayANOVA

In [22]:
! pip install --upgrade rhino_health



### Log in to the Rhino Health Platform

**Note: Replace the value of `my_username` in the cell below with the email address you use to log in to Rhino Health.**


In [23]:
# Account and API endpoint used for this session.
my_username = "daniel.david@rhinohealth.com"  # Replace this with the email you use to log into Rhino Health
staging_url = "https://staging.rhinofcp.com/api/"

print("Logging In")
session = rh.login(
    username=my_username,
    password=getpass(),  # prompted interactively, so the password is never written into the notebook
    rhino_api_url=staging_url,
)
print("Logged In")

# Alternative: omit `rhino_api_url` to log in against the SDK's default API URL
# instead of the staging URL above.
#
# my_username = "my_email@example.com" # Replace this with the email you use to log into Rhino Health
#
# print("Logging In")
# session = rh.login(username=my_username, password=getpass())
# print("Logged In")

Logging In
Logged In


### Run this cell to list all of the projects in your environment:

In [24]:
# Search with an empty name to list the projects available to this session,
# then print one project name per line.
projects = session.project.search_for_projects_by_name(name="")
for listed_project in projects:
    print(listed_project.name)

metrics_examples
cox_test
Pre-approved Code Project
test_dcm2png
Incremental
Tutorial 2 GCP
Tutorial 1 GCP
Test Noy
Validation Project
Tal Test
Yuval Test HC
Yuval Test
Test IC


### Load the Project you would like to calculate metrics for by placing the Project's name below
Replace `metrics_examples` in the cell below with the name of your project.

In [25]:
# Look the project up by name and stop early if nothing usable came back.
project = session.project.get_project_by_name("metrics_examples")
if not project:
    raise ValueError("Project not found.")

print(f"Selected project name: {project.name}")

# Template for your own project (replace PROJECT_NAME):
#
# project = session.project.get_project_by_name("PROJECT_NAME")
# if not project:
#     raise ValueError("Project not found.")
#
# print("Selected project name:", project.name)

Selected project name: metrics_examples


### List the available datasets in your project, then load the ones you would like to calculate metrics for

Run this cell to list all of your available datasets in your project

In [26]:
# Fetch datasets using an empty name filter, then narrow the list down to the
# ones attached to the selected project.
all_datasets = session.dataset.search_for_datasets_by_name(name="")

project_datasets = []
for candidate in all_datasets:
    owning_project = candidate.project
    # Skip datasets with no project, or with a project other than the selected one.
    if owning_project and owning_project.uid == project.uid:
        project_datasets.append(candidate)

print(f"\nDatasets in project '{project.name}':")
for dataset in project_datasets:
    print(f"- {dataset.name}")


Datasets in project 'metrics_examples':
- input_1
- input_2


Load the datasets you would like to calculate metrics for by placing their names in the cell below.
Replace `input_1` and `input_2` with the names of your datasets.

In [34]:
# Names of the datasets to aggregate over; replace with your own dataset names.
dataset_names = ["input_1", "input_2"]

# Resolve each name to its dataset UID. The UIDs are stored as strings, which is
# the form the metric cells below pass to `aggregate_dataset_metric`.
dataset_uids = []
for dataset_name in dataset_names:
    dataset = project.get_dataset_by_name(dataset_name)
    # Mirror the project lookup check: fail with a clear message when the lookup
    # yields nothing, instead of an AttributeError on `.uid`.
    if not dataset:
        raise ValueError(
            f"Dataset '{dataset_name}' not found in project '{project.name}'."
        )
    dataset_uids.append(str(dataset.uid))

The datasets should be in the following format:

In [27]:
# Illustrative sample of the expected dataset layout: one row per record,
# with numeric, boolean and categorical columns.
pd.DataFrame(
    data=[
        (84.0, True, False, 'Low', 0.3),
        (97.0, True, True, 'Medium', 0.51),
        (91.0, True, False, 'Medium', 0.12),
        (90.0, True, False, 'High', 0.03),
        (124.0, False, True, 'High', 0.413),
        (97.0, True, True, 'High', 0.3),
    ],
    columns=['Weight', 'Pneumonia', 'Smoking', 'Inflammation Level', 'Spo2 Level'],
)

Unnamed: 0,Weight,Pneumonia,Smoking,Inflammation Level,Spo2 Level
0,84.0,True,False,Low,0.3
1,97.0,True,True,Medium,0.51
2,91.0,True,False,Medium,0.12
3,90.0,True,False,High,0.03
4,124.0,False,True,High,0.413
5,97.0,True,True,High,0.3


In [None]:
# Optional: print the schema of every selected dataset.
# NOTE(review): this cell is left disabled. The saved output below shows an
# HTTP 500 ("No active version exists for Dataset sample_input1"), presumably
# from an earlier run of this code - confirm the datasets have an active
# version before re-enabling it.
# for uid in dataset_uids:
#     ds = session.dataset.get_dataset(uid)
#     print(f"\nSchema for dataset '{ds.name}':")
#     print(ds.schema)


Exception: Failed to make request
Status is 500, Trace Id: 0421c7sdy4, Errors: No active version exists for Dataset sample_input1, Content is b'{"errors":[{"title":"Internal server error","message":"No active version exists for Dataset sample_input1","extra_info":{}}]}'



### Calculate the Mean weight for the two Datasets.

In [35]:
# Configure a mean over the "Weight" column and aggregate it across the
# selected datasets; the resulting MetricResponse is displayed as cell output.
mean_config = Mean(variable="Weight")
session.project.aggregate_dataset_metric(dataset_uids, mean_config)


MetricResponse(output={'mean': 96.92727272727272}, metric_configuration_dict={'metric': 'mean', 'arguments': '{"data_filters":[],"count_variable_name":"variable","variable":"Weight"}'}, dataset_uids=['90c9d01d-9e74-423a-893c-2bc3b6ec2b21', 'aca46c02-cbc2-412f-9b51-b81e29f89a58'], session=<rhino_health.lib.rhino_session.RhinoSession object at 0x1244b86d0>)

### Get the two-by-two table of detected ("Pneumonia") versus exposed ("Smoking") values.

In [36]:
# Two-by-two table configuration: "Pneumonia" is the detected column and
# "Smoking" is the exposed column.
tbtt = TwoByTwoTable(
    variable="id",
    detected_column_name="Pneumonia",
    exposed_column_name="Smoking",
)
table = session.project.aggregate_dataset_metric(dataset_uids, tbtt)

# Render the aggregated table as a DataFrame so the notebook displays it richly.
pd.DataFrame(table.as_table())

Unnamed: 0,True,False
True,18,5
False,19,5


### Calculate the Odds Ratio between the "Pneumonia" feature and the "Smoking" feature for the two Datasets.

In [37]:
# Odds ratio configuration using the same detected/exposed columns as the
# two-by-two table: "Pneumonia" (detected) and "Smoking" (exposed).
odds_ratio = OddsRatio(
    variable="id",
    detected_column_name="Pneumonia",
    exposed_column_name="Smoking",
)

# Aggregate across the selected datasets; the MetricResponse is the cell output.
session.project.aggregate_dataset_metric(dataset_uids, odds_ratio)

  __pydantic_self__.__pydantic_validator__.validate_python(data, self_instance=__pydantic_self__)


MetricResponse(output={'odds_ratio': 0.9473684210526315}, metric_configuration_dict={'metric': 'odds_ratio', 'arguments': '{"data_filters":[],"count_variable_name":"variable","variable":"id","detected_column_name":"Pneumonia","exposed_column_name":"Smoking"}'}, dataset_uids=['90c9d01d-9e74-423a-893c-2bc3b6ec2b21', 'aca46c02-cbc2-412f-9b51-b81e29f89a58'], session=<rhino_health.lib.rhino_session.RhinoSession object at 0x1244b86d0>)

### Calculate Chi Square test statistic between the "Pneumonia" feature and the "Smoking" feature for the two Datasets.

In [38]:
# Chi-square test configuration between the "Pneumonia" and "Smoking" columns.
chi_square = ChiSquare(
    variable="id",
    variable_1="Pneumonia",
    variable_2="Smoking",
)

# Aggregate across the selected datasets; the MetricResponse is the cell output.
session.project.aggregate_dataset_metric(dataset_uids, chi_square)

MetricResponse(output={'chi_square': {'statistic': 0.00575303564433998, 'p_value': 0.9998841449487383, 'dof': 1}}, metric_configuration_dict={'metric': 'chi_square', 'arguments': '{"data_filters":[],"count_variable_name":"variable","variable":"id","variable_1":"Pneumonia","variable_2":"Smoking"}'}, dataset_uids=['90c9d01d-9e74-423a-893c-2bc3b6ec2b21', 'aca46c02-cbc2-412f-9b51-b81e29f89a58'], session=<rhino_health.lib.rhino_session.RhinoSession object at 0x1244b86d0>)

### Calculate T-Test between the "Pneumonia" feature and the "Spo2 Level" feature for the two Datasets.

In [39]:
# T-test configuration: "Spo2 Level" is the numeric variable and "Pneumonia"
# is the categorical (grouping) variable.
t_test = TTest(
    numeric_variable="Spo2 Level",
    categorical_variable="Pneumonia",
)

# Aggregate across the selected datasets; the MetricResponse is the cell output.
session.project.aggregate_dataset_metric(dataset_uids, t_test)

MetricResponse(output={'t_test': {'statistic': -1.8543214061737123, 'p_value': 0.07486568059681697, 'dof': 26.47342509623292}}, metric_configuration_dict={'metric': 't_test', 'arguments': '{"data_filters":[],"count_variable_name":"variable","numeric_variable":"Spo2 Level","categorical_variable":"Pneumonia"}'}, dataset_uids=['90c9d01d-9e74-423a-893c-2bc3b6ec2b21', 'aca46c02-cbc2-412f-9b51-b81e29f89a58'], session=<rhino_health.lib.rhino_session.RhinoSession object at 0x1244b86d0>)

### Calculate One Way ANOVA between the "Inflammation Level" feature and the "Spo2 Level" feature for the two Datasets.

In [41]:
# One-way ANOVA configuration: "Spo2 Level" is the numeric variable and
# "Inflammation Level" is the categorical (grouping) variable.
anova_config = OneWayANOVA(
    variable="id",
    numeric_variable="Spo2 Level",
    categorical_variable="Inflammation Level",
)

# Go through `session.project`, the same entry point every other metric cell in
# this notebook uses, so all aggregations follow one calling convention.
result = session.project.aggregate_dataset_metric(dataset_uids, anova_config)

print(f"\nANOVA result: {result}")


ANOVA result: output={'one_way_ANOVA': {'statistic': 1.343284460075524, 'p_value': 0.26989159024739706, 'sst': 1.487793927272727, 'sse': 1.4147035688259109, 'ssc': 0.07309035844681656, 'msc': 0.03654517922340828, 'mse': 0.027205837862036748, 'dfc': 2, 'dfe': 52, 'dft': 54}} metric_configuration_dict={'metric': 'one_way_ANOVA', 'arguments': '{"data_filters":[],"count_variable_name":"variable","variable":"id","numeric_variable":"Spo2 Level","categorical_variable":"Inflammation Level"}'} dataset_uids=['90c9d01d-9e74-423a-893c-2bc3b6ec2b21', 'aca46c02-cbc2-412f-9b51-b81e29f89a58'] session=<rhino_health.lib.rhino_session.RhinoSession object at 0x1244b86d0>
