# Example: Metrics calculations using the rhino_health SDK

### Load all necessary libraries, including 'rhino_health'

In [None]:
from getpass import getpass
import rhino_health
import pandas as pd
from rhino_health.lib.metrics import *
from rhino_health.lib.metrics.epidemiology.two_by_two_table_based_metrics import *
from rhino_health.lib.metrics.statistics_tests import ChiSquare, TTest, OneWayANOVA

### Log in to the Rhino Health Platform

**Note: Replace "my_email@example.com" with the email address you use to log in to Rhino Health.**


In [None]:
# Replace the placeholder below with the email you use to log into Rhino Health.
my_username = "my_email@example.com"

print("Logging In")
# getpass() asks for the password when the cell runs, so it is not hard-coded here.
session = rhino_health.login(
    username=my_username,
    password=getpass(),
)
print("Logged In")

### Load the Project you would like to calculate metrics for by placing the Project's name below
Replace `PROJECT_NAME` with the name of your project

In [None]:
# Look the Project up by name; the dataset lookups below go through this object.
project = session.project.get_project_by_name(
    "PROJECT_NAME",
)

### Load the datasets you would like to calculate metrics for by placing their names below
Replace `DATASET_1` & `DATASET_2` with the names of your datasets

In [None]:
# The metric calls below receive this list as dataset UIDs, so take the `.uid`
# of each dataset returned by the lookup instead of storing the dataset object.
dataset_uids = [
    project.get_dataset_by_name("DATASET_1").uid,
    project.get_dataset_by_name("DATASET_2").uid,
]

The datasets should be in the following format:

In [None]:
# Illustrative rows only. The metric cells below pass variable="id", so the
# example format includes an `id` column alongside the measured features.
# NOTE(review): confirm the identifier column name used in your own datasets.
example_df = pd.DataFrame({
    'id': [1, 2, 3, 4, 5, 6],
    'Weight': [84.0, 97.0, 91.0, 90.0, 124.0, 97.0],
    'Pneumonia': [True, True, True, True, False, True],
    'Smoking': [False, True, False, False, True, True],
    'Inflammation Level': ['Low', 'Medium', 'Medium', 'High', 'High', 'High'],
    'Spo2 Level': [0.3, 0.51, 0.12, 0.03, 0.413, 0.3],
})
# A bare name as the last statement makes the notebook render the frame.
example_df

### Calculate the Mean weight for the two Datasets.

In [None]:
# Configure a Mean metric on the "Weight" column, then aggregate it over the
# datasets listed in dataset_uids.
mean_config = Mean(variable="Weight")
session.project.aggregate_dataset_metric(
    dataset_uids,
    mean_config,
)

### Get the two by two table for detected and exposed values.

In [None]:
# Two-by-two table with "Pneumonia" as the detected column and "Smoking" as
# the exposed column.
tbtt = TwoByTwoTable(
    variable="id",
    detected_column_name="Pneumonia",
    exposed_column_name="Smoking",
)
table = session.project.aggregate_dataset_metric(dataset_uids, tbtt)

# Wrap the table output in a DataFrame so the notebook renders it as a grid.
pd.DataFrame(table.as_table())

### Calculate the Odds Ratio between the "Pneumonia" feature and the "Smoking" feature for the two Datasets.

In [None]:
# Odds Ratio configuration: "Pneumonia" is the detected column and "Smoking"
# is the exposed column.
odds_ratio = OddsRatio(
    variable="id",
    detected_column_name="Pneumonia",
    exposed_column_name="Smoking",
)

# Aggregate the metric over the datasets listed in dataset_uids.
session.project.aggregate_dataset_metric(
    dataset_uids,
    odds_ratio,
)

### Calculate Chi Square test statistic between the "Pneumonia" feature and the "Smoking" feature for the two Datasets.

In [None]:
# Chi Square configuration over the "Pneumonia" and "Smoking" columns.
chi_square = ChiSquare(
    variable="id",
    variable_1="Pneumonia",
    variable_2="Smoking",
)
session.project.aggregate_dataset_metric(
    dataset_uids,
    chi_square,
)

### Calculate T-Test between the "Pneumonia" feature and the "Spo2 Level" feature for the two Datasets.

In [None]:
# T-Test configuration: "Spo2 Level" is the numeric variable and "Pneumonia"
# is the categorical variable.
t_test = TTest(
    numeric_variable="Spo2 Level",
    categorical_variable="Pneumonia",
)
session.project.aggregate_dataset_metric(
    dataset_uids,
    t_test,
)

### Calculate One Way ANOVA between the "Inflammation Level" feature and the "Spo2 Level" feature for the two Datasets.

In [None]:
# One Way ANOVA configuration: "Spo2 Level" is the numeric variable and
# "Inflammation Level" is the categorical variable.
anova_config = OneWayANOVA(
    variable="id",
    numeric_variable="Spo2 Level",
    categorical_variable="Inflammation Level",
)

# Call through session.project, as the other metric cells in this notebook do,
# and end on the bare name so the notebook shows the result instead of only
# storing it.
result = session.project.aggregate_dataset_metric(dataset_uids, anova_config)
result