# Fitting Federated Generalized Linear Model (GLM) coefficients with FCP
This notebook demonstrates how to use the Rhino Health Python SDK to run a federated GLM via NVFlare and extract the fitted coefficients.

#### Prerequisites 
1. Have two datasets imported into FCP with the variables on which you want to run the regression (e.g. 'Yb', 'Xb', 'COV1', 'COV2', 'COV3', and 'COV4', which are the variables used by the formula in this example)
2. Build a container from the NVFlare-based GLM code from this example and push it to your ECR repo

### Initialization and Login

In [None]:
import getpass
import json
import numpy as np

import rhino_health as rh
from rhino_health.lib.endpoints.code_object.code_object_dataclass import (
    CodeObject,
    CodeObjectCreateInput,
    CodeTypes,
    ModelTrainInput
)
from rhino_health.lib.endpoints.endpoint import NameFilterMode

In [None]:
# Replace this with the email you use to log into Rhino Health
my_username = "my_email@example.com"

print("Logging In")
# The password is requested through getpass rather than being written into the notebook
session = rh.login(
    username=my_username,
    password=getpass.getpass(),
)
print("Logged In")

### Load the project, datasets, and data schema

In [None]:
my_project_name = "my_project"  # Replace this with your project name on Rhino Health
project = session.project.get_project_by_name(my_project_name)

# NOTE(review): the SDK is expected to return None when no project matches - confirm against the SDK docs.
# Failing here gives a clear message instead of an AttributeError when `project` is used in a later cell.
if project is None:
    raise ValueError(f"No project named '{my_project_name}' was found; check `my_project_name`")

In [None]:
dataset_search_string = "MyDataset"  # Replace this with a string that exists in all of the relevant datasets' names
datasets = project.search_for_datasets_by_name(dataset_search_string, name_filter_mode=NameFilterMode.CONTAINS)

# Stop with an actionable message instead of an IndexError on `datasets[0]` when nothing matched
if not datasets:
    raise ValueError(f"No datasets containing '{dataset_search_string}' in their name were found in the project")

# The schema of the first matching dataset is used as the input schema of the code object created below
# (assumes all of the matching datasets share that schema - verify for your project)
schema = datasets[0].data_schema

# Note: There are multiple ways to retrieve datasets using the SDK; this example relies on the datasets having similar names, such as "MyDataset 1" and "MyDataset 2"

### Create the code object with the desired configuration (including regression formula)

In [None]:
# Replace this with your workgroup ECR URI
my_base_ecr_uri = "my_base_ecr_uri"
# Replace this with the name of the Docker image uploaded to your ECR that contains the NVFlare GLM regression code.
# It is appended to the base URI after a ":" when the container image URI of the code object is built.
image_name = "image name"

#### Load the NVFlare config and set the regression settings (regression formula, regression family, optimization method, etc.)

In [None]:
formula = "Yb ~ Xb + COV1 + COV2 + COV3 + COV4"
method = "IRLS"  # Meaning we'll be using IRLS for optimization and not Newton Raphson
glm_type = "Binomial"  # Meaning we'll be using a logistic regression
config_fed_client_path = "examples/nvflare/regression-glm-coeff/config/config_fed_client.json"  # Replace this with the path to your config client file
config_fed_server_path = "examples/nvflare/regression-glm-coeff/config/config_fed_server.json"  # Replace this with the path to your config server file

with open(config_fed_client_path, encoding="utf-8") as f:
    config_fed_client_input = json.load(f)

# The run cell serializes `config_fed_server_input` as well, so the server config has to be loaded here too;
# it is passed on without modification
with open(config_fed_server_path, encoding="utf-8") as f:
    config_fed_server_input = json.load(f)

# Override the regression settings in the arguments of the first executor of the client config
config_fed_client_input['executors'][0]['executor']['args'].update(
    {
        'formula': formula,
        'method': method,
        'glm_type': glm_type,
    }
)

#### Create the Code Object

In [None]:
# Describe the code object: an NVFlare 2.3 container image that takes the datasets' schema as its input schema.
# The container image URI is the base ECR URI and the image name joined by ":".
code_object_config = CodeObjectCreateInput(
    name="GLM Model Name",
    description="GLM",
    input_data_schema_uids=[schema.uid],
    output_data_schema_uids=[None],
    project_uid=project.uid,
    code_type=CodeTypes.NVIDIA_FLARE_V2_3,
    config={"container_image_uri": f"{my_base_ecr_uri}:{image_name}"},
)

# Create the code object through the SDK session and report its name and uid
code_object = session.code_object.create_code_object(code_object_config)
print(f"Got code object '{code_object.name}' with uid {code_object.uid}")

### Run the federated model

In [None]:
# Training request: every dataset found above is an input, there are no validation datasets and no secrets,
# and both NVFlare configs are sent as JSON strings
run_params = ModelTrainInput(
    code_object_uid=code_object.uid,
    input_dataset_uids=[ds.uid for ds in datasets],
    one_fl_client_per_dataset=False,
    validation_dataset_uids=[],
    validation_datasets_inference_suffix="",
    timeout_seconds=600,
    config_fed_server=json.dumps(config_fed_server_input),
    config_fed_client=json.dumps(config_fed_client_input),
    secrets_fed_client="",
    secrets_fed_server="",
    sync=False,
)

print(f"Starting to run federated training of {code_object.name}")
model_train = session.code_object.train_model(run_params)
# Keep the run uid: it is needed to fetch the model parameters once the run is done
code_run_uid = model_train.code_run_uid
# The request above was submitted with sync=False, so wait explicitly for the run result here
run_result = model_train.wait_for_completion()
print(
    f"Result status is '{run_result.status.value}', "
    f"errors={run_result.results_info.get('errors') if run_result.results_info else None}"
)

### Load and display the resulting coefficients and stderrs

In [None]:
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary Python objects - only load the output of runs you trust
model_output = np.load(session.code_run.get_model_params(code_run_uid), allow_pickle=True)

# .item() unwraps the single object held by the loaded array; it is indexed by key below
scalar_value = model_output.item()
betas = scalar_value['beta']
stderrs = scalar_value['fed_stderror']

# One line per coefficient, formatted as "<beta> (<stderr>)", under a header line
print(
    "Beta      (Stderr)\n"
    + "\n".join(f"{beta} ({stderr})" for beta, stderr in zip(betas, stderrs))
)