# Notebook #2: Federated Data Engineering
In this notebook, we'll convert chest X-ray (CXR) DICOM images to JPG files and run the same conversion code at multiple sites.

### Install the Rhino Health Python SDK, Load All Necessary Libraries, and Log In to the Rhino FCP

In [None]:
pip install --upgrade rhino_health

In [None]:
import getpass
import rhino_health as rh
from rhino_health.lib.endpoints.aimodel.aimodel_dataclass import (
    AIModel,
    AIModelCreateInput,
    AIModelRunInput,
    ModelTypes,
    CodeRunType
)

In [None]:
# Log in to the Rhino FCP. The password is read interactively with getpass
# rather than being hard-coded in the notebook.
my_username = "FCP_LOGIN_EMAIL" # Replace this with the email you use to log into Rhino Health
session = rh.login(
    username=my_username,
    password=getpass.getpass(),
)

### Retrieve Project and Cohort Information

In [None]:
# Look up the project this notebook works on.
project_name = "YOUR_PROJECT_NAME"  # Replace with your project name
project = session.project.get_project_by_name(project_name)

# NOTE(review): the SDK is expected to return None when no project matches
# the name - confirm against the SDK docs. Failing here gives a clear message
# instead of an AttributeError on the next attribute access.
if project is None:
    raise ValueError(
        f"No project named '{project_name}' was found; "
        "check the project name and your access to it on the Rhino FCP."
    )

# Report the first data schema of the project as a quick sanity check.
data_schemas = project.data_schemas
if not data_schemas:
    raise ValueError(f"Project '{project_name}' does not contain any data schemas.")
dataschema = data_schemas[0]
print(f"Loaded dataschema '{dataschema.name}' with uid '{dataschema.uid}'")

In [None]:
# Fetch the data schema of the CXR cohorts by name and keep its uid; the uid
# is used as the input schema when the compute model is defined.
cxr_schema = project.get_data_schema_by_name(
    "Auto-generated schema for mimic_cxr_dev",
    project_uid=project.uid,
)
cxr_schema_uid = cxr_schema.uid

In [None]:
# Index the project's collaborating workgroups by uid and by name so they can
# be looked up either way.
collaborators = project.collaborating_workgroups
workgroups_by_uid = {wg.uid: wg for wg in collaborators}
workgroups_by_name = {wg.name: wg for wg in collaborators}

# Pick out the two workgroups used in this notebook: the AI developer and the
# health system (HCO).
aidev_workgroup = workgroups_by_name["Decode Health - Sandbox"]
hco_workgroup = workgroups_by_name["Health System - Sandbox"]

print(f"Found workgroups '{aidev_workgroup.name}' and collaborators '{hco_workgroup.name}'")

### Get the CXR Cohorts From Both Sites

In [None]:
# Map each cohort of the project to the name of the workgroup that owns it.
cohorts = project.cohorts
cohorts_by_workgroup = {
    workgroups_by_uid[cohort.workgroup_uid].name: cohort
    for cohort in cohorts
}

# Fetch the CXR cohort of each site by name.
hco_cxr_cohort = project.get_cohort_by_name("mimic_cxr_hco")
aidev_cxr_cohort = project.get_cohort_by_name("mimic_cxr_dev")

# Keep the uids at hand; they are the inputs of the model run.
aidev_cxr_cohort_uid = aidev_cxr_cohort.uid
hco_cxr_cohort_uid = hco_cxr_cohort.uid
print(f"Loaded CXR cohorts '{hco_cxr_cohort.uid}', '{aidev_cxr_cohort.uid}'")

### We will use a Pre-defined Container Image with our Model

In [None]:
# URI of the pre-built container image (registry/repository:tag) that holds
# the DICOM to JPG conversion code.
cxr_image_uri = (
    "865551847959.dkr.ecr.us-east-1.amazonaws.com/"
    "workgroup-rhino-sandbox-decode-health:data-prep-sb-1"
)

### Define the Generalized Compute Model that will Convert DICOM Images to JPG Files

In [None]:
# Describe the Generalized Compute model: it takes cohorts that follow the CXR
# schema as input and runs the container image defined above.
compute_params = AIModelCreateInput(
    name="DICOM to JPG Transformation Code",
    description="CXR JPG transformation the AI dev and Health System datasets",
    project_uid=project.uid,
    model_type=ModelTypes.GENERALIZED_COMPUTE,
    input_data_schema_uids=[cxr_schema_uid],
    output_data_schema_uids=[None],  # Auto-Generating the Output Data Schema for the Model
    config={"container_image_uri": cxr_image_uri},
)

# Create the model on the platform and report the uid it was given.
compute_model = session.aimodel.create_aimodel(compute_params)
print(f"Got aimodel '{compute_model.name}' with uid {compute_model.uid}")

### Run the Model Defined in the Previous Cell

In [None]:
# Run the conversion model on the CXR cohorts of both sites. Output cohort
# names are given the "_conv" suffix.
run_params = AIModelRunInput(
    aimodel_uid=compute_model.uid,
    input_cohort_uids=[aidev_cxr_cohort_uid, hco_cxr_cohort_uid],
    output_cohort_names_suffix="_conv",
    timeout_seconds=600,
)

# Start the run, then wait for the run result before reporting.
model_run = session.aimodel.run_aimodel(run_params)
run_result = model_run.wait_for_completion()

# Report the final status together with any errors recorded in result_info.
print(f"Finished running {compute_model.name}")
print(f"Result status is '{run_result.status.value}', errors={run_result.result_info.get('errors') if run_result.result_info else None}")