## Import modules


In [1]:
from spezi_data_pipeline.data_access.firebase_fhir_data_access import FirebaseFHIRAccess
from spezi_data_pipeline.data_flattening.fhir_resources_flattener import flatten_fhir_resources, FHIRDataFrame
from spezi_data_pipeline.data_processing.data_processor import FHIRDataProcessor
from spezi_data_pipeline.data_processing.observation_processor import calculate_activity_index
from spezi_data_pipeline.data_exploration.data_explorer import DataExplorer, visualizer_factory, explore_total_records_number
from spezi_data_pipeline.data_export.data_exporter import DataExporter

## Define credential files

In [2]:
# Firebase project ID. Replace with your own project ID
# (for example "spezi-data-pipeline").
project_id = "cs342-2023-paws"

# Service account key file used to authenticate against Firebase. Replace with
# the path to your own key file (for example "path_to_service_account_key_file.json").
# NOTE(review): key files are credentials -- keep them out of version control.
service_account_key_file = "cs342-2023-paws-firebase-adminsdk-cz0fu-444bf65586.json"

# Collection and subcollection in which the FHIR observations are stored.
collection_name = "users"
subcollection_name = "HealthKit"

# LOINC codes of the HealthKit quantities to query from Firebase Firestore.
loinc_codes = ["55423-8", "8867-4"]
# loinc_codes = ["131328"]

## Initialize the FirebaseFHIRAccess class using your Firebase credentials

In [3]:
# Create the Firebase access object from the project ID and service account
# key file defined above, then open the connection.
firebase_access = FirebaseFHIRAccess(project_id, service_account_key_file)
firebase_access.connect()

# Fetch the resources stored under the configured collection/subcollection,
# passing `loinc_codes` as the codes to query.
fhir_observations = firebase_access.fetch_data(collection_name, subcollection_name, loinc_codes)

# Flatten the fetched FHIR resources; later cells read the result via `.df`.
flattened_fhir_dataframe = flatten_fhir_resources(fhir_observations)

# TODO: Before fetching, check that every requested LOINC code is supported.
# TODO: Ensure ECG data is parsed on its own: if `loinc_codes` contains the ECG
# code together with other codes, reject it (ECG requires len(loinc_codes) == 1).

In [4]:
# Preview the first rows of the flattened DataFrame.
flattened_fhir_dataframe.df.head()

Unnamed: 0,UserId,ResourceId,EffectiveDateTime,QuantityName,LoincCode,Display,AppleHealthKitCode,QuantityUnit,QuantityValue
0,3EUoHxIuYkWMKcnLfK38nTGOqHn1,0840A317-CD4A-4CA8-9975-96A9B6877116,2024-04-02,Step Count,55423-8,Number of steps in unspecified time Pedometer,HKQuantityTypeIdentifierStepCount,steps,21.0
1,3EUoHxIuYkWMKcnLfK38nTGOqHn1,17776E12-9FCB-410A-96E7-6F546C83614F,2024-04-02,Step Count,55423-8,Number of steps in unspecified time Pedometer,HKQuantityTypeIdentifierStepCount,steps,25.0
2,3EUoHxIuYkWMKcnLfK38nTGOqHn1,182B9F13-98F0-4D66-BCF3-05F5F9EB83BE,2024-04-02,Step Count,55423-8,Number of steps in unspecified time Pedometer,HKQuantityTypeIdentifierStepCount,steps,38.0
3,3EUoHxIuYkWMKcnLfK38nTGOqHn1,2039DD0B-E1F1-4E8D-A748-7331F17DE8D4,2024-04-02,Step Count,55423-8,Number of steps in unspecified time Pedometer,HKQuantityTypeIdentifierStepCount,steps,441.0
4,3EUoHxIuYkWMKcnLfK38nTGOqHn1,238D6D1A-B370-4C8E-B22D-3484E688143F,2024-04-02,Step Count,55423-8,Number of steps in unspecified time Pedometer,HKQuantityTypeIdentifierStepCount,steps,296.0


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from pandas.plotting import table
import textwrap

# Take the first four rows of the flattened data for the preview table.
# NOTE: `processed_fhir_dataframe` is only created in the "Apply basic
# processing" section further down, so referencing it here raises a NameError
# on a fresh kernel (Restart & Run All). To render the processed data instead,
# run this cell after that section and swap in `processed_fhir_dataframe.df`.
df = flattened_fhir_dataframe.df.iloc[0:4, :]

# The axes only host the table, so hide both axis lines and the frame.
fig, ax = plt.subplots(figsize=(12 * 1.4, 3 * 1.4))

ax.xaxis.set_visible(False)
ax.yaxis.set_visible(False)
ax.set_frame_on(False)

def wrap_text(text, width=10):
    """Break ``text`` into lines of at most ``width`` characters.

    Args:
        text: The string to wrap.
        width: Maximum line length, in characters.

    Returns:
        The wrapped lines joined with newline characters (an empty string
        when ``text`` is empty).
    """
    wrapped_lines = textwrap.wrap(text, width)
    return "\n".join(wrapped_lines)

# Wrap the text of every cell so long values fit the fixed column width.
# `DataFrame.applymap` is deprecated since pandas 2.1; mapping each column
# element-wise yields the same result on both older and newer pandas versions.
wrapped_df = df.apply(lambda column: column.map(lambda value: wrap_text(str(value))))

tbl = table(ax, wrapped_df, loc="center", cellLoc="center", colWidths=[0.2] * len(df.columns))

tbl.auto_set_font_size(False)
tbl.set_fontsize(12)
tbl.scale(1.2, 1.2)

# Center the text and give every cell the same fixed width and height.
for cell in tbl.get_celld().values():
    cell.set_text_props(ha="center", va="center")
    cell.set_width(0.2)
    cell.set_height(2 * 0.3)

# Uncomment to also write the rendered table to an image file.
# plt.savefig('processed_step_counts_table_image.png', bbox_inches='tight', dpi=300)

plt.show()

## Explore the number of recordings in the database by LOINC code  

In [None]:
# Explore the number of fetched records by LOINC code using the pipeline's helper.
explore_total_records_number(flattened_fhir_dataframe.df)

In [None]:
# Optional filters; leave as None to include every record.
start_date = end_date = user_ids = None

# Convert on a new frame (`assign` returns a copy) so the DataFrame held by
# `flattened_fhir_dataframe` is not mutated before later cells consume it.
df = flattened_fhir_dataframe.df.assign(
    EffectiveDateTime=lambda frame: pd.to_datetime(frame["EffectiveDateTime"])
)

# Apply each date bound independently so that a single bound also filters
# (previously the range was ignored unless both bounds were given).
if start_date is not None:
    df = df[df["EffectiveDateTime"] >= start_date]
if end_date is not None:
    df = df[df["EffectiveDateTime"] <= end_date]

# Accept a single user ID as well as a list of IDs.
if isinstance(user_ids, str):
    user_ids = [user_ids]

if user_ids is not None:
    df = df[df["UserId"].isin(user_ids)]

# Rows: LOINC codes, columns: user IDs, values: record counts (0 where absent).
counts = df.groupby(["LoincCode", "UserId"]).size().unstack(fill_value=0)

fig, ax = plt.subplots(figsize=(16, 8))  # Adjust the size as needed
counts.plot(kind="bar", ax=ax)

# Title and axis labels, set on the explicit axes object.
ax.set_title("Number of records by Loinc code", fontsize=16)
ax.set_xlabel("Loinc code", fontsize=14)
ax.set_ylabel("Count", fontsize=14)
plt.setp(ax.get_xticklabels(), rotation=45, ha="right", fontsize=12)

# Place the legend below the plot.
ax.legend(
    title="User ID",
    fontsize=12,
    title_fontsize=14,
    bbox_to_anchor=(0.5, -0.24),  # Adjust this to move the legend closer or further
    loc="upper center",
    ncol=2,
)

fig.tight_layout()
fig.savefig("plot_total_resources.png", bbox_inches="tight", dpi=300)

plt.show()


## Apply basic processing for convenient data readability

In [None]:
# Run the pipeline's processing step on the flattened observations, then
# preview the first rows of the result.
fhir_data_processor = FHIRDataProcessor()
processed_fhir_dataframe = fhir_data_processor.process_fhir_data(flattened_fhir_dataframe)

processed_fhir_dataframe.df.head()

## Explore Data

### HealthKit Quantity Example

In [None]:
# Users and date window available for the optional filters below.
selected_users = [
    "3EUoHxIuYkWMKcnLfK38nTGOqHn1",
    "7uMKVmPZdwgtb9hc6r9YZyYXnwc2",
    "sgsxyilwB3T3xf3LIvkpSajN3NW2",
]
selected_start_date, selected_end_date = "2024-02-22", "2024-04-02"

# Build a visualizer from the processed data.
explorer = visualizer_factory(processed_fhir_dataframe)

# Optional filters -- uncomment to restrict what is plotted:
# explorer.set_user_ids(selected_users)
# explorer.set_date_range(selected_start_date, selected_end_date)
# explorer.set_y_bounds(50, 50000)

figs = explorer.create_static_plot(processed_fhir_dataframe)

### Calculate Activity Index

In [None]:
# Restrict the activity-index plot to a single user.
selected_users = ["XrftRMc358NndzcRWEQ7P2MxvabZ"]
# selected_users = ["7uMKVmPZdwgtb9hc6r9YZyYXnwc2"]

# Derive the activity index from the processed observations.
activity_index_fhir_dataframe = calculate_activity_index(processed_fhir_dataframe)

explorer = visualizer_factory(activity_index_fhir_dataframe)
explorer.set_user_ids(selected_users)
# explorer.set_date_range(selected_start_date, selected_end_date)

figs = explorer.create_static_plot(activity_index_fhir_dataframe)

# Preview the first rows of the activity-index data.
activity_index_fhir_dataframe.df.head()

### ECG Recording Example

In [None]:
# selected_users = ["k3BnzOGAO0fIaxkDVXTZKlj3LAu2", "3EUoHxIuYkWMKcnLfK38nTGOqHn1"]

# selected_start_date = "2023-03-13"
# selected_end_date = "2023-03-13"

# visualizer = visualizer_factory(processed_fhir_dataframe)
# visualizer.set_user_ids(selected_users)
# visualizer.set_date_range(selected_start_date, selected_end_date)
# figs = visualizer.plot_ecg_subplots(processed_fhir_dataframe) 

## Export data

In [None]:
# User ID for the optional `set_user_ids` filter below; it has no effect while
# that call stays commented out.
selected_users = ["sEmijWpn0vXe1cj60GO5kkjkrdT4"]

exporter = DataExporter(processed_fhir_dataframe)
# Optional filters -- uncomment to restrict what is exported:
# exporter.set_user_ids(selected_users)
# exporter.set_date_range(selected_start_date, selected_end_date)
# exporter.set_date_range(selected_start_date, selected_start_date)
# Create and save the plot; "data_plot" is presumably the output file name --
# TODO confirm against DataExporter.
exporter.create_and_save_plot("data_plot") 