# Creating groups of laboratory analyses

The table in `categorized_analyses.csv` was created with the help of a medical doctor.
We will use these groups of analyses to build additional data sources derived from the laboratory data.

In [None]:
import numpy as np
import pandas as pd

In [None]:
# Load the categorized analyses, discard the French category label and expose
# the English one under the shorter column name "category".
analyses = (
    pd.read_csv("categorized_analyses.csv")
    .drop(columns="category_fr")
    .rename(columns={"category_en": "category"})
)

In [None]:
# Display the loaded table (columns after the drop/rename above).
analyses

In [None]:
# Distinct category labels, shown together with how many there are.
categories = analyses["category"].unique()
(categories, len(categories))

In [None]:
# Iterating a groupby yields (category, sub-table) pairs, which dict() turns
# directly into a category -> sub-table mapping.
categorized_analyses = dict(iter(analyses.groupby("category")))
categories = list(categorized_analyses)

In [None]:
# Display the sub-table stored under the "renal" category.
categorized_analyses["renal"]

In [None]:
# Number of analyses (rows) in each category.
category_sizes = analyses.groupby("category").size()

# Summed event count per category, with the group size attached as
# "num_elements" (aligned on the category index).
grouped_analyses = (
    analyses.drop(columns="itemid")
    .groupby("category")
    .agg({"event_count": "sum"})
    .assign(num_elements=category_sizes)
)

In [None]:
# Display the per-category summary (event_count sum and num_elements).
grouped_analyses

In [None]:
# Map each category to the plain list of "itemid" values in its sub-table.
categorized_analyses_codes = {
    name: frame["itemid"].tolist()
    for name, frame in categorized_analyses.items()
}

In [None]:
import json

In [None]:
output_file = "categorized_analyses.json"

# Render each category as one `"name": [codes]` line: four-space indent, one
# category per line, no spaces inside the code lists. Dumping a single-entry
# dict lets json handle key/value escaping; slicing off the first and last
# character removes that dict's surrounding braces.
entries = [
    "    " + json.dumps({key: value}, separators=(",", ": "))[1:-1]
    for key, value in categorized_analyses_codes.items()
]

# Joining with ",\n" never produces a trailing comma, so the document can be
# written in a single pass instead of being written and then patched in place
# through an "r+" handle. An empty mapping yields "{\n}\n", which is still
# valid JSON (the previous approach raised IndexError in that case).
body = ",\n".join(entries)
with open(output_file, "w") as f:
    f.write("{\n" + (body + "\n" if entries else "") + "}\n")