In [7]:
import pandas as pd
import numpy as np
import duckdb
import pyarrow
import matplotlib.pyplot as plt
import datetime
import re
import os
import json

In [2]:
from src.pyclif.clif import CLIF

In [3]:
## get site specific details from project config file
import json
def load_config(json_path='config/config.json'):
    """Load the site-specific project configuration from a JSON file.

    Args:
        json_path: Path of the configuration file. Defaults to
            'config/config.json', resolved against the current working
            directory, so existing zero-argument calls keep working.

    Returns:
        The parsed JSON document as returned by ``json.load``.

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Read as UTF-8 explicitly instead of relying on the platform-default
    # encoding, which differs between operating systems.
    with open(json_path, 'r', encoding='utf-8') as file:
        config = json.load(file)
    # Report the file that was actually read, not a fixed name.
    print(f"Loaded configuration from {json_path}")

    return config

# Pull the site-specific settings out of the project configuration.
# Any failure is reported and then re-raised: the following cells need
# `tables_path` and `file_type`, so carrying on after an error would only
# surface later as a NameError (or silently reuse values left over from an
# earlier run of this cell).
try:
    config = load_config()
    site_name = config["site_name"]
    tables_path = config["tables_path"]
    file_type = config["file_type"]
except KeyError as e:
    print(f"Missing key in configuration: {e}")
    raise
except Exception as e:
    print(f"An error occurred while loading configuration: {e}")
    raise

Loaded configuration from config.json


In [4]:
# Build the CLIF object from the data directory and file type read from the
# project configuration.
# TODO: find a way to take these settings from terminal arguments, or ask for
# them as input, while setting up the CLIF class.
clif = CLIF(data_dir=tables_path, filetype=file_type)

CLIF Object Initialized.


In [6]:
# Ask the CLIF object to load the "patient" and "hospitalization" tables.
# NOTE(review): the output recorded for this cell is
#   TypeError: load_table() got an unexpected keyword argument 'filters'
# Presumably CLIF.load forwards a `filters` keyword that load_table does not
# accept -- confirm in src/pyclif/clif.py; the call below is not at fault.
clif.load(["patient", "hospitalization"])

TypeError: load_table() got an unexpected keyword argument 'filters'

In [10]:
# Paths
DATA_DIR = "data"
OUTPUT_DIR = "data"


def _permissible_values_path(table_name, col_name, data_dir):
    """Return the lookup-CSV path for a category/group column, else ``None``.

    Category columns drop their ``_category`` suffix before the file name is
    built; group columns keep the full column name.
    """
    if col_name.endswith("_category"):
        variable_name = col_name.rsplit("_", 1)[0]
        return os.path.join(
            data_dir, f"clif_{table_name}_{variable_name}_categories.csv"
        )
    if col_name.endswith("_group"):
        # NOTE(review): unlike the category branch, the "_group" suffix is kept,
        # giving names such as "clif_<table>_<x>_group_groups.csv". Kept as-is;
        # confirm against the actual file names in the data directory.
        return os.path.join(data_dir, f"clif_{table_name}_{col_name}_groups.csv")
    return None


def _load_permissible_values(csv_path):
    """Return the unique non-null values of the first column of ``csv_path``.

    A missing file yields an empty list, so a column without a lookup CSV
    still gets a (blank) ``permissible_values`` entry.
    """
    if csv_path is None or not os.path.exists(csv_path):
        return []
    lookup = pd.read_csv(csv_path)
    # The first column of the CSV is assumed to hold the permissible values.
    return lookup.iloc[:, 0].dropna().unique().tolist()


def build_table_schema(table_name, columns_def, data_dir=DATA_DIR):
    """Build the JSON-serialisable schema description for one table.

    Args:
        table_name: Name of the table; also used to locate its lookup CSVs.
        columns_def: Mapping ``{column_name: data_type}`` in column order.
        data_dir: Directory searched for the permissible-value CSVs.

    Returns:
        A dict with the keys ``table_name``, ``columns``,
        ``required_columns``, ``category_columns`` and ``group_columns``.
        Each entry of ``columns`` describes one column; category and group
        columns additionally carry a ``permissible_values`` list.
    """
    column_names = list(columns_def)
    category_columns = [name for name in column_names if name.endswith("_category")]
    group_columns = [name for name in column_names if name.endswith("_group")]

    # All columns are considered required for demonstration purposes.
    required_columns = list(column_names)
    required_lookup = set(required_columns)

    columns_info = []
    for col_name, col_type in columns_def.items():
        is_category = col_name.endswith("_category")
        is_group = col_name.endswith("_group")

        col_info = {
            "name": col_name,
            "data_type": col_type,
            "required": col_name in required_lookup,
            "is_category_column": is_category,
            "is_group_column": is_group,
        }

        # Only category/group columns carry a permissible-values list.
        if is_category or is_group:
            col_info["permissible_values"] = _load_permissible_values(
                _permissible_values_path(table_name, col_name, data_dir)
            )

        columns_info.append(col_info)

    return {
        "table_name": table_name,
        "columns": columns_info,
        "required_columns": required_columns,
        "category_columns": category_columns,
        "group_columns": group_columns,
    }


# In a notebook kernel __name__ is "__main__", so this runs whenever the cell
# is executed; the guard only keeps the helpers above importable without
# touching the file system.
if __name__ == "__main__":
    # Load the all_tables.json
    with open(os.path.join(DATA_DIR, "all_tables.json"), "r", encoding="utf-8") as f:
        all_tables = json.load(f)

    # Ensure output directory exists
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    for table_name, columns_def in all_tables.items():
        # columns_def is a dict: {column_name: data_type, ...}
        print("current table", table_name)
        table_json = build_table_schema(table_name, columns_def, data_dir=DATA_DIR)

        # Write out to a JSON file
        output_path = os.path.join(OUTPUT_DIR, f"{table_name}.json")
        with open(output_path, "w", encoding="utf-8") as outfile:
            json.dump(table_json, outfile, indent=4)

        print(f"Generated {output_path}")

current table patient
Generated data/patient.json
current table hospitalization
Generated data/hospitalization.json
current table adt
Generated data/adt.json
current table vitals
Generated data/vitals.json
current table labs
Generated data/labs.json
current table patient_assessments
Generated data/patient_assessments.json
current table respiratory_support
Generated data/respiratory_support.json
current table medication_admin_continuous
Generated data/medication_admin_continuous.json
current table position
Generated data/position.json
current table microbiology_culture
Generated data/microbiology_culture.json
