In [1]:
# Install a pip package in the current Jupyter kernel.
# Running pip through sys.executable targets the interpreter that backs this
# kernel rather than whichever `pip` happens to be first on PATH.
import sys
!{sys.executable} -m pip install pandas_profiling

In [2]:
from qmenta.core.platform import Auth, post, parse_response
from getpass import getpass
import pandas as pd

import numpy as np
import utils
import utils_cleaning
import datetime
import pandas_profiling
import json

import os
sys.path.append("../..")

from MSDA_Querry2 import querries

## Registries

In [3]:
# Platform project id used for the registries data (sent as "_pid" in requests).
project_id_reg = 3202

# Base URL of the central QMENTA platform.
base_url = "https://platform.qmenta.com"
# Account e-mail used to log in; replace with your own before running.
username = "clement.gautrais@kuleuven.be"
# getpass() prompts for the password interactively, so it is not written in the notebook.
password = getpass()

# Log in once; auth_obj is then passed to every platform call made further down.
auth_obj = Auth.login(username, password, base_url)

# method to fetch the subjects data
def get_subjects_data(project_id):
    """Fetch the subject list of a platform project as a list of flat dicts.

    Posts to ``/patient_manager/get_patient_list`` using the module-level
    ``auth_obj`` and reshapes every returned record into a dict holding
    ``"id"`` (from ``_id``), ``"secret_name"`` (from ``patient_secret_name``)
    and one entry per ``md_``-prefixed field, stored without the prefix.

    Every field whose value is a dict is treated as a ``{"$date": <ms>}``
    timestamp and replaced by the ``datetime.datetime`` returned by
    ``datetime.datetime.fromtimestamp`` (local time). Dict values that cannot
    be converted that way are replaced by ``None``.

    Args:
        project_id: Sent to the platform as the ``_pid`` request field.

    Returns:
        list[dict]: One dict per record returned by the platform.
    """
    response = post(auth_obj, "/patient_manager/get_patient_list",
                    {"_pid": project_id},
                    timeout=600.0)

    subjects = [{
        "id": record["_id"],
        "secret_name": record["patient_secret_name"],
        **{
            key[3:]: record[key]
            for key in record
            if key[:3] == "md_"
        },
    } for record in parse_response(response)]

    for subject in subjects:
        # Only existing keys are reassigned, so the dict size never changes
        # while it is being iterated.
        for field, value in subject.items():
            if not isinstance(value, dict):
                continue
            try:
                subject[field] = datetime.datetime.fromtimestamp(value["$date"] / 1000.0)
            except (KeyError, TypeError, ValueError, OverflowError, OSError):
                # Missing "$date", non-numeric value or out-of-range timestamp:
                # keep the best-effort behaviour and blank the field, but let
                # unrelated errors (e.g. KeyboardInterrupt) propagate.
                subject[field] = None

    return subjects

In [4]:
# Download the registries subjects and load them into a DataFrame
# (get_subjects_data returns a list of dicts, so each dict becomes one row).
data_reg = get_subjects_data(project_id_reg)
df_raw_reg = pd.DataFrame(data_reg)



### Report

In [17]:
def get_interesting_profile_columns(profile, interesting_columns):
    """Tabulate selected per-variable statistics of a profile report.

    Args:
        profile: Object exposing ``to_json()``; the JSON must contain a
            top-level ``"variables"`` mapping of variable name -> statistics.
        interesting_columns: Names of the statistics to extract for every
            variable. Each one must be present in every variable's statistics.

    Returns:
        pandas.DataFrame: One row per variable, with a ``"variable"`` column
        followed by one column per requested statistic.
    """
    per_variable = json.loads(profile.to_json())["variables"]

    # Seed one empty list per output column; "variable" is inserted first so
    # that it ends up as the leading column of the frame.
    table = {"variable": []}
    table.update((wanted, []) for wanted in interesting_columns)

    for name, stats in per_variable.items():
        table["variable"].append(name)
        for wanted in interesting_columns:
            table[wanted].append(stats[wanted])

    return pd.DataFrame.from_dict(table)

def get_latex_report(df, report_path="reports/report.tex"):
    """Write a LaTeX table comparing missing-value statistics before and after cleaning.

    A copy of ``df`` is passed through ``utils.enhance_registry_data`` and then
    ``utils_cleaning.clean_data``. Both the enhanced ("raw") frame and the
    cleaned frame are profiled with ``pandas_profiling`` in minimal mode, and
    for every variable present in both profiles the report lists ``n`` (from
    the raw profile) plus ``p_missing`` / ``n_missing`` for raw and clean data.

    Uses the module-level ``auth_obj`` and ``project_id_reg`` for the cleaning
    call, which is made with ``send_qa_staus=False``.

    Args:
        df: Input DataFrame; it is copied before being enhanced.
        report_path: Destination ``.tex`` file. Its parent directory is
            created when the path contains one; a bare file name is written
            to the current working directory.

    Returns:
        None. The table is written to ``report_path``.
    """
    # os.path.dirname("report.tex") is "", and os.makedirs("") raises
    # FileNotFoundError, so only create a directory when the path names one.
    report_dir = os.path.dirname(report_path)
    if report_dir:
        os.makedirs(report_dir, exist_ok=True)

    df_enhanced = utils.enhance_registry_data(df.copy())
    # Replace missing values with NaN so both profiles see one missing marker.
    df_enhanced.fillna(np.nan, inplace=True)
    df_clean = utils_cleaning.clean_data(
        df_enhanced, auth_obj, project_id_reg, None,
        send_qa_staus=False,  # Set to true to update qa status
    )

    profile_raw = pandas_profiling.ProfileReport(df_enhanced, minimal=True)
    profile_clean = pandas_profiling.ProfileReport(df_clean, minimal=True)

    profile_stats = ["n", "p_missing", "n_missing"]
    df_raw_missing = get_interesting_profile_columns(profile_raw, profile_stats).rename(
        columns={"p_missing": "p_missing_raw_data", "n_missing": "n_missing_raw_data"}
    )
    df_clean_missing = (
        get_interesting_profile_columns(profile_clean, profile_stats)
        .rename(columns={"p_missing": "p_missing_clean_data", "n_missing": "n_missing_clean_data"})
        # "n" is already reported from the raw profile; drop it to avoid n_x / n_y after the merge.
        .drop("n", axis=1)
    )

    df_missing_info = pd.merge(df_raw_missing, df_clean_missing, on="variable").sort_values("variable")
    df_missing_info.to_latex(report_path, index=False)

In [18]:
# Missing-value report for the registries data, written to reports/report_reg.tex.
get_latex_report(df_raw_reg, "reports/report_reg.tex")


HBox(children=(FloatProgress(value=0.0, description='Summarize dataset', max=109.0, style=ProgressStyle(descri…







HBox(children=(FloatProgress(value=0.0, description='Render JSON', max=1.0, style=ProgressStyle(description_wi…

HBox(children=(FloatProgress(value=0.0, description='Summarize dataset', max=109.0, style=ProgressStyle(descri…

HBox(children=(FloatProgress(value=0.0, description='Render JSON', max=1.0, style=ProgressStyle(description_wi…

## Forms

In [11]:
# Platform project id used for the forms data, fetched with the same helper as the registries.
project_id_forms = 3150
data_forms = get_subjects_data(project_id_forms)

In [19]:
# Missing-value report for the forms data, written to reports/report_forms.tex.
# NOTE(review): get_latex_report passes project_id_reg (not project_id_forms)
# to utils_cleaning.clean_data — confirm that is intended for forms data.
df_raw_forms = pd.DataFrame(data_forms)
get_latex_report(df_raw_forms, "reports/report_forms.tex")


HBox(children=(FloatProgress(value=0.0, description='Summarize dataset', max=113.0, style=ProgressStyle(descri…







HBox(children=(FloatProgress(value=0.0, description='Render JSON', max=1.0, style=ProgressStyle(description_wi…

HBox(children=(FloatProgress(value=0.0, description='Summarize dataset', max=113.0, style=ProgressStyle(descri…

HBox(children=(FloatProgress(value=0.0, description='Render JSON', max=1.0, style=ProgressStyle(description_wi…

In [20]:
# Stack forms and registries rows into one frame and report on the combined data.
# NOTE(review): pd.concat keeps each frame's own row labels here, so index
# values repeat in merged_df — confirm nothing downstream needs a unique index.
merged_df = pd.concat([df_raw_forms, df_raw_reg])
get_latex_report(merged_df, "reports/report_all.tex")

HBox(children=(FloatProgress(value=0.0, description='Summarize dataset', max=114.0, style=ProgressStyle(descri…







HBox(children=(FloatProgress(value=0.0, description='Render JSON', max=1.0, style=ProgressStyle(description_wi…

HBox(children=(FloatProgress(value=0.0, description='Summarize dataset', max=114.0, style=ProgressStyle(descri…

HBox(children=(FloatProgress(value=0.0, description='Render JSON', max=1.0, style=ProgressStyle(description_wi…

In [15]:
# Enhance and clean the combined data once; the result is reused for both
# the "patients" and the "clinicians" tables.
df_enhanced_all = utils.enhance_registry_data(merged_df.copy())
df_enhanced_all = utils_cleaning.clean_data(df_enhanced_all,auth_obj,project_id_reg, None, send_qa_staus=False) # Set to true to update qa status

# Tables restricted to rows whose "report_source" is "patients".
# presumably "report_source" is added by the enhance/clean step — TODO confirm
querries.compute_tables(df_enhanced_all[df_enhanced_all["report_source"]=="patients"], report_source = "patients")

  res_values = method(rvalues)


In [16]:
# Tables restricted to rows whose "report_source" is "clinicians".
querries.compute_tables(df_enhanced_all[df_enhanced_all["report_source"]=="clinicians"], report_source = "clinicians")