In [12]:
from qmenta.core.platform import Auth, post, parse_response
from getpass import getpass
import datetime
import pandas as pd

In [13]:
# ID of the platform project to be used. The helper functions in this notebook
# take a project id argument and send it as the "_pid" request field.
project_id = 1081

In [14]:
# Base URL used to connect to the central platform.
base_url = "https://platform.qmenta.com"
# PUT YOUR USERNAME (EMAIL) HERE
# NOTE(review): a personal e-mail address is hard-coded below; consider
# clearing it before sharing or committing the notebook.
username = "edward.debrouwer@esat.kuleuven.be"
# getpass() prompts for the password interactively, so the password itself is
# not written into the notebook source.
password = getpass()

········


In [15]:
# Create the authentication object by logging in with the username, password
# and base URL defined above. It is passed as the first argument to every
# post() call in this notebook.
auth_obj = Auth.login(username, password, base_url)

In [24]:
def set_qa_status(project_id, patient_id, status, comments):
    """Set the QA status of a subject through the platform API.

    To be able to execute this you have to be a QC in the project.

    Parameters
    ----------
    project_id
        Project id, sent as the ``_pid`` field of the request.
    patient_id
        Take the ``id`` of the corresponding ``get_subjects_data`` record.
    status
        ``pass`` | ``fail`` | ``<empty>``.
    comments
        Can be any text, including HTML.

    Returns
    -------
    bool
        True when the ``success`` field of the parsed response equals 1.
    """
    payload = {
        "_pid": project_id,
        "item_ids": patient_id,
        "status": status,
        "comments": comments,
        "entity": "patients",
    }
    response = post(
        auth_obj,
        "/projectset_manager/set_qa_status",
        payload,
        timeout=600.0,
    )
    parsed = parse_response(response)
    return parsed["success"] == 1

# method to fetch the subjects data
def get_subjects_data(project_id):
    """Fetch the subject list of a project and flatten it into plain dicts.

    Each returned dict holds the record's ``_id`` under ``id``, its
    ``patient_secret_name`` under ``secret_name``, and every ``md_*`` field of
    the record with the ``md_`` prefix stripped. Any dict-valued entry is then
    replaced by a ``datetime`` built from its ``"$date"`` value, which is
    divided by 1000 before conversion.

    Parameters
    ----------
    project_id
        Project id, sent as the ``_pid`` field of the request.

    Returns
    -------
    list of dict
        One flattened dict per record in the parsed response.
    """
    response = post(
        auth_obj,
        "/patient_manager/get_patient_list",
        {"_pid": project_id},
        timeout=600.0,
    )

    subjects = []
    for record in parse_response(response):
        subject = {
            "id": record["_id"],
            "secret_name": record["patient_secret_name"],
        }
        # Copy the metadata fields over without their "md_" prefix.
        for key in record:
            if key[:3] == "md_":
                subject[key[3:]] = record[key]
        subjects.append(subject)

    # Second pass: turn {"$date": <number>} wrappers into datetime objects.
    for subject in subjects:
        for field, value in subject.items():
            if isinstance(value, dict):
                subject[field] = datetime.datetime.fromtimestamp(value["$date"] / 1000.0)

    return subjects


In [25]:
def enhance_registry_data(data_in):
    """Build the registry DataFrame and derive its report source and diagnosis.

    Adds three columns to the frame built from ``data_in``:

    * ``Origin``: always ``"Registry"``.
    * ``Report_source``: ``"clinicians"`` or ``"patients"``, chosen from
      markers found in ``secret_name`` (default ``"clinicians"``).
    * ``covid19_diagnosis``: ``"not_suspected"``, ``"suspected"`` or
      ``"confirmed"``.

    Parameters
    ----------
    data_in
        Subject records (anything ``pd.DataFrame`` accepts). They must provide
        ``secret_name`` and the ``covid19_*`` columns referenced below.

    Returns
    -------
    pandas.DataFrame
        The enhanced frame. The previous version built it but returned nothing.
    """
    df = pd.DataFrame(data_in)
    df["Origin"] = "Registry"

    # Later markers override earlier ones, exactly as in the original sequence.
    df["Report_source"] = "clinicians"
    for marker, source in (
        ("COV0", "clinicians"),
        ("COV10", "patients"),
        ("COVID14C", "clinicians"),  # AND patient
        ("COV15", "patients"),
        ("COV19", "clinicians"),
    ):
        df.loc[df.secret_name.str.contains(marker), "Report_source"] = source

    def yes(column):
        """Boolean mask of the rows where `column` equals "yes"."""
        return df[column] == "yes"

    # Report_source holds the plural labels "patients" / "clinicians". The
    # previous equality test against "patient" never matched, so none of the
    # patient rules were applied. Substring matching mirrors the clinician rules.
    is_patient = df.Report_source.str.contains("patient")
    is_clinician = df.Report_source.str.contains("clinician")

    respiratory_sign = (
        yes("covid19_sympt_dry_cough")
        | yes("covid19_sympt_shortness_breath")
        | yes("covid19_sympt_pneumonia")
    )
    other_symptom = (
        yes("covid19_sympt_fatigue")
        | yes("covid19_sympt_pain")
        | yes("covid19_sympt_nasal_congestion")
        | yes("covid19_sympt_chills")
        | yes("covid19_sympt_loss_smell_taste")
        | yes("covid19_sympt_sore_throat")
    )

    df["covid19_diagnosis"] = "not_suspected"

    # Patient reported data
    df.loc[
        is_patient
        & yes("covid19_sympt_fever")
        & yes("covid19_suspected_case")
        & respiratory_sign,
        "covid19_diagnosis",
    ] = "suspected"
    df.loc[
        is_patient & yes("covid19_sympt_fever") & respiratory_sign & other_symptom,
        "covid19_diagnosis",
    ] = "suspected"
    # "confirmed" is applied last so it wins over "suspected".
    df.loc[is_patient & yes("covid19_confirmed_case"), "covid19_diagnosis"] = "confirmed"

    # Clinicians reported data
    df.loc[is_clinician & yes("covid19_suspected_case"), "covid19_diagnosis"] = "suspected"
    df.loc[is_clinician & yes("covid19_confirmed_case"), "covid19_diagnosis"] = "confirmed"

    return df
    
    

    

In [None]:
def assess_registry_data():
    """Placeholder for the registry-data assessment step (not implemented yet).

    The cell previously held only the ``def`` line, which is a syntax error.
    Raising makes the missing implementation explicit instead of silently
    returning ``None``.

    Raises
    ------
    NotImplementedError
        Always, until the function is written.
    """
    raise NotImplementedError("assess_registry_data is not implemented yet")



In [None]:
def repair_registry_data():
    """Placeholder for the registry-data repair step (not implemented yet).

    The cell previously held only the ``def`` line, which is a syntax error.
    Raising makes the missing implementation explicit instead of silently
    returning ``None``.

    Raises
    ------
    NotImplementedError
        Always, until the function is written.
    """
    raise NotImplementedError("repair_registry_data is not implemented yet")
    

### Some cleaning procedures we already had

In [17]:
# CREATING REPORT SOURCE (clinician or patients)

# IDs of the two projects whose subjects are fetched and combined below.
project_id_reg = 3202
project_id_forms = 3150
# The ID of the registry project is 3202.
# The ID of the public forms data is 3150.

# Base URL used to connect to the central platform.
# NOTE(review): base_url, username, password and auth_obj repeat the
# definitions made in earlier cells of this notebook.
base_url = "https://platform.qmenta.com"
# PUT YOUR USERNAME (EMAIL) HERE
username = "edward.debrouwer@esat.kuleuven.be"
# getpass() prompts for the password interactively.
password = getpass()

# Create the authentication object used by the post() calls.
auth_obj = Auth.login(username, password, base_url)

# method to fetch the subjects data
def get_subjects_data(project_id):
    """Fetch the subject list of a project as a list of flat dicts.

    Each returned dict holds the record's ``patient_secret_name`` under
    ``secret_name`` plus every ``md_*`` field with the ``md_`` prefix stripped.

    NOTE(review): an earlier cell of this notebook defines a function with the
    same name that also returns ``id`` and converts date fields. Whichever cell
    runs last wins, so consider keeping a single definition.

    Parameters
    ----------
    project_id
        Project id, sent as the ``_pid`` field of the request.

    Returns
    -------
    list of dict
        One flattened dict per record in the parsed response.
    """
    response = post(
        auth_obj,
        "/patient_manager/get_patient_list",
        {"_pid": project_id},
        timeout=600.0,
    )

    subjects = []
    for record in parse_response(response):
        subject = {"secret_name": record["patient_secret_name"]}
        # Copy the metadata fields over without their "md_" prefix.
        for key in record:
            if key[:3] == "md_":
                subject[key[3:]] = record[key]
        subjects.append(subject)
    return subjects


# Fetch the subjects of both projects and report how many each returned.
data_reg = get_subjects_data(project_id_reg)
data_forms = get_subjects_data(project_id_forms)
for _subjects in (data_reg, data_forms):
    print(f"Total number of subjects: {len(_subjects)}")

# Tag each frame with its origin; the report source defaults to "clinicians".
df_reg = pd.DataFrame(data_reg).assign(Origin="Registry", Report_source="clinicians")
df_forms = pd.DataFrame(data_forms).assign(Origin="Forms", Report_source="clinicians")

#Clean ICONQUERMS mess with COVID10 and replace with COV10
#df_reg.loc[df_reg.secret_name.str.contains("COVID10"),"secret_name"] = df_reg.loc[df_reg.secret_name.str.contains("COVID10"),"secret_name"].apply(lambda x : "COV10"+x[7:])

# Registry: derive the report source from markers in the secret name.
# Later markers override earlier ones.
for _marker, _source in (
    ("COV0", "clinicians"),
    ("COV10", "patients"),
    ("COVID14C", "clinicians"),  # AND patient
    ("COV15", "patients"),
    ("COV19", "clinicians"),
):
    df_reg.loc[df_reg.secret_name.str.contains(_marker), "Report_source"] = _source

# Forms: "C_" marks clinician entries, "P_" marks patient entries.
for _marker, _source in (("C_", "clinicians"), ("P_", "patients")):
    df_forms.loc[df_forms.secret_name.str.contains(_marker), "Report_source"] = _source

# Stack both sources and keep the first row seen for each secret name.
df = pd.concat([df_reg, df_forms], sort=True).drop_duplicates(subset=["secret_name"])

df_clinicians = df[df.Report_source.str.contains("clinicians")].copy()
df_patients = df[df.Report_source.str.contains("patients")].copy()


········
Total number of subjects: 2399
Total number of subjects: 1187


In [18]:
#CREATION OF THE COVID19_diagnosis
df["covid19_diagnosis"] = "not_suspected"

# Report_source holds the plural labels "patients" / "clinicians". The previous
# equality test against "patient" never matched, so no patient-reported row
# could become "suspected" or "confirmed". Substring matching mirrors the
# clinician rules below.
patient_reported = df.Report_source.str.contains("patient")
clinician_reported = df.Report_source.str.contains("clinician")

respiratory_sign = (
    (df.covid19_sympt_dry_cough == "yes")
    | (df.covid19_sympt_shortness_breath == "yes")
    | (df.covid19_sympt_pneumonia == "yes")
)
other_symptom = (
    (df.covid19_sympt_fatigue == "yes")
    | (df.covid19_sympt_pain == "yes")
    | (df.covid19_sympt_nasal_congestion == "yes")
    | (df.covid19_sympt_chills == "yes")
    | (df.covid19_sympt_loss_smell_taste == "yes")
    | (df.covid19_sympt_sore_throat == "yes")
)

#Patient reported_data
df.loc[
    patient_reported
    & (df.covid19_sympt_fever == "yes")
    & (df.covid19_suspected_case == "yes")
    & respiratory_sign,
    "covid19_diagnosis",
] = "suspected"

df.loc[
    patient_reported
    & (df.covid19_sympt_fever == "yes")
    & respiratory_sign
    & other_symptom,
    "covid19_diagnosis",
] = "suspected"

# "confirmed" is applied last so it wins over "suspected".
df.loc[patient_reported & (df.covid19_confirmed_case == "yes"), "covid19_diagnosis"] = "confirmed"

#Clinicians reported data
df.loc[clinician_reported & (df.covid19_suspected_case == "yes"), "covid19_diagnosis"] = "suspected"
df.loc[clinician_reported & (df.covid19_confirmed_case == "yes"), "covid19_diagnosis"] = "confirmed"

In [19]:
def clean_heights_weights(df_in, height_range=(100, 210), weight_range=(30, 300)):
    """Return a copy of ``df_in`` with implausible heights and weights blanked.

    ``height`` and ``weight`` are converted to numbers (unparseable entries
    become NaN), and any value outside the inclusive range for its column is
    replaced by NaN. The input frame is not modified.

    The previous version wrote ``np.nan`` although numpy is not imported in the
    notebook's import cell, so it failed with ``NameError``. This version needs
    pandas only.

    Parameters
    ----------
    df_in : pandas.DataFrame
        Frame with ``height`` and ``weight`` columns.
    height_range : tuple, optional
        Inclusive ``(low, high)`` bounds kept for ``height``; default (100, 210).
    weight_range : tuple, optional
        Inclusive ``(low, high)`` bounds kept for ``weight``; default (30, 300).

    Returns
    -------
    pandas.DataFrame
        The cleaned copy.
    """
    df = df_in.copy()
    for column, (low, high) in (("height", height_range), ("weight", weight_range)):
        values = pd.to_numeric(df[column], errors="coerce")
        # between() is inclusive on both ends and False for NaN, so where()
        # keeps in-range values and leaves NaN everywhere else.
        df[column] = values.where(values.between(low, high))
    return df

# Clean heights and weights, then derive BMI = weight / (height / 100) ** 2.
# Both columns must be present; otherwise only a warning is printed.
if "weight" not in df or "height" not in df:
    print(f" Warning : weight or height not in data !")
else:
    df = clean_heights_weights(df)
    df["bmi"] = df.weight / (df.height / 100) ** 2

In [20]:
def create_edss_cat(df_in):
    """Return a copy of ``df_in`` with an ``edss_in_cat`` category column.

    ``edss_value`` is converted to numbers (unparseable entries become NaN) and
    binned as:

    * ``0`` for 0 < value <= 3
    * ``1`` for 3 < value <= 6
    * ``2`` for value > 6

    Rows matching no bin (NaN, or values <= 0) keep ``None``. If ``edss_value``
    is missing, a warning is printed and the copy is returned unchanged.

    The previous version modified the copy but never returned it, so calling
    the function had no effect.

    Parameters
    ----------
    df_in : pandas.DataFrame
        Input frame; it is not modified.

    Returns
    -------
    pandas.DataFrame
        The copy, with ``edss_in_cat`` added when ``edss_value`` is present.
    """
    df = df_in.copy()
    # EDSS in cat
    if "edss_value" not in df:
        print(f" Warning : edss_value not in data !")
        return df

    df["edss_in_cat"] = None
    df["edss_value"] = pd.to_numeric(df["edss_value"], errors="coerce")
    df.loc[(df.edss_value > 0) & (df.edss_value <= 3), "edss_in_cat"] = 0
    df.loc[(df.edss_value > 3) & (df.edss_value <= 6), "edss_in_cat"] = 1
    df.loc[df.edss_value > 6, "edss_in_cat"] = 2
    return df

In [21]:
def create_bmi_cat(df_in):
    """Return a copy of ``df_in`` with BMI category columns added.

    ``bmi_in_cat`` is assigned as:

    * ``"underweight"`` for 0 < bmi < 18.5
    * ``"normal"`` for 18.5 <= bmi <= 25
    * ``"overweight"`` for 25 < bmi <= 30
    * ``"class I obesity"`` for 30 < bmi <= 35
    * ``"class II obesity"`` for bmi > 35

    ``bmi_in_cat2`` is ``"not_overweight"`` for bmi <= 25 and ``"overweight"``
    for bmi > 25. Rows matching no rule (for example NaN) keep ``None``.

    The previous version modified the copy but never returned it, so calling
    the function had no effect. A missing ``bmi`` column now prints a warning,
    as ``create_edss_cat`` does for its column.

    Parameters
    ----------
    df_in : pandas.DataFrame
        Input frame; it is not modified.

    Returns
    -------
    pandas.DataFrame
        The copy, with both category columns added when ``bmi`` is present.
    """
    df = df_in.copy()
    # BMI in cat
    if "bmi" not in df:
        print(" Warning : bmi not in data !")
        return df

    df["bmi_in_cat"] = None
    df.loc[(df.bmi < 18.5) & (df.bmi > 0), "bmi_in_cat"] = "underweight"
    df.loc[(df.bmi <= 25) & (df.bmi >= 18.5), "bmi_in_cat"] = "normal"
    df.loc[(df.bmi <= 30) & (df.bmi > 25), "bmi_in_cat"] = "overweight"
    df.loc[(df.bmi <= 35) & (df.bmi > 30), "bmi_in_cat"] = "class I obesity"
    df.loc[df.bmi > 35, "bmi_in_cat"] = "class II obesity"

    df["bmi_in_cat2"] = None
    df.loc[df.bmi <= 25, "bmi_in_cat2"] = "not_overweight"
    df.loc[df.bmi > 25, "bmi_in_cat2"] = "overweight"
    return df

In [22]:
def create_age_cat(df_in):
    """Add an ``Age_in_cat`` category column to a copy of ``df_in``.

    Categories assigned from ``age_years``:

    * ``0`` for 0 < age < 18
    * ``1`` for 18 <= age <= 50
    * ``2`` for 50 < age <= 70
    * ``3`` for age > 70

    Rows matching no rule keep ``None``. The input frame is not modified.

    NOTE(review): no ``return`` statement is visible in this part of the
    notebook. Unless one follows, the modified copy is discarded and the
    function returns ``None``. Confirm against the full cell.
    """
    # Work on a copy so the caller's frame is left untouched.
    df = df_in.copy()
    # Nothing is done when the age column is absent.
    if "age_years" in df:
        df["Age_in_cat"] = None
        df.loc[(df.age_years<18) & (df.age_years>0),"Age_in_cat"] = 0
        df.loc[(df.age_years<=50) & (df.age_years>=18),"Age_in_cat"] = 1
        df.loc[(df.age_years<=70) & (df.age_years>50),"Age_in_cat"] = 2
        df.loc[ (df.age_years>70),"Age_in_cat"] = 3