In [None]:
import lifelines

In [155]:
import requests
import types
import pandas as pd
import numpy as np
import random
import math
from scipy.stats import fisher_exact

from datetime import datetime, timedelta

# Seed both random number generators used in this notebook (numpy's global RNG
# and the stdlib `random` module) so the synthetic data is reproducible.
np.random.seed(42)
random.seed(42)

# Base URL of the raw VERTEX sources (tracks the moving `dev` branch).
ISARIC_libs_url='https://raw.githubusercontent.com/ISARICResearch/VERTEX/refs/heads/dev/'


def _load_remote_module(module_name, timeout=30):
    """
    Download ``<module_name>.py`` from ``ISARIC_libs_url`` and execute it into a new module.

    Parameters:
        module_name (str): File stem of the remote source; also used as the module's name.
        timeout (float): Seconds to wait for the HTTP request before giving up.

    Returns:
        types.ModuleType: Module object whose namespace holds the executed source.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status. Without this
            check an error page would be passed to ``exec`` and fail with a confusing
            SyntaxError.
        requests.RequestException: On connection problems or timeout.
    """
    response = requests.get(ISARIC_libs_url + module_name + '.py', timeout=timeout)
    response.raise_for_status()
    module = types.ModuleType(module_name)
    # SECURITY NOTE(review): this executes code fetched over the network from a
    # mutable branch. Prefer installing VERTEX as a package or pinning the URL to
    # a commit hash so the executed code cannot change underneath the notebook.
    exec(response.text, module.__dict__)
    return module


getRC = _load_remote_module("getREDCapData")
idw = _load_remote_module("IsaricDraw")
ia = _load_remote_module("IsaricAnalytics")


In [156]:

def compute_odds_ratio(table):
    """
    Compute odds ratio, 95% confidence interval, and p-value from a 2x2 contingency table.

    The confidence interval is the Wald interval on the log odds ratio
    (log(OR) +/- 1.96 * SE). When any cell is zero the plain odds ratio and its
    standard error are undefined, so the Haldane-Anscombe correction is applied:
    0.5 is added to every cell before computing the odds ratio and the interval.
    Tables without zero cells are computed exactly as before.

    Parameters:
        table (array-like): 2x2 table in the format [[a, b], [c, d]]
            where:
                a = exposed cases
                b = exposed non-cases
                c = unexposed cases
                d = unexposed non-cases

    Returns:
        dict: {
            'odds_ratio': float,
            'ci_low': float,
            'ci_high': float,
            'p_value': float  (two-sided Fisher's exact test on the uncorrected table)
        }

    Raises:
        ValueError: If any cell count is negative, or a row does not have exactly
            two entries.
    """
    a, b = table[0]
    c, d = table[1]

    cells = [a, b, c, d]
    if any(count < 0 for count in cells):
        raise ValueError("Contingency table counts must be non-negative.")

    # Haldane-Anscombe continuity correction: avoids division by zero / log(0)
    # when a cell is empty. Only applied when actually needed.
    if any(count == 0 for count in cells):
        a, b, c, d = (count + 0.5 for count in cells)

    # Odds ratio
    or_value = (a * d) / (b * c)

    # Standard error of log(OR)
    se = math.sqrt(1/a + 1/b + 1/c + 1/d)

    # Confidence interval
    log_or = math.log(or_value)
    ci_low = math.exp(log_or - 1.96 * se)
    ci_high = math.exp(log_or + 1.96 * se)

    # Fisher's exact test p-value (exact test, so it uses the original counts)
    _, p_value = fisher_exact(table)

    return {
        'odds_ratio': or_value,
        'ci_low': ci_low,
        'ci_high': ci_high,
        'p_value': p_value
    }


In [157]:
def convert_age_to_years(row):
    """
    Express a record's age in years.

    Reads ``row['demog_age']`` (anything ``float()`` accepts) and
    ``row['demog_age_units']`` ('Years', 'Months' or 'Days').

    Returns:
        float | None: The age in years, or None when the age cannot be parsed
        as a number or the unit is not one of the three recognised labels.
    """
    raw_age, units = row['demog_age'], row['demog_age_units']

    try:
        value = float(raw_age)
    except (ValueError, TypeError):
        # Missing or non-numeric age
        return None

    if units == 'Years':
        return value

    # Remaining units: how many of them make up one year
    for unit_label, units_per_year in (('Months', 12), ('Days', 365.25)):
        if units == unit_label:
            return value / units_per_year

    # Unknown or invalid units
    return None


In [158]:
def compute_charlson_index(row):
    """
    Compute an age-adjusted Charlson-style comorbidity score for one patient record.

    Points awarded (a field counts only when it equals the string 'Yes'):
        1 point each: myocardial infarction, congestive heart failure, peripheral
            vascular disease, stroke/CVA, dementia, chronic pulmonary disease,
            rheumatological disease, peptic ulcer disease
        Liver disease: 1 if type is 'Mild'; 3 if 'Moderate or severe', 'Moderate'
            or 'Severe'; 0 for any other type
        Diabetes: 2 with end-organ damage, otherwise 1
        Hemiplegia or paraplegia: 2
        Chronic kidney disease: 2, only for 'Stage 4' or 'Stage 5'
        Malignancy: 2
        Solid tumour without leukaemia/lymphoma: 6 (see review note in the code)
        AIDS/HIV: 6
        Age: 1 point per decade starting at 50 (50-59 -> 1, 60-69 -> 2,
            70-79 -> 3), capped at 4 points for age 80 and above

    Parameters:
        row: Mapping-like record (e.g. a pandas Series or dict) indexed by the
            ``comor_*`` column names and ``demog_age`` (age in years).

    Returns:
        int: The total score. A missing or non-numeric age contributes 0 points.

    Raises:
        KeyError: If a required column is absent from ``row``.
    """
    score = 0

    # 1-point conditions
    one_point_columns = (
        'comor_chrcardiac_mi',
        'comor_chrcardiac_chf',
        'comor_perivascdis',
        'comor_strokecva',
        'comor_dementia',
        'comor_chrpulmona',
        'comor_rheumatolo',
        'comor_peptulcdis',
    )
    for column in one_point_columns:
        if row[column] == 'Yes':
            score += 1

    # Liver disease
    if row['comor_liverdisease'] == 'Yes':
        if row['comor_liverdisease_type'] == 'Mild':
            score += 1
        elif row['comor_liverdisease_type'] in ['Moderate or severe', 'Moderate', 'Severe']:
            score += 3

    # Diabetes
    if row['comor_diabetes_yn'] == 'Yes':
        if row['comor_diabetes_endorg'] == 'Yes':
            score += 2
        else:
            score += 1

    # Hemiplegia / Paraplegia
    if row['comor_hemipleg'] == 'Yes' or row['comor_parapleg'] == 'Yes':
        score += 2

    # Renal disease (stage 4–5)
    if row['comor_chrkidney'] == 'Yes':
        if row['comor_chrkidney_stag'] in ['Stage 4', 'Stage 5']:
            score += 2

    # Malignancy
    if row['comor_malignantn_yn'] == 'Yes':
        score += 2

    # Metastatic solid tumor
    # NOTE(review): this awards the 6 "metastatic" points to any solid tumour that is
    # not accompanied by leukaemia or lymphoma, in addition to the 2 malignancy points
    # above. No field here indicates metastasis - confirm this is the intended rule.
    if row['comor_malignantn_sol'] == 'Yes' and row['comor_malignantn_leuk'] != 'Yes' and row['comor_malignantn_lymp'] != 'Yes':
        score += 6

    # AIDS/HIV
    if row['comor_aids_yn'] == 'Yes':
        score += 6

    # Age adjustment
    try:
        age = int(row['demog_age'])
    except (ValueError, TypeError, OverflowError):
        # None, NaN, infinity or non-numeric text: no age points (deliberate best effort)
        age = None

    if age is not None and age >= 50:
        # One point per started decade from 50, never more than 4 (age 80+)
        score += min((age - 50) // 10 + 1, 4)

    return score

def has_any_comorbidity(row):
    """
    Flag whether a patient record reports at least one of the listed comorbidities.

    Returns:
        int: 1 as soon as any of the checked ``comor_*`` fields equals 'Yes'
        (fields are inspected in the order listed below), otherwise 0.
    """
    checked_columns = (
        # Cardiovascular, neurological, pulmonary, rheumatological, gastric
        'comor_chrcardiac_mi',
        'comor_chrcardiac_chf',
        'comor_perivascdis',
        'comor_strokecva',
        'comor_dementia',
        'comor_chrpulmona',
        'comor_rheumatolo',
        'comor_peptulcdis',
        # Liver, diabetes, paralysis, kidney
        'comor_liverdisease',
        'comor_diabetes_yn',
        'comor_hemipleg',
        'comor_parapleg',
        'comor_chrkidney',
        # Malignancy and AIDS/HIV
        'comor_malignantn_yn',
        'comor_malignantn_sol',
        'comor_aids_yn',
    )
    # The generator is lazy, so lookup stops at the first 'Yes' just like a chain of early returns.
    return int(any(row[column] == 'Yes' for column in checked_columns))


In [159]:
# Download the project's JSON configuration from the VERTEX repository; only its
# 'api_url' entry is read in this cell.
config_file = requests.get("https://raw.githubusercontent.com/ISARICResearch/VERTEX/refs/heads/dev/projects/ARChetypeCRF_h5nx_synthetic_mf/config_file.json").json() 
redcap_url=config_file['api_url']
#redcap_api_key=config_file['api_key']
# SECURITY NOTE(review): hardcoded API token committed in the notebook. Load it from
# an environment variable or a secrets store instead, and rotate this token if it
# grants access to anything that is not public synthetic data.
redcap_api_key="5B000D8BCBFB9113E46ED1226BEAD5BD"
# Pull the records through the remotely loaded getREDCapData helper.
# presumably returns a pandas DataFrame (it is used as one below) - verify against the helper.
data = getRC.get_records(redcap_url, redcap_api_key)
#dictionary = getRC.get_data_dictionary(redcap_url, redcap_api_key)
#missing_data_codes = getRC.get_missing_data_codes(redcap_url, redcap_api_key)

# Collapse to one row per subject, keeping the column-wise maximum of each field.
# NOTE(review): for text columns "max" is the lexicographically largest value -
# confirm that this is the intended way to merge repeated events/instruments.
data=data.groupby('subjid').max().reset_index()
# Overwrite demog_age with the age expressed in years (None when it cannot be converted).
data['demog_age'] = data.apply(convert_age_to_years, axis=1)
# The unit column and the REDCap event/repeat bookkeeping columns are no longer needed.
data=data.drop(columns=['demog_age_units','redcap_event_name','redcap_repeat_instrument','redcap_repeat_instance'])


HTTP Status: 200


In [160]:

def random_date(start, end):
    """
    Draw a random moment between ``start`` and ``end`` in whole-day steps.

    The offset is a whole number of days from ``start``, chosen uniformly with
    the stdlib ``random`` module; both endpoints can be returned.
    """
    span_in_days = (end - start).days
    day_offset = random.randint(0, span_in_days)
    return start + timedelta(days=day_offset)

# Values the synthetic-patient generators choose from for 'demog_sex'.
sex_options = ['Male', 'Female']
# Answer set for the comorbidity fields that may also be recorded as 'Unknown'.
yes_no_unknown = ['Yes', 'No', 'Unknown']

def generate_patient_0(i):
    """
    Generate one synthetic dengue patient record with no comorbidities.

    Every ``comor_*`` Yes/No field is fixed to 'No' (the kidney stage and liver
    type sub-fields are also set to 'No'). Age, sex, admission date, outcome
    date, test type, dengue class and outcome are drawn at random:

        - age: integer in [18, 89], stored as float
        - admission date: between 2025-01-01 and 2025-04-01
        - outcome date: 3 to 20 days after admission
        - outcome: 'Discharged alive' 60%, 'Deceased' 30%, 'Ongoing' 10%

    Parameters:
        i (int): Sequence number; the record's ``subjid`` is ``i + 2``.

    Returns:
        dict: Field name -> value, ready to be used as a DataFrame row.
    """
    # Keep the order of these draws stable: it determines the seeded random stream.
    age = np.random.randint(18, 90)
    sex = random.choice(sex_options)
    date_adm = random_date(datetime(2025, 1, 1), datetime(2025, 4, 1)).date()
    outcome_date = date_adm + timedelta(days=np.random.randint(3, 21))

    return {
        'subjid': i + 2,
        'dates_adm': 'Yes',
        'dates_admdate': str(date_adm),
        'demog_sex': sex,
        'demog_sex_oth': '',
        'demog_age': float(age),
        'comor_dementia': 'No',
        'comor_strokecva': 'No',
        'comor_hemipleg':'No',
        'comor_parapleg': 'No',
        'comor_chrcardiac': 'No',
        'comor_chrcardiac_mi':'No',
        'comor_chrcardiac_chf': 'No',
        'comor_perivascdis': 'No',
        'comor_chrpulmona': 'No',
        'comor_peptulcdis': 'No',
        'comor_chrkidney': 'No',
        'comor_chrkidney_stag': 'No',
        'comor_liverdisease': 'No',
        'comor_liverdisease_type': 'No',
        'comor_aids_yn': 'No',
        'comor_tuberculos': 'No',
        'comor_tuberculos_spec___1': 'Unchecked',
        'comor_tuberculos_spec___4': 'Unchecked',
        'comor_tuberculos_spec___3': 'Unchecked',
        'comor_tuberculos_spec___99': 'Unchecked',
        'comor_tuberculos_spec___ni': 'Unchecked',
        'comor_tuberculos_spec___unk': 'Unchecked',
        'comor_tuberculos_spec___nask': 'Unchecked',
        'comor_tuberculos_spec___na': 'Unchecked',
        'comor_benignn': 'No',
        'comor_malignantn_yn': 'No',
        'comor_malignantn_sol': 'No',
        'comor_malignantn_leuk': 'No',
        'comor_malignantn_lymp': 'No',
        'comor_rheumatolo': 'No',
        'comor_diabetes_yn': 'No',
        'comor_diabetes_type1': 'No',
        'comor_diabetes_type2': 'No',
        'comor_diabetes_gdm': 'No',
        'comor_diabetes_endorg': 'No',
        'comor_malnutriti': 'No',
        'presentation_complete': 'Complete',
        'diagn_dengue_test': np.random.choice(['Lab confirmed', 'Clinically suspected']),
        'outco_deng_diag': 'Yes',
        'outco_deng_diagclass': np.random.choice(
            ['Uncomplicated dengue', 'Dengue with warning signs', 'Severe dengue'],
            p=[0.6, 0.3, 0.1]
        ),
        'outco_date': str(outcome_date),
        'outco_outcome': np.random.choice(['Discharged alive', 'Deceased', 'Ongoing'],p=[0.6, 0.3, 0.1]),
        'outcome_complete': 'Complete'
    }

def generate_patient(i):
    """
    Generate one synthetic dengue patient record with randomly assigned comorbidities.

    Random draws come from both numpy's global RNG and the stdlib ``random``
    module, in the order the statements and dict entries appear below; changing
    that order changes the data produced for a given seed.

        - age: integer in [18, 89], stored as float
        - admission date: between 2025-01-01 and 2025-04-01
        - outcome date: 3 to 20 days after admission
        - comorbidities: 'Yes'/'No' (some fields also allow 'Unknown'), several
          of them more likely at older ages
        - outcome: 'Discharged alive' 30%, 'Deceased' 70% ('Ongoing' has
          probability 0.0 and is never drawn)

    Sub-fields (kidney stage, liver disease type, diabetes type, ...) are drawn
    independently of their parent Yes/No field.

    Parameters:
        i (int): Sequence number; the record's ``subjid`` is ``i + 2``.

    Returns:
        dict: Field name -> value, ready to be used as a DataFrame row.
    """
    age = np.random.randint(18, 90)
    sex = random.choice(sex_options)
    date_adm = random_date(datetime(2025, 1, 1), datetime(2025, 4, 1)).date()
    outcome_date = date_adm + timedelta(days=np.random.randint(3, 21))

    def maybe_yes(prob=0.2):  # 'Yes' with probability `prob` (20% by default), else 'No'
        return np.random.choice(['Yes', 'No'], p=[prob, 1 - prob])

    def maybe_yes_highrisk(age):  # Older patients more likely to have comorbidities
        # P('Yes') = 0.1 + 0.02 per year above 40, clamped to [0, 0.8]:
        # zero up to age 35, and capped at 0.8 from age 75.
        prob = max(0.0, min(0.1 + (age - 40) * 0.02, 0.8))
        return np.random.choice(['Yes', 'No'], p=[prob, 1 - prob])


    return {
        'subjid': i + 2,
        'dates_adm': 'Yes',
        'dates_admdate': str(date_adm),
        'demog_sex': sex,
        'demog_sex_oth': '',
        'demog_age': float(age),
        'comor_dementia': maybe_yes_highrisk(age),
        'comor_strokecva': maybe_yes_highrisk(age),
        'comor_hemipleg': maybe_yes(),
        'comor_parapleg': maybe_yes(),
        'comor_chrcardiac': maybe_yes_highrisk(age),
        'comor_chrcardiac_mi': maybe_yes_highrisk(age),
        'comor_chrcardiac_chf': maybe_yes_highrisk(age),
        'comor_perivascdis': np.random.choice(yes_no_unknown, p=[0.2, 0.7, 0.1]),
        'comor_chrpulmona': np.random.choice(yes_no_unknown, p=[0.3, 0.6, 0.1]),
        'comor_peptulcdis': np.random.choice(yes_no_unknown, p=[0.1, 0.8, 0.1]),
        'comor_chrkidney': maybe_yes_highrisk(age),
        # Stage is drawn regardless of comor_chrkidney; '' means no stage recorded.
        'comor_chrkidney_stag': np.random.choice(
            ['Stage 1', 'Stage 2', 'Stage 3a', 'Stage 3b', 'Stage 4', 'Stage 5', ''],
            p=[0.1, 0.15, 0.15, 0.1, 0.1, 0.1, 0.3]
        ),
        'comor_liverdisease': maybe_yes(),
        # Type is drawn regardless of comor_liverdisease; '' means no type recorded.
        'comor_liverdisease_type': np.random.choice(
            ['Mild', 'Moderate or severe', ''],
            p=[0.3, 0.3, 0.4]
        ),
        'comor_aids_yn': maybe_yes(prob=0.02),
        'comor_tuberculos': maybe_yes(prob=0.05),
        'comor_tuberculos_spec___1': 'Unchecked',
        'comor_tuberculos_spec___4': 'Unchecked',
        'comor_tuberculos_spec___3': 'Unchecked',
        'comor_tuberculos_spec___99': 'Unchecked',
        'comor_tuberculos_spec___ni': 'Unchecked',
        'comor_tuberculos_spec___unk': 'Unchecked',
        'comor_tuberculos_spec___nask': 'Unchecked',
        'comor_tuberculos_spec___na': 'Unchecked',
        'comor_benignn': maybe_yes(prob=0.1),
        'comor_malignantn_yn': maybe_yes_highrisk(age),
        'comor_malignantn_sol': maybe_yes(),
        'comor_malignantn_leuk': maybe_yes(),
        'comor_malignantn_lymp': maybe_yes(),
        'comor_rheumatolo': maybe_yes(),
        'comor_diabetes_yn': maybe_yes_highrisk(age),
        'comor_diabetes_type1': maybe_yes(prob=0.05),
        'comor_diabetes_type2': maybe_yes_highrisk(age),
        'comor_diabetes_gdm': maybe_yes(prob=0.05),
        'comor_diabetes_endorg': maybe_yes(),
        'comor_malnutriti': maybe_yes(prob=0.05),
        'presentation_complete': 'Complete',
        'diagn_dengue_test': np.random.choice(['Lab confirmed', 'Clinically suspected']),
        'outco_deng_diag': 'Yes',
        'outco_deng_diagclass': np.random.choice(
            ['Uncomplicated dengue', 'Dengue with warning signs', 'Severe dengue'],
            p=[0.6, 0.3, 0.1]
        ),
        'outco_date': str(outcome_date),
        'outco_outcome': np.random.choice(['Discharged alive', 'Deceased', 'Ongoing'],p=[0.3, 0.7, 0.0]),
        'outcome_complete': 'Complete'
    }

# Generate 1000 synthetic patients with randomly assigned comorbidities (subjid 2-1001)
fake_data = pd.DataFrame([generate_patient(i) for i in range(1000)])
# ...and 300 synthetic patients without any comorbidity (subjid 1002-1301)
fake_0=pd.DataFrame([generate_patient_0(i) for i in range(1000,1300)])


# Append the synthetic records to the REDCap records.
# NOTE(review): this rebinds `data`, so re-running the cell without re-running the
# loading cell appends the synthetic rows again; the original row index of each
# frame is also kept, so index labels repeat in the combined frame.
data=pd.concat([data,fake_data,fake_0])

In [161]:
# NOTE(review): absolute, user-specific output path; it will not exist on other
# machines. Consider a path relative to the notebook or a configurable directory.
path_public='C:/Users/egarcia/OneDrive - Nexus365/Projects/PublicVERTEX/TEST_Dengue_comorbidities/PUBLIC/'
# One row per (subject, outcome) pair, ignoring records whose outcome is the empty string.
outcome=data[['subjid','outco_outcome']].loc[data['outco_outcome']!=''].drop_duplicates()
# Count subjects per outcome. Naming the index and the counts explicitly yields the
# columns 'outco_outcome' and 'count' on every pandas version (a bare
# value_counts().reset_index() only produces a 'count' column on pandas >= 2.0).
outcome=outcome['outco_outcome'].value_counts().rename_axis('outco_outcome').reset_index(name='count')

fig=idw.fig_pie(outcome,
        title='Patient outcomes',
        item='outco_outcome',
        value='count',
        suffix='', filepath=path_public+'', save_inputs=False,
        graph_id='', graph_label='', graph_about='')
# Display the first element of the value returned by fig_pie
# (presumably the figure object - verify against IsaricDraw).
fig[0]

In [162]:
#patient_data=data[['subjid','dates_adm','dates_admdate','outco_outcome','diagn_dengue_test','outco_deng_diag','outco_deng_diagclass','outco_date','outco_outcome']].loc[data['outco_outcome']!=''].drop_duplicates()

# Cohort selection: report how many records remain after each inclusion criterion.
print(len(data),'all')
patient_data=data.loc[data['diagn_dengue_test']=='Lab confirmed']
print(len(patient_data),'lab confirmed')

patient_data=patient_data.loc[patient_data['dates_adm']=='Yes']
print(len(patient_data),'Hospital')

# .copy() detaches the cohort from `data`, so columns can be added to it later
# without pandas' SettingWithCopyWarning or any ambiguity about what is modified.
patient_data=patient_data.loc[patient_data['outco_deng_diag']=='Yes'].copy()
print(len(patient_data),'dengue diagnosis')


#patient_data=patient_data[['subjid','dates_adm','dates_admdate','outco_outcome','diagn_dengue_test','outco_deng_diag','outco_deng_diagclass','outco_date','outco_outcome']].loc[patient_data['outco_outcome']!=''].drop_duplicates()
#print(len(patient_data))

1301 all
658 lab confirmed
658 Hospital
658 dengue diagnosis


In [None]:
# Work on an independent copy so that adding columns never writes into a slice of `data`.
patient_data = patient_data.copy()
patient_data['has_comorbidity'] = patient_data.apply(has_any_comorbidity, axis=1)
patient_data['charlson_score'] = patient_data.apply(compute_charlson_index, axis=1)

# Number of distinct subjects per (comorbidity flag, outcome) pair; patients whose
# outcome is still 'Ongoing' cannot be classified and are left out.
OR_data=patient_data[['has_comorbidity','outco_outcome','subjid']].groupby(['has_comorbidity','outco_outcome']).nunique().reset_index()
OR_data=OR_data.loc[OR_data['outco_outcome']!='Ongoing']


# Group with comorbidity = 1
a = OR_data['subjid'][(OR_data['has_comorbidity'] == 1) & (OR_data['outco_outcome'] == 'Deceased')]  # died
b = OR_data['subjid'][(OR_data['has_comorbidity'] == 1) & (OR_data['outco_outcome'] != 'Deceased')] # survived

# Group with comorbidity = 0
c = OR_data['subjid'][(OR_data['has_comorbidity'] == 0) & (OR_data['outco_outcome'] == 'Deceased')]  # died
d = OR_data['subjid'][(OR_data['has_comorbidity'] == 0) & (OR_data['outco_outcome'] != 'Deceased')]# survived

# 2x2 table for odds ratio
# Each selection may contain zero rows (nobody in that cell) or several rows (more
# than one non-deceased outcome label). Summing covers both: an empty selection
# counts as 0 and multiple labels are added up, where .iloc[0] would raise or
# silently drop all but the first label.
table = [[int(a.sum()), int(b.sum())], [int(c.sum()), int(d.sum())]]
result = compute_odds_ratio(table)
print(f"\nOdds Ratio = {result['odds_ratio']:.3f}")
print(f"95% CI = ({result['ci_low']:.3f}, {result['ci_high']:.3f})")
print(f"P-value = {result['p_value']:.4f}")




Odds Ratio = 3.559
95% CI = (2.460, 5.148)
P-value = 0.0000


[[327, 145], [64, 101]]

In [178]:
# Render the per-group patient counts as a table through the IsaricDraw helper.
table_patients = idw.fig_table(
    OR_data,
    suffix='suffix',
    filepath=None,
    save_inputs=False,
    graph_label='Number of patients included',
    graph_about='...',
)
# Show the first element of the returned value as the cell output.
table_patients[0]

In [None]:
# Set the column labels of OR_data in place. As OR_data is built in this notebook
# (group-by on has_comorbidity / outco_outcome, counting subjid) these are already
# its column names, so this appears to be a no-op - NOTE(review): confirm and drop.
OR_data.columns=['has_comorbidity', 'outco_outcome','subjid']

In [166]:
# Sunburst of patient counts: inner ring = comorbidity flag, outer ring = outcome,
# with segment sizes taken from the 'subjid' counts.
fig = idw.fig_sunburst(
    OR_data,
    title='Patient Outcomes by Comorbidity',
    path=['has_comorbidity', 'outco_outcome'],
    values='subjid',
    base_color_map=None,
    suffix='',
    filepath='',
    save_inputs=False,
    graph_id='',
    graph_label='',
    graph_about='',
)

# Show the first element of the returned value as the cell output.
fig[0]