# ICAN clinical data characteristics

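**Objective:** describe the simulated ICAN clinical database by building a baseline characteristics table that compares the ruptured (RIA) and unruptured (UIA) groups.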

## Package imports

In [1]:
import pandas as pd
import numpy as np
from scipy import stats

import Missing_Values as MV

## Data loading

In [2]:
# Read the simulated ICAN clinical table; the file uses '#' as its field separator.
ican = pd.read_csv(
    "simulated_ican_clinical_data.csv",
    sep="#",
)
# Show (n_rows, n_columns) as the cell output.
ican.shape

(2505, 28)

## Baseline Characteristics table creation

In [3]:
# Split the cohort on the `rupture` column:
#   data_UIA -> rows where rupture == "No"
#   data_RIA -> rows where rupture == "Yes"
data_UIA = ican.loc[ican["rupture"] == "No"]
data_RIA = ican.loc[ican["rupture"] == "Yes"]

In [4]:
def f_dec(x):
    """Return the number of decimals used when displaying ``x``.

    Values greater than or equal to 10 are shown with 1 decimal; every other
    value (i.e. whenever ``x >= 10`` is false) is shown with 2 decimals.
    """
    return 1 if x >= 10 else 2

In [5]:
def info_count(df, var, value, dec = 1) :
    """Summarise how many rows of ``df`` satisfy ``df[var] == value``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to summarise.
    var : str
        Name of the column to inspect.
    value : object
        Value the column is compared against with ``==``.
    dec : int, default 1
        Number of decimals used for the percentage.

    Returns
    -------
    str or int
        ``"<count> (<pct>%)"`` where the percentage is relative to
        ``len(df)``. When no row matches, the bare integer ``0`` is returned
        instead of a string (existing contract, kept for compatibility), so
        callers must be ready for a non-string cell.
    """
    # Vectorised count of matching rows (same result as summing the booleans
    # one by one, without the Python-level loop).
    count = int((df[var] == value).sum())
    if count == 0 :
        # Returning early also avoids dividing by zero on an empty frame.
        return 0
    percentage = count / len(df) * 100
    return str(count) + " (" + format(percentage, '0.' + str(dec) + 'f') + "%)"

In [6]:
def info_mean(df, var, nonnormal_vect = None) :
    """Format a one-cell summary of the continuous column ``df[var]``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to summarise.
    var : str
        Name of the column to summarise.
    nonnormal_vect : collection of str, optional
        Names of the variables to summarise as ``median [Q1-Q3]``. Every
        other variable is summarised as ``mean ± SD``. ``None`` (the
        default) is treated as "no such variable".

    Returns
    -------
    str
        ``"median [q25-q75]"`` or ``"mean ± sd"``. The number of decimals is
        chosen by ``f_dec`` from the central value (median or mean) and is
        applied to every number in the cell.
    """
    # The default None used to reach `var in None` and raise TypeError;
    # treat it as an empty collection instead.
    if nonnormal_vect is None :
        nonnormal_vect = ()

    values = df[var]
    if var in nonnormal_vect :
        median = np.median(values)
        spec = '0.' + str(f_dec(median)) + 'f'
        return (format(median, spec) + " ["
                + format(np.quantile(values, 0.25), spec) + "-"
                + format(np.quantile(values, 0.75), spec) + "]")

    mean = np.mean(values)
    spec = '0.' + str(f_dec(mean)) + 'f'
    # np.std is called without ddof, i.e. with NumPy's default ddof=0.
    return format(mean, spec) + " \u00B1 " + format(np.std(values), spec)

In [7]:
def col_filling_table(df, nonnormal_vect = None) :
    """Build one column of the baseline characteristics table for ``df``.

    Each row is described by ``(label, column, category)``. When ``category``
    is ``None`` the column is summarised with ``info_mean`` (forwarding
    ``nonnormal_vect``); otherwise the rows equal to ``category`` are counted
    with ``info_count``.

    Returns
    -------
    (list, list)
        The formatted cells and the matching row labels, in the same order.
    """
    row_spec = [
        ("Sex (women)", "sex", "F"),
        ("Age (year)", "age", None),
        ("Body mass index (kg/m²)", "bmi", None),
        ("Familial history (yes)", "sporadic_case", "No"),
        ("Adjusted Size Ratio", "adjusted_size_ratio", None),
        ("IA location - ICA", "IA_location", "ICA"),
        ("IA location - MCA", "IA_location", "MCA"),
        ("IA location - ACA", "IA_location", "ACA"),
        ("IA location - PCA", "IA_location", "PCA"),
        ("Multiple IA (yes)", "multiple_IA", "Yes"),
        ("Hypertension (yes)", "hta", "Yes"),
        ("Headaches (yes)", "headaches", "Yes"),
        ("Dyslipidemia (yes)", "dyslipidemia", "Yes"),
        ("Ischemic stroke history (yes)", "ischemic_stroke_history", "Yes"),
        ("Ischemic heart disease history (yes)", "ischemic_heart_disease_history", "Yes"),
        ("Statin treatment (yes)", "statin_ttt", "Yes"),
        ("Platelet aggregation inhibiting treatment (yes)", "platelet_aggregation_inhibiting_ttt", "Yes"),
        ("Hormone therapy treatment (yes)", "hormone_therapy_ttt", "Yes"),
        ("Allergy (yes)", "allergy", "Yes"),
        ("Tobacco consumption - Non-smoker", "tobacco", "Non-smoker"),
        ("Tobacco consumption - Minor smoker", "tobacco", "Minor_smoker"),
        ("Tobacco consumption - Regular smoker", "tobacco", "Regular_smoker"),
        ("Alcohol consumption (> 150g)", "alcohol", "> 150g"),
    ]

    col = []
    table_index = []
    for label, column, category in row_spec :
        if category is None :
            cell = info_mean(df, column, nonnormal_vect)
        else :
            cell = info_count(df, column, category)
        col.append(cell)
        table_index.append(label)
    return col, table_index

In [8]:
# Variables summarised as median [Q1-Q3] rather than mean ± SD.
nonnormal_vect = ["adjusted_size_ratio"]

# One formatted column per cohort. Every call returns the same row labels,
# so `table_index` can be taken from any of them.
col_TOT, table_index = col_filling_table(ican, nonnormal_vect)
col_RIA, table_index = col_filling_table(data_RIA, nonnormal_vect)
col_UIA, table_index = col_filling_table(data_UIA, nonnormal_vect)

In [9]:
# Assemble the baseline table: one column per cohort, each header carrying
# the cohort size, in the order All / RIA / UIA.
table = pd.DataFrame(
    {
        "All (n = {})".format(len(ican)): col_TOT,
        "RIA (n = {})".format(len(data_RIA)): col_RIA,
        "UIA (n = {})".format(len(data_UIA)): col_UIA,
    },
    index=table_index,
)

In [10]:
def _format_pvalue(p) :
    """Render a p-value for the table: "<0.001" below 0.001, else 3 decimals."""
    if p < 0.001 :
        return "<0.001"
    return format(p, '0.3f')


def _is_count_cell(cell) :
    """Tell whether a table cell was produced by `info_count`.

    `info_count` returns either a "count (pct%)" string or the bare integer 0.
    """
    text = str(cell)
    return "%" in text or text == "0"


def _count_from_cell(cell) :
    """Extract the leading integer count from an `info_count` cell."""
    return int(str(cell).split()[0])


# Group sizes do not change between rows: compute them once.
n_RIA = int((ican.rupture == "Yes").sum())
n_UIA = int((ican.rupture == "No").sum())

# One entry per table row, so the list always matches the table length.
Pvalues = []
for i in range(len(table)) :
    row_name = table.index[i]
    # Columns are addressed by position: 1 is the RIA column, 2 the UIA column.
    ria_cell, uia_cell = table.iloc[i, 1], table.iloc[i, 2]

    if _is_count_cell(ria_cell) :
        # Categorical row: chi-square test on the 2x2 table
        # (category present / absent) x (RIA / UIA).
        count_RIA = _count_from_cell(ria_cell)
        count_UIA = _count_from_cell(uia_cell)
        val_Yes = [count_RIA, count_UIA]
        val_No = [n_RIA - count_RIA, n_UIA - count_UIA]
        df_contingence = pd.DataFrame({"Yes" : val_Yes, "No" : val_No})
        try :
            st_chi2, st_p, st_dof, st_exp = stats.chi2_contingency(df_contingence)
        except ValueError :
            # chi2_contingency refuses tables whose expected frequencies
            # contain a zero (e.g. a category absent from both groups).
            Pvalues.append("NA")
            continue
    elif "Age" in row_name :
        # Welch's t-test (unequal variances) between the two groups.
        st_ttest, st_p = stats.ttest_ind(data_RIA.age, data_UIA.age, axis = 0, equal_var = False)
    elif "Body" in row_name :
        st_ttest, st_p = stats.ttest_ind(data_RIA.bmi, data_UIA.bmi, axis = 0, equal_var = False)
    elif "Adjusted Size Ratio" in row_name :
        # Median test for the variable summarised as median [Q1-Q3].
        st_med, st_p, st_m, st_table = stats.median_test(data_RIA.adjusted_size_ratio,
                                                         data_UIA.adjusted_size_ratio)
    else :
        # No test is defined for this row; keep the column aligned with the table.
        Pvalues.append("NA")
        continue

    Pvalues.append(_format_pvalue(st_p))

table["p-value"] = Pvalues
table

Unnamed: 0,All (n = 2505),RIA (n = 959),UIA (n = 1546),p-value
Sex (women),1784 (71.2%),667 (69.6%),1117 (72.3%),0.160
Age (year),53.8 ± 12.3,50.8 ± 11.8,55.7 ± 12.2,<0.001
Body mass index (kg/m²),25.1 ± 4.5,24.9 ± 4.4,25.2 ± 4.6,0.101
Familial history (yes),509 (20.3%),169 (17.6%),340 (22.0%),0.010
Adjusted Size Ratio,3.27 [1.94-5.51],3.96 [2.27-6.12],2.89 [1.79-5.05],<0.001
IA location - ICA,610 (24.4%),115 (12.0%),495 (32.0%),<0.001
IA location - MCA,666 (26.6%),243 (25.3%),423 (27.4%),0.286
IA location - ACA,766 (30.6%),374 (39.0%),392 (25.4%),<0.001
IA location - PCA,463 (18.5%),227 (23.7%),236 (15.3%),<0.001
Multiple IA (yes),802 (32.0%),333 (34.7%),469 (30.3%),0.025
