# ICAN clinical data characteristics

**Objective:** present the ICAN database.

## Packages importation

In [1]:
import pandas as pd
import numpy as np
from scipy import stats

import Missing_Values as MV

## Data loading

In [2]:
ican = pd.read_csv("simulated_ican_clinical_data.csv", sep = "#")
ican.shape

(2505, 29)

## Baseline Characteristics table creation

In [3]:
data_UIA = ican[ican.rupture == "No"]
data_RIA = ican[ican.rupture == "Yes"]

In [4]:
def info_count(df, var, value, dec) :
    if value == "nan" :
        if dec == 0 :
            return str(MV.num_missing(df[var])) + " (" + str(int(round(MV.num_missing(df[var])/len(df)*100))) + "%)"
        else :
            return str(MV.num_missing(df[var])) + " (" + str(round(MV.num_missing(df[var])/len(df)*100, dec)) + "%)"
    elif len(df[df[var] == value]) == 0 :
        return 0
    else :
        if dec == 0 :
            return str(len(df[df[var] == value])) + " (" + str(int(round(len(df[df[var] == value])/len(df)*100))) + "%)"
        else :
            return str(len(df[df[var] == value])) + " (" + str(round(len(df[df[var] == value])/len(df)*100, dec)) + "%)"

In [5]:
def info_mean(df, var, dec) :
    df2 = df.copy()
    df2 = df2.reset_index(drop = True)
    if MV.num_missing(df[var]) != 0 :
        df2 = df2.drop(MV.index_missing(df2[var]), axis = 0)
        df2 = df2.reset_index(drop = True)
    else :
        df2 = df.copy()
    if df2[var].dtypes not in ["float64"] :
        df2[var] = df2[var].apply(lambda x: float(x))
    if str(np.mean(df2[var])) != str(np.nan) :
        if abs((stats.skew(df2[var]))) < 2 :
            if dec == 0 :
                val_mean = int(round(np.mean(df2[var])))
                val_std = int(round(np.std(df2[var])))
            else : 
                val_mean = round(np.mean(df2[var]), dec)
                val_std = round(np.std(df2[var]), dec)
            return str(val_mean) + " (+/- " + str(val_std) + ")"
        else :
            val_med = np.median(df2[var])
            val_Q1 = np.percentile(df2[var], 25)
            val_Q3 = np.percentile(df2[var], 75)
            return str(val_med) + " [" + str(val_Q1) + "-" + str(val_Q3) + "]"
    else :
        return "NaN"

In [6]:
def col_filling_table(df) :
    col = []
    table_index = []
    col.append(info_count(df, "sex", "F", 1)) ; table_index.append("Sex (women)")
    col.append(info_mean(df, "age", 1)) ; table_index.append("Age (year)")
    col.append(info_mean(df, "bmi", 1)) ; table_index.append("Body mass index (kg/m²)")
    col.append(info_count(df, "sporadic_case", "No", 1)) ; table_index.append("Familial history (yes)")
    col.append(info_mean(df, "adjusted_IA_size", 1)) ; table_index.append("Adjusted Size Ratio")
    col.append(info_count(df, "IA_location", "ICA", 1)) ; table_index.append("IA location - ICA")
    col.append(info_count(df, "IA_location", "MCA", 1)) ; table_index.append("IA location - MCA")
    col.append(info_count(df, "IA_location", "ACA", 1)) ; table_index.append("IA location - ACA")
    col.append(info_count(df, "IA_location", "PCA", 1)) ; table_index.append("IA location - PCA")
    col.append(info_count(df, "multiple_IA", "Yes", 1)) ; table_index.append("Multiple IA (yes)")
    col.append(info_count(df, "hta", "Yes", 1)) ; table_index.append("Hypertension (yes)")
    col.append(info_count(df, "headaches", "Yes", 1)) ; table_index.append("Headaches (yes)")
    col.append(info_count(df, "dyslipidemia", "Yes", 1)) ; table_index.append("Dyslipidemia (yes)")
    col.append(info_count(df, "ischemic_stroke_history", "Yes", 1)) ; table_index.append("Ischemic stroke history (yes)")
    
    col.append(info_count(df, "ischemic_heart_disease_history", "Yes", 1))
    table_index.append("Ischemic heart disease history (yes)")
    
    col.append(info_count(df, "packs_year", "Non-smoker", 1))
    table_index.append("Tobacco consumption - Non-smoker")
    
    col.append(info_count(df, "packs_year", "Little_smoker", 1))
    table_index.append("Tobacco consumption - Little smoker")
    
    col.append(info_count(df, "packs_year", "Regular_smoker", 1))
    table_index.append("Tobacco consumption - Regular smoker")
    
    col.append(info_count(df, "alcohol", "> 150g", 1)) ; table_index.append("Alcohol consumption (> 150g)")
    return col, table_index

In [7]:
col_UIA, table_index = col_filling_table(data_UIA)
col_RIA, table_index = col_filling_table(data_RIA)
col_TOT, table_index = col_filling_table(ican)

In [8]:
table = pd.DataFrame({"Whole population (n = " + str(len(ican)) + ')' : col_TOT, 
                      "Patients with ruptured IA (n = " + str(len(data_RIA)) + ')' : col_RIA, 
                      "Patients with unruptured IA (n = " + str(len(data_UIA)) + ')' : col_UIA}, 
                     index = table_index)
table

Unnamed: 0,Whole population (n = 2505),Patients with ruptured IA (n = 986),Patients with unruptured IA (n = 1519)
Sex (women),1813 (72.4%),722 (73.2%),1091 (71.8%)
Age (year),53.0 (+/- 11.8),53.2 (+/- 11.8),52.9 (+/- 11.8)
Body mass index (kg/m²),25.6 (+/- 4.5),25.6 (+/- 4.5),25.6 (+/- 4.6)
Familial history (yes),441 (17.6%),171 (17.3%),270 (17.8%)
Adjusted Size Ratio,2.8 (+/- 2.3),2.8 (+/- 2.3),2.8 (+/- 2.3)
IA location - ICA,610 (24.4%),240 (24.3%),370 (24.4%)
IA location - MCA,653 (26.1%),253 (25.7%),400 (26.3%)
IA location - ACA,800 (31.9%),322 (32.7%),478 (31.5%)
IA location - PCA,442 (17.6%),171 (17.3%),271 (17.8%)
Multiple IA (yes),796 (31.8%),318 (32.3%),478 (31.5%)


In [9]:
Pvalues = []
for i in range(len(table)) :
    if "%" in table.iloc[i,1] :
        val_Yes = [int(table.iloc[i,1].split()[0]), int(table.iloc[i,2].split()[0])]
        val_No = [994 - int(table.iloc[i,1].split()[0]), 1511 - int(table.iloc[i,2].split()[0])]
        df_contingence = pd.DataFrame({"Yes" : val_Yes, "No" : val_No})
        st_chi2, st_p, st_dof, st_exp = stats.chi2_contingency(df_contingence)
        if st_p < 0.001 :
            Pvalues.append("%.2e"%st_p)
        else : 
            Pvalues.append(round(st_p,3))
    elif "+/-" in table.iloc[i,1] :
        if "Age" in table.iloc[i].name :
            st_ttest, st_p = stats.ttest_ind(data_RIA.age, data_UIA.age, axis = 0, equal_var = False)
            if st_p < 0.001 :
                Pvalues.append("%.2e"%st_p)
            else : 
                Pvalues.append(round(st_p,3))
        if "Body" in table.iloc[i].name :
            st_ttest, st_p = stats.ttest_ind(data_RIA.bmi, data_UIA.bmi, axis = 0, equal_var = False)
            if st_p < 0.001 :
                Pvalues.append("%.2e"%st_p)
            else : 
                Pvalues.append(round(st_p,3))
        if "Adjusted Size Ratio" in table.iloc[i].name :
            st_med, st_p, st_m, st_table = stats.median_test(data_RIA.adjusted_IA_size, data_UIA.adjusted_IA_size)
            if st_p < 0.001 :
                Pvalues.append("%.2e"%st_p)
            else : 
                Pvalues.append(round(st_p,3))
table["p-value"] = Pvalues
table

Unnamed: 0,Whole population (n = 2505),Patients with ruptured IA (n = 986),Patients with unruptured IA (n = 1519),p-value
Sex (women),1813 (72.4%),722 (73.2%),1091 (71.8%),0.849
Age (year),53.0 (+/- 11.8),53.2 (+/- 11.8),52.9 (+/- 11.8),0.493
Body mass index (kg/m²),25.6 (+/- 4.5),25.6 (+/- 4.5),25.6 (+/- 4.6),0.863
Familial history (yes),441 (17.6%),171 (17.3%),270 (17.8%),0.708
Adjusted Size Ratio,2.8 (+/- 2.3),2.8 (+/- 2.3),2.8 (+/- 2.3),0.519
IA location - ICA,610 (24.4%),240 (24.3%),370 (24.4%),0.883
IA location - MCA,653 (26.1%),253 (25.7%),400 (26.3%),0.601
IA location - ACA,800 (31.9%),322 (32.7%),478 (31.5%),0.722
IA location - PCA,442 (17.6%),171 (17.3%),271 (17.8%),0.677
Multiple IA (yes),796 (31.8%),318 (32.3%),478 (31.5%),0.885
