# Table 1. Baseline characteristics of patients
- AGE
- SEX
- Bodyweight (KG)
-  Time of alarm (Day vs. night)
- MECHANISM
- NACA
- Main Diagnosis out-of-hospital (column L)
- In-hospital
- Main Diagnosis In-hospital (M)
- ISS


In [None]:
import pandas as pd

In [None]:
# Absolute path to the coded study workbook (Excel file) that is loaded below.
# NOTE(review): machine-specific path; it must be adjusted to run elsewhere.
data_path = '/Users/jk1/Library/CloudStorage/OneDrive-unige.ch/icu_research/prehospital/pediatric_trauma/data/Data_PedRegaTrauma_coded_for_analysis_250417.xlsx'

In [None]:
# Load the 'All centres cleaned' sheet of the workbook into a DataFrame.
data_df = pd.read_excel(data_path, sheet_name='All centres cleaned')

In [None]:
# Preview the first rows of the loaded sheet.
data_df.head()

In [None]:

def get_multi_label_counts(data_df, multi_label_column):
    """Count how often each label occurs in a comma-separated multi-label column.

    Each cell may hold a single code (e.g. ``3``) or several codes separated
    by commas (e.g. ``"1, 6"``). Labels are stripped of surrounding whitespace
    and counted as strings.

    Cells that are missing (NaN / None / pd.NA) or equal to the numeric
    sentinel ``999`` are ignored, as are empty fragments such as the one
    produced by a trailing comma. Integer-valued floats (``1.0``) are counted
    under their integer spelling (``"1"``) so that a float-typed column yields
    the same keys as an integer- or string-typed one.

    The input frame is only read; it is not modified.

    Args:
        data_df: DataFrame containing ``multi_label_column``.
        multi_label_column: Name of the column to tally.

    Returns:
        dict mapping label (str) -> count (int), ordered by descending count.
        Labels with equal counts keep their order of first appearance.
    """
    from collections import Counter

    label_counter = Counter()
    for value in data_df[multi_label_column]:
        if not isinstance(value, str):
            # Scalar cell: skip missing values and the 999 "unknown" sentinel.
            if pd.isna(value) or value == 999:
                continue
            # 1.0 -> 1 so the resulting key is "1" rather than "1.0".
            if isinstance(value, float) and value.is_integer():
                value = int(value)

        for fragment in str(value).split(','):
            label = fragment.strip()
            # 'nan' / '<NA>' are the string spellings of missing values.
            if label in ('', 'nan', '<NA>'):
                continue
            label_counter[label] += 1

    # most_common() sorts by count (descending) with a stable sort.
    return dict(label_counter.most_common())
    

In [None]:
# Inspect the distinct raw values of the in-hospital diagnosis column.
data_df['Main diagnosis in-hospital'].unique()

In [None]:
# Median age in years at the date of the accident (a year is taken as 365.25 days).
((pd.to_datetime(data_df['Date of accident']) - pd.to_datetime(data_df['Date of Birth']))/pd.Timedelta(days=365.25)).median()

In [None]:
# Mechanism-of-injury codes and the output column stem used for each:
#   1 traffic accident
#   2 fall (from height)
#   3 burning / scalding
#   4 battered child
#   5 collision involving other people
#   6 ski fall / high-velocity trauma / collision with objects
#   7 trip fall / fall from standing height
#   8 other traumatic impact
_MECHANISM_COLUMNS = {
    1: 'mechanism_traffic',
    2: 'mechanism_fall',
    3: 'mechanism_burn',
    4: 'mechanism_battered',
    5: 'mechanism_collision',
    6: 'mechanism_other_high_velocity',
    7: 'mechanism_fall_from_own_height',
    8: 'mechanism_other_trauma',
}

# Main-diagnosis codes and the row name used for each in the diagnosis table:
#   1 traumatic brain injury / cervical spine     2 chest trauma
#   3 abdominal trauma                            4 pelvic trauma / lumbar spine
#   5 upper extremity trauma                      6 lower extremity trauma
#   7 spine injury                                8 face
#   9 polytrauma                                 10 drowning
#  11 whole-body burn
# NOTE(review): code 7 (spine injury) is defined but not tabulated — confirm
# this is intentional.
_DIAGNOSIS_ROWS = (
    ('tbi_cspine', '1'),
    ('chest', '2'),
    ('abdomen', '3'),
    ('pelvic', '4'),
    ('upper_extremity', '5'),
    ('lower_extremity', '6'),
    ('face', '8'),
    ('polytrauma', '9'),
    ('drowning_and_asphyxia', '10'),
    ('burns_and_fumes', '11'),
)


def _median_iqr(series):
    """Return ``(median, q1, q3)`` of a numeric Series."""
    return series.median(), series.quantile(0.25), series.quantile(0.75)


def _format_median_iqr(median, q1, q3):
    """Format a median and quartiles as ``'median [q1 - q3]'`` with one decimal."""
    return f'{median:.1f} [{q1:.1f} - {q3:.1f}]'


def _count_and_pct(series, code):
    """Return ``(count, 'count (pct%)')`` for the value ``code`` in ``series``.

    The percentage comes from ``value_counts(normalize=True)``, which by
    default leaves missing values out of the denominator.
    """
    count = series.value_counts().get(code, 0)
    share = series.value_counts(normalize=True).get(code, 0)
    return count, f'{count} ({share * 100:.1f}%)'


def _diagnosis_strings(label_counts, n_patients):
    """Return one ``'count (pct%)'`` string per entry of ``_DIAGNOSIS_ROWS``.

    Percentages are relative to ``n_patients`` (all patients, not only those
    with a recorded diagnosis).
    """
    cells = []
    for _, code in _DIAGNOSIS_ROWS:
        count = label_counts.get(code, 0)
        # Guard against an empty cohort instead of dividing by zero.
        pct = count / n_patients * 100 if n_patients else 0.0
        cells.append(f'{count} ({pct:.1f}%)')
    return cells


def create_table1(df):
    """Build the baseline-characteristics table (Table 1) for the cohort.

    Side effects on ``df`` (it is modified in place):
      * an ``'age'`` column (years between 'Date of Birth' and
        'Date of accident', using 365.25-day years) is added;
      * 'Bodyweight (kg)' is converted to numeric ('n/a' and unparsable
        entries become NaN, decimal commas become points);
      * 999 in 'Time of alarm (day/night)' is replaced by a missing value;
      * a few free-text / combined entries in 'Mechanism of injury' and the
        two main-diagnosis columns are recoded (see the inline comments).

    Args:
        df: Patient-level DataFrame with one row per patient.

    Returns:
        Tuple ``(pop_df, str_df, dx_df)``:
          * ``pop_df``: numeric summary values, one row per statistic;
          * ``str_df``: the same statistics formatted for display
            (``'median [q1 - q3]'`` or ``'count (pct%)'``);
          * ``dx_df``: one row per diagnosis with 'Pre-hospital' and
            'Intrahospital' columns of ``'count (pct%)'`` strings.

    Raises:
        AssertionError: if 'Pat ID' is not unique per row, or if
            'Mechanism of injury' holds a value outside 1-8.
    """
    assert df["Pat ID"].nunique() == len(df), "There are duplicate patient IDs in the dataset."

    n_patients = len(df)
    pop_row = {'n_patients': n_patients}
    str_row = {'n_patients': n_patients}

    # --- Age --------------------------------------------------------------
    # Stored as a real column (not an ad-hoc attribute) so that `df.age`
    # keeps working for later cells.
    df['age'] = (
        pd.to_datetime(df['Date of accident']) - pd.to_datetime(df['Date of Birth'])
    ) / pd.Timedelta(days=365.25)
    age_stats = _median_iqr(df['age'])
    pop_row['age_median'], pop_row['age_q1'], pop_row['age_q3'] = age_stats
    str_row['age'] = _format_median_iqr(*age_stats)

    # --- Sex (code 2 is the one reported as male) -------------------------
    male_count, male_str = _count_and_pct(df['Sex'], 2)
    pop_row['sex_m'] = male_count
    str_row['sex_(male)'] = male_str

    # --- Body weight ------------------------------------------------------
    weight = df['Bodyweight (kg)'].replace('n/a', pd.NA)
    # Decimal commas -> decimal points before numeric conversion.
    weight = weight.astype(str).str.replace(',', '.', regex=False)
    df['Bodyweight (kg)'] = pd.to_numeric(weight, errors='coerce')
    weight_stats = _median_iqr(df['Bodyweight (kg)'])
    pop_row['weight_median'], pop_row['weight_q1'], pop_row['weight_q3'] = weight_stats
    str_row['weight'] = _format_median_iqr(*weight_stats)

    # --- Time of alarm (code 2 is the one reported as night) --------------
    # 999 marks an unknown value; treat it as missing.
    df['Time of alarm (day/night)'] = df['Time of alarm (day/night)'].replace(999, pd.NA)
    night_count, night_str = _count_and_pct(df['Time of alarm (day/night)'], 2)
    pop_row['night_mission'] = night_count
    str_row['night_mission'] = night_str

    # --- Mechanism of injury ----------------------------------------------
    # The combined entry '1,6' is counted as a traffic accident (1).
    df['Mechanism of injury'] = df['Mechanism of injury'].replace('1,6', 1)
    # assert that all values are in the range 1-8
    assert df['Mechanism of injury'].isin([1, 2, 3, 4, 5, 6, 7, 8]).all(), "There are values in the Mechanism of injury column that are not in the range 1-8"
    for code, column in _MECHANISM_COLUMNS.items():
        mechanism_count, mechanism_str = _count_and_pct(df['Mechanism of injury'], code)
        pop_row[column] = mechanism_count
        str_row[column] = mechanism_str

    # --- NACA score -------------------------------------------------------
    naca_stats = _median_iqr(df['NACA'])
    pop_row['naca_median'], pop_row['naca_q1'], pop_row['naca_q3'] = naca_stats
    str_row['naca'] = _format_median_iqr(*naca_stats)

    # --- Main diagnosis, pre-hospital -------------------------------------
    # Recode free-text entries to diagnosis codes before counting.
    pre_hosp_recodes = [
        ('<NA>', pd.NA),
        ('Vd. a. Asphiktische REA', 10),
        ('1. CO Intoxikation durch Rauchgasvergiftung (Kachelofen)\n   - CO 20%\n   - Schwindel, Unwohlsein, fragliche krampfartigen Äquivalente', 11),
        ('1. CO INtoxikation durch Rauchgasvergiftung (Kachelofen) mit\n   - Krampfäquivalent, Schwindel, Übelkeit\n   - CO 22%', 11),
    ]
    for raw_value, code in pre_hosp_recodes:
        df['Main diagnosis pre-hospital'] = df['Main diagnosis pre-hospital'].replace(raw_value, code)
    pre_hosp_diagnosis_counts = get_multi_label_counts(df, 'Main diagnosis pre-hospital')

    # --- Main diagnosis, in-hospital --------------------------------------
    in_hosp_recodes = [
        ('Obstrukt.Atemversagen -REA', 10),
        ('C2-Intoxikation', pd.NA),
    ]
    for raw_value, code in in_hosp_recodes:
        df['Main diagnosis in-hospital'] = df['Main diagnosis in-hospital'].replace(raw_value, code)
    in_hospital_diagnosis_counts = get_multi_label_counts(df, 'Main diagnosis in-hospital')

    # --- Assemble the output frames ---------------------------------------
    pop_df = pd.DataFrame([pop_row])
    str_df = pd.DataFrame([str_row])

    # One row per diagnosis, one column per setting.
    dx_df = pd.DataFrame(
        {
            'Pre-hospital': _diagnosis_strings(pre_hosp_diagnosis_counts, n_patients),
            'Intrahospital': _diagnosis_strings(in_hospital_diagnosis_counts, n_patients),
        },
        index=pd.Index([row_name for row_name, _ in _DIAGNOSIS_ROWS], name='Diagnosis'),
    )
    return pop_df.T, str_df.T, dx_df




In [None]:
# Build the Table 1 frames from the loaded data and display the formatted one.
pop_df, str_df, dx_df = create_table1(data_df)
str_df

In [None]:
# Display the pre-hospital vs. in-hospital diagnosis table.
dx_df

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from math import pi

def create_diagnosis_radar_plot(dx_df):
    """Show a radar chart of pre-hospital vs. in-hospital diagnosis percentages.

    Args:
        dx_df: DataFrame with 'Pre-hospital' and 'Intrahospital' columns whose
            cells are strings such as "25 (12.5%)". The index supplies the
            axis labels.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    def _percent(cell):
        # "25 (12.5%)" -> 12.5
        return float(cell.split('(')[1].split('%')[0])

    # Pull the percentage out of every formatted cell, row by row.
    pre_pcts, in_pcts = [], []
    for _, record in dx_df.iterrows():
        pre_pcts.append(_percent(record['Pre-hospital']))
        in_pcts.append(_percent(record['Intrahospital']))

    # One axis per diagnosis, evenly spread around the circle.
    axis_names = dx_df.index.tolist()
    n_axes = len(axis_names)
    theta = [k / float(n_axes) * 2 * pi for k in range(n_axes)]

    # Repeat the first point at the end so each polygon closes.
    theta = theta + theta[:1]
    pre_closed = pre_pcts + pre_pcts[:1]
    in_closed = in_pcts + in_pcts[:1]

    fig, ax = plt.subplots(figsize=(10, 10), subplot_kw=dict(projection='polar'))

    # Outline plus translucent fill for each series.
    for series, series_label, series_color in (
        (pre_closed, 'Pre-hospital', '#1f77b4'),
        (in_closed, 'Intrahospital', '#ff7f0e'),
    ):
        ax.plot(theta, series, 'o-', linewidth=2, label=series_label, color=series_color)
        ax.fill(theta, series, alpha=0.25, color=series_color)

    # Category labels on the angular axis.
    ax.set_xticks(theta[:-1])
    ax.set_xticklabels(axis_names, size=10)

    # Radial axis runs from 0 to 10% above the largest plotted value.
    peak = max(max(pre_closed[:-1]), max(in_closed[:-1]))
    ax.set_ylim(0, peak * 1.1)
    ax.set_ylabel('Percentage (%)', size=12)

    ax.grid(True)
    ax.legend(loc='upper right', bbox_to_anchor=(0.1, 0.1))
    plt.title('Diagnosis Distribution: Pre-hospital vs Intrahospital', size=16, weight='bold', pad=20)

    plt.tight_layout()
    plt.show()

# Draw the radar plot for the diagnosis table built above.
create_diagnosis_radar_plot(dx_df)

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Axis labels for the body-region radar chart, one per body region.
# NOTE(review): the meaning of the two numbers in parentheses is not defined
# in this notebook — confirm and document.
labels = [
    'Head (215/161)',
    'Face (115/11)',
    'Neck (22/8)',
    'Thorax (176/142)',
    'Abdomen (66/44)',
    'Pelvis (53/39)',
    'Spine (140/60)',
    'Upper extremity (134/3)',
    'Lower extremity (89/38)',
    'External (22/1)'
]

# Percentage values for each category, in the same order as `labels`.
# NOTE(review): these values are hard-coded here rather than computed from
# data_df — confirm their source.
mild =       [85, 30, 25, 50, 25, 35, 15, 45, 40, 30]
relevant =   [95, 55, 30, 80, 60, 80, 45, 70, 70,  0]

def make_radar(labels, data_series, colors, alphas, lines, fills, legend_labels):
    """Show a radar chart with one closed polygon per data series.

    The first axis is placed at the top and axes proceed clockwise. The radial
    axis is fixed to 0-100 with ticks every 20%.

    Args:
        labels: Axis labels; their number defines the number of axes.
        data_series: Sequence of series, each holding one value per label.
        colors: Line/fill color per series.
        alphas: Fill opacity per series.
        lines: Line width per series.
        fills: Per-series flag; the area under a series is filled only when
            its flag is truthy. ``None`` or missing trailing entries default
            to filling.
        legend_labels: Legend text per series.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    from itertools import chain, repeat

    num_vars = len(labels)

    # Evenly spaced axis angles; repeat the first so polygons close.
    angles = np.linspace(0, 2 * np.pi, num_vars, endpoint=False).tolist()
    angles += angles[:1]

    # Create figure and polar subplot
    fig, ax = plt.subplots(figsize=(8, 8), subplot_kw=dict(polar=True))
    # Rotate the plot so the first axis is on top, going clockwise
    ax.set_theta_offset(np.pi / 2)
    ax.set_theta_direction(-1)

    # Pad `fills` with True so a short or missing list never drops a series.
    fill_flags = chain(() if fills is None else fills, repeat(True))

    # Plot each series
    for data, color, alpha, linewidth, label, do_fill in zip(
        data_series, colors, alphas, lines, legend_labels, fill_flags
    ):
        # list() so arrays/tuples are concatenated rather than added elementwise.
        values = list(data)
        closed = values + values[:1]
        ax.plot(angles, closed, color=color, linewidth=linewidth, label=label)
        if do_fill:
            ax.fill(angles, closed, color=color, alpha=alpha)

    # Set category labels
    ax.set_xticks(angles[:-1])
    ax.set_xticklabels(labels)

    # Set radial labels and grid
    ax.set_rlabel_position(30)
    ax.set_yticks([20, 40, 60, 80, 100])
    ax.set_yticklabels(['20%', '40%', '60%', '80%', '100%'])
    ax.set_ylim(0, 100)

    # Title and legend
    ax.set_title('Identified Body Region Trauma', y=1.08, fontsize=16, fontweight='bold')
    ax.legend(loc='upper right', bbox_to_anchor=(1.3, 1.1))

    plt.tight_layout()
    plt.show()

# Draw the mild vs. relevant body-region radar chart; each list argument
# holds one entry per series.
make_radar(
    labels=labels,
    data_series=[mild, relevant],
    colors=['green', 'red'],
    alphas=[0.25, 0.25],
    lines=[2, 2],
    fills=[True, True],
    legend_labels=[
        'Identified body region trauma: mild [%]',
        'Identified body region trauma: relevant [%]'
    ]
)


In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

def make_radar_from_dx_df(dx_df):
    """Show a radar chart of pre-hospital vs. in-hospital diagnosis percentages.

    Args:
        dx_df: DataFrame with 'Pre-hospital' and 'Intrahospital' columns whose
            cells are strings such as "25 (12.5%)". It must have exactly one
            row per entry of the fixed label list below, in the same order.

    Returns:
        Tuple ``(fig, ax)`` of the created matplotlib figure and polar axes.

    Raises:
        ValueError: if the number of rows in ``dx_df`` does not match the
            number of axis labels.
    """
    # Display labels are positional: they must follow the row order of dx_df.
    labels = ['TBI', 'Chest', 'Abdomen', 'Pelvis', 'Upper extremity', 'Lower extremity', 'Face', 'Polytrauma', 'Drowning', 'Burns']
    num_vars = len(labels)
    if len(dx_df) != num_vars:
        # Fail early with a clear message instead of a matplotlib shape error.
        raise ValueError(
            f'dx_df has {len(dx_df)} rows but {num_vars} axis labels are defined'
        )

    def _percent(cell):
        # "25 (12.5%)" -> 12.5
        return float(cell.split('(')[1].split('%')[0])

    pre_hosp_values = []
    in_hosp_values = []
    for pre_cell, in_cell in zip(dx_df['Pre-hospital'], dx_df['Intrahospital']):
        pre_hosp_values.append(_percent(pre_cell))
        in_hosp_values.append(_percent(in_cell))

    # Evenly spaced axis angles; repeat the first so polygons close.
    angles = np.linspace(0, 2 * np.pi, num_vars, endpoint=False).tolist()
    angles += angles[:1]

    # Create figure and polar subplot
    fig, ax = plt.subplots(figsize=(8, 8), subplot_kw=dict(polar=True))
    # Rotate the plot so the first axis is on top, going clockwise
    ax.set_theta_offset(np.pi / 2)
    ax.set_theta_direction(-1)

    # Prepare data series
    data_series = [pre_hosp_values, in_hosp_values]
    colors = ['#FF6347', '#2E8B57']
    alphas = [0.25, 0.25]
    lines = [2, 2]
    legend_labels = ['Pre-hospital', 'Intrahospital']

    # Plot each series
    for data, color, alpha, linewidth, label in zip(data_series, colors, alphas, lines, legend_labels):
        d = data + data[:1]  # Close the radar chart
        ax.plot(angles, d, color=color, linewidth=linewidth, label=label)
        ax.fill(angles, d, color=color, alpha=alpha)

    # Set category labels and push them away from the plot area
    X_HORIZONTAL_TICK_PADDING = 19
    ax.set_xticks(angles[:-1])
    ax.set_xticklabels(labels, fontsize=11)
    for tick in ax.xaxis.get_major_ticks():
        tick.set_pad(X_HORIZONTAL_TICK_PADDING)

    # Set radial labels and grid
    ax.set_rlabel_position(40)

    # Fixed radial limit for consistency between figures.
    # NOTE(review): values above y_max lie outside the axis range — confirm
    # that 40% covers the data.
    y_max = 40
    ax.set_yticks([10, 20, 30, 40])
    ax.set_yticklabels(['10%', '20%', '30%', '40%'])
    ax.set_ylim(0, y_max)

    # Legend (no title is drawn on this figure)
    ax.legend(loc='upper right', bbox_to_anchor=(1.1, 1.1))

    plt.tight_layout()
    plt.show()

    return fig, ax

# Draw the diagnosis radar chart and keep the figure/axes handles.
fig, ax = make_radar_from_dx_df(dx_df)


In [None]:
# fig.savefig('/Users/jk1/Library/CloudStorage/OneDrive-unige.ch/icu_research/prehospital/pediatric_trauma/figures/diagnosis_distribution_radar_chart.png', bbox_inches='tight', dpi=600)

In [None]:
# Plot the age distribution as a 50-bin histogram.
# `data_df.age` is set inside create_table1, so that cell must have run first.
data_df.age.hist(bins=50)

In [None]:
# save pop_df, str_df and dx_df to excel
# pop_df.to_excel('/Users/jk1/Downloads/table1_pop_df.xlsx')
# str_df.to_excel('/Users/jk1/Downloads/table1.xlsx')
# dx_df.to_excel('/Users/jk1/Downloads/diagnoses.xlsx')