In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Load Data

In [None]:
# Load Data
# NOTE(review): the path is relative to the notebook's working directory.
df = pd.read_csv('LEOSS Kuehn 08 25/SUF_Kuehn_200825.csv', sep=';')

# Only keep entries whose last known status is 2 or 3
# (the functions below treat 2 as "recovered" and 3 as "dead")
df = df[df['BL_LastKnownStatus'].isin([2, 3])]

# Discard entries with unknown age ('*') and convert to int.
# .copy() makes the filtered frame independent, so the column assignment
# below does not write into a slice of the raw frame (SettingWithCopyWarning).
df = df[df['BL_Age'] != '*'].copy()
df['BL_Age'] = df['BL_Age'].astype(int)

display(df)

In [None]:
def read_column(df, min_val=None, max_val=None):
    """Parse a single column of (possibly censored) duration entries.

    Entries containing '<' (e.g. '<3') are replaced by ``min_val`` and entries
    containing '>' (e.g. '>15') are replaced by ``max_val``. All other entries
    are converted with ``int`` and kept only when they are non-negative.

    Args:
        df: a single column (pandas Series); only its ``.values`` are read.
        min_val: replacement for '<' entries; if None those entries are dropped.
        max_val: replacement for '>' entries; if None those entries are dropped.

    Returns:
        list of the kept values, in column order. The list can be shorter
        than the column because dropped entries leave no placeholder.

    Raises:
        ValueError: if an uncensored entry cannot be parsed by ``int``.
    """
    duration = []
    for raw in df.values:
        # Coerce to str so that columns pandas parsed as numeric (no censored
        # entries present) do not fail on the substring checks below.
        entry = str(raw)
        if '<' in entry:
            if min_val is not None:
                duration.append(min_val)
        elif '>' in entry:
            if max_val is not None:
                duration.append(max_val)
        else:
            value = int(entry)
            if value >= 0:
                duration.append(value)

    print("Number of Patients: ", len(duration))

    return duration

In [None]:
def _plot_probability_by_age(ages, probabilities, title):
    """Draw one line plot of `probabilities` against the age-group labels `ages`."""
    fig, ax = plt.subplots(1,1,figsize=(9,6))
    ax.plot(range(len(ages)), probabilities)
    ax.set_xticks(range(len(ages)))
    ax.set_xticklabels(ages)
    ax.set_title(title, fontsize=18)
    ax.set_xlabel('Age Group', fontsize=18)
    ax.set_ylabel('Probability', fontsize=18)
    plt.show()


def calc_prob(df_age_groups, ages):
    """Calculate the probability of ICU transfer during a hospital stay (mu_HU,
    printed as 'Theta') and of dying during ICU (mu_UD, printed as 'Delta').

    Per age group:
      * hospital entries: 'BL_Duration_Inpatientstay' is not 'Missing'
      * ICU entries: hospital entries whose 'BL_Duration_ICUStay' is not 'Missing'
      * deaths: ICU entries with 'BL_LastKnownStatus' == 3
      * mu_HU = ICU entries / hospital entries, mu_UD = deaths / ICU entries

    Args:
        df_age_groups: iterable of DataFrames, one per age group.
        ages: labels of the age groups, in the same order.

    Returns:
        (mu_HU_list, mu_UD_list) with one value per age group. A group without
        hospital (resp. ICU) entries contributes 0 to the respective list.

    Side effects:
        Prints the values per group; when more than one group is given, shows
        one line plot per probability.
    """
    mu_HU_list = []
    mu_UD_list = []
    for df_age, age in zip(df_age_groups, ages):
        in_hosp = df_age['BL_Duration_Inpatientstay'] != 'Missing'
        in_icu = in_hosp & (df_age['BL_Duration_ICUStay'] != 'Missing')
        died_in_icu = in_icu & (df_age['BL_LastKnownStatus'] == 3)

        # plain ints so the ratios below are ordinary Python floats
        num_hosp = int(in_hosp.sum())
        num_icu = int(in_icu.sum())
        num_dead = int(died_in_icu.sum())

        if num_hosp > 0:
            mu_HU = num_icu/num_hosp
            mu_HU_list.append(mu_HU)
        else:
            # the message is only printed; the returned list gets 0
            mu_HU = 'No Entries for this Age Group'
            mu_HU_list.append(0)

        if num_icu > 0:
            mu_UD = num_dead/num_icu
            mu_UD_list.append(mu_UD)
        else:
            mu_UD = 'No Entries for this Age Group'
            mu_UD_list.append(0)

        # f-string instead of `age + ':'` so non-string labels also work
        print('Parameters for Ages', f'{age}:')
        print('Theta:', mu_HU)
        print('Delta:', mu_UD)
        print('')

    # a single group would give a one-point line, so plots are skipped
    if len(mu_UD_list) > 1:
        _plot_probability_by_age(ages, mu_UD_list, 'Probability of death during ICU')
        _plot_probability_by_age(ages, mu_HU_list, 'Probability of ICU during Hospitalstay')
    return mu_HU_list, mu_UD_list

In [None]:
def calc_time_hosp_rec(df_age_groups, ages):
    """Average inpatient-stay duration of entries that recovered without ICU (R5).

    Per age group, uses 'BL_Duration_Inpatientstay' of entries where that
    column is not 'Missing', 'BL_LastKnownStatus' is 2 and
    'BL_Duration_ICUStay' is 'Missing'. The column is parsed with
    read_column, mapping '<' entries to 2 and '>' entries to 15.

    Prints mean and standard deviation per group and plots, per group, the
    number of patients for each distinct duration.

    Returns:
        (mean, std): two lists with one value per age group.
    """
    print('R5:')
    durations, mean, std = [], [], []
    for group, label in zip(df_age_groups, ages):
        print('Parameters for Ages', label + ':')
        recovered_without_icu = (
            (group['BL_Duration_Inpatientstay'] != 'Missing')
            & (group['BL_LastKnownStatus'] == 2)
            & (group['BL_Duration_ICUStay'] == 'Missing')
        )
        stay = group.loc[recovered_without_icu, 'BL_Duration_Inpatientstay']

        duration = read_column(stay, 2, 15)
        group_mean = np.mean(duration)
        group_std = np.std(duration)

        print('Mean of R5:', group_mean)
        print('STD of R5:', group_std, '\n')
        durations.append(duration)
        mean.append(group_mean)
        std.append(group_std)

    fig, ax = plt.subplots(1,1,figsize=(9,6))

    # one line per age group: distinct durations vs. how often each occurs
    for group_durations in durations:
        days, patients = np.unique(group_durations, return_counts=True)
        ax.plot(days, patients)
    if len(durations) > 1:
        ax.legend(ages, fontsize=min(18,18*6/len(durations)))
    ax.set_ylim(0)
    ax.set_title('Time spent in hospital before recovering', fontsize = 18)
    ax.set_xlabel('Time [Days]', fontsize=18)
    ax.set_ylabel('Number of Patients', fontsize=18)
    plt.show()

    return mean, std

In [None]:
def calc_time_inf_hosp(df_age_groups, ages):
    """Average time of positive test before hospital admission (R6).

    Per age group, uses every 'BL_Admission' entry that is not 'Missing',
    parsed with read_column with max_val=7: '>' entries count as 7, and
    '<' entries are dropped because no min_val is given.

    Prints mean and standard deviation per group and plots, per group, the
    number of patients for each distinct duration.

    Returns:
        (mean, std): two lists with one value per age group.
    """
    print('R6:')
    durations, mean, std = [], [], []
    for group, label in zip(df_age_groups, ages):
        print('Parameters for Ages', label + ':')
        admission = group['BL_Admission']
        known_admission = admission[admission != 'Missing']

        duration = read_column(known_admission, max_val=7)
        group_mean = np.mean(duration)
        group_std = np.std(duration)

        print('Mean of R6:', group_mean)
        print('STD of R6:', group_std)
        durations.append(duration)
        mean.append(group_mean)
        std.append(group_std)
        print()

    fig, ax = plt.subplots(1,1,figsize=(9,6))
    # one line per age group: distinct durations vs. how often each occurs
    for group_durations in durations:
        days, patients = np.unique(group_durations, return_counts=True)
        ax.plot(days, patients)
    if len(durations) > 1:
        ax.legend(ages, fontsize=min(18,18*6/len(durations)))
    ax.set_ylim(0)
    ax.set_title('Time of known infection before admission', fontsize = 18)
    ax.set_xlabel('Time [Days]', fontsize=18)
    ax.set_ylabel('Number of Patients', fontsize=18)
    plt.show()

    return mean, std


In [None]:
def _kept_with_both_bounds(value):
    """Return True when read_column keeps `value` if min_val and max_val are both set.

    Mirrors read_column's rule: censored entries ('<', '>') are always
    replaced by a bound, plain entries are kept only when non-negative.
    """
    text = str(value)
    return '<' in text or '>' in text or int(text) >= 0


def calc_time_hosp_ICU(df_age_groups, ages):
    """Average time of hospital stay before ICU admission (R7).

    Per age group, uses entries where both duration columns are not
    'Missing' and 'BL_LastKnownStatus' is 3. The value per entry is the
    inpatient-stay duration ('<' -> 1.5, '>' -> 16) minus the ICU-stay
    duration ('<' -> 2, '>' -> 15).

    NOTE(review): only status-3 entries are used, although the quantity is
    not specific to that outcome. Confirm this restriction is intended.

    Prints mean and standard deviation per group and plots, per group, the
    number of patients for each distinct value.

    Returns:
        (mean, std): two lists with one value per age group.
    """
    print('R7:')
    durations = []
    mean = []
    std = []
    for df_age, age in zip(df_age_groups, ages):
        print('Parameters for Ages', age+ ':')
        selected = np.all([df_age['BL_Duration_Inpatientstay']!='Missing',
                           df_age['BL_LastKnownStatus']==3,
                           df_age['BL_Duration_ICUStay']!='Missing'], axis=0)
        df_hosp_icu_dead_dur = df_age[['BL_Duration_Inpatientstay',
                                       'BL_Duration_ICUStay']][selected]

        # read_column silently drops negative entries. Parsing the two
        # columns independently could therefore pair values from different
        # rows (or give arrays of different length). Keep only rows that
        # survive in BOTH columns so the subtraction below stays row-wise.
        usable = np.array(
            [_kept_with_both_bounds(hosp) and _kept_with_both_bounds(icu)
             for hosp, icu in zip(df_hosp_icu_dead_dur['BL_Duration_Inpatientstay'],
                                  df_hosp_icu_dead_dur['BL_Duration_ICUStay'])],
            dtype=bool)
        df_hosp_icu_dead_dur = df_hosp_icu_dead_dur[usable]

        hosp_duration = read_column(df_hosp_icu_dead_dur['BL_Duration_Inpatientstay'], 1.5, 16)
        icu_duration = read_column(df_hosp_icu_dead_dur['BL_Duration_ICUStay'], 2, 15)

        # computed once and reused for the plot data and the statistics
        time_before_icu = np.array(hosp_duration) - np.array(icu_duration)
        durations.append(time_before_icu)

        group_mean = np.mean(time_before_icu)
        group_std = np.std(time_before_icu)
        print('Mean of R7:', group_mean)
        print('STD of R7:', group_std)
        mean.append(group_mean)
        std.append(group_std)
        print()

    fig, ax = plt.subplots(1,1,figsize=(9,6))
    for age in range(len(durations)):
        unique, counts = np.unique(durations[age], return_counts=True)
        ax.plot(unique, counts)
    if len(durations) > 1:
        ax.legend(ages, fontsize=min(18,18*6/len(durations)))
    ax.set_ylim(0)
    ax.set_title('Time of Hospital Stay before ICU', fontsize = 18)
    ax.set_xlabel('Time [Days]', fontsize=18)
    ax.set_ylabel('Number of Patients', fontsize=18)
    plt.show()

    return mean, std

In [None]:
def calc_time_ICU_rec(df_age_groups, ages):
    """Average time of ICU stay before recovering (R8).

    Per age group, uses 'BL_Duration_ICUStay' of entries where both duration
    columns are not 'Missing' and 'BL_LastKnownStatus' is 2. The column is
    parsed with read_column, mapping '<' entries to 2 and '>' entries to 15.

    Prints mean and standard deviation per group and plots, per group, the
    number of patients for each distinct duration.

    Returns:
        (mean, std): two lists with one value per age group.
    """
    print('R8:')
    durations, mean, std = [], [], []
    for group, label in zip(df_age_groups, ages):
        print('Parameters for Ages', label + ':')
        recovered_after_icu = (
            (group['BL_Duration_Inpatientstay'] != 'Missing')
            & (group['BL_LastKnownStatus'] == 2)
            & (group['BL_Duration_ICUStay'] != 'Missing')
        )
        icu_stay = group.loc[recovered_after_icu, 'BL_Duration_ICUStay']
        icu_duration = read_column(icu_stay, 2, 15)

        durations.append(icu_duration)

        group_mean = np.mean(icu_duration)
        group_std = np.std(icu_duration)
        print('Mean of R8:', group_mean)
        print('STD of R8:', group_std)

        mean.append(group_mean)
        std.append(group_std)
        print()

    fig, ax = plt.subplots(1,1,figsize=(9,6))
    # one line per age group: distinct durations vs. how often each occurs
    for group_durations in durations:
        days, patients = np.unique(group_durations, return_counts=True)
        ax.plot(days, patients)
    if len(durations) > 1:
        ax.legend(ages, fontsize=min(18,18*6/len(durations)))
    ax.set_ylim(0)
    ax.set_title('Time of ICU Stay before Recovering', fontsize = 18)
    ax.set_xlabel('Time [Days]', fontsize=18)
    ax.set_ylabel('Number of Patients', fontsize=18)
    plt.show()

    return mean, std



In [None]:
def calc_time_ICU_death(df_age_groups, ages):
    """Average time of ICU stay before dying (R10).

    Per age group, uses 'BL_Duration_ICUStay' of entries where both duration
    columns are not 'Missing' and 'BL_LastKnownStatus' is 3. The column is
    parsed with read_column, mapping '<' entries to 2 and '>' entries to 15.

    Prints mean and standard deviation per group and plots, per group, the
    number of patients for each distinct duration.

    Returns:
        (mean, std): two lists with one value per age group.
    """
    print('R10:')
    durations, mean, std = [], [], []
    for group, label in zip(df_age_groups, ages):
        print('Parameters for Ages', label + ':')
        died_after_icu = (
            (group['BL_Duration_Inpatientstay'] != 'Missing')
            & (group['BL_LastKnownStatus'] == 3)
            & (group['BL_Duration_ICUStay'] != 'Missing')
        )
        icu_stay = group.loc[died_after_icu, 'BL_Duration_ICUStay']
        icu_duration = np.array(read_column(icu_stay, 2, 15))

        durations.append(icu_duration)

        group_mean = np.mean(icu_duration)
        group_std = np.std(icu_duration)
        print('Mean of R10:', group_mean)
        print('STD of R10:', group_std)

        mean.append(group_mean)
        std.append(group_std)
        print()

    fig, ax = plt.subplots(1,1,figsize=(9,6))
    # one line per age group: distinct durations vs. how often each occurs
    for group_durations in durations:
        days, patients = np.unique(group_durations, return_counts=True)
        ax.plot(days, patients)
    if len(durations) > 1:
        ax.legend(ages, fontsize=min(18,18*6/len(durations)))
    ax.set_ylim(0)
    ax.set_title('Time of ICU Stay before Death', fontsize = 18)
    ax.set_xlabel('Time [Days]', fontsize=18)
    ax.set_ylabel('Number of Patients', fontsize=18)
    plt.show()

    return mean, std

# All Ages

### Theta and Delta

In [None]:
# Whole cohort passed as a single group labelled 'All';
# theta and delta are therefore one-element lists.
theta, delta = calc_prob([df], ['All'])

### Hospital to Recovered (R5)

In [None]:
# Mean/std of hospital stay before recovery for the single 'All' group (one-element lists).
R5_mean, R5_std = calc_time_hosp_rec([df], ['All'])

### Infected to Hospital (R6)

In [None]:
# Mean/std of time before hospital admission for the single 'All' group (one-element lists).
R6_mean, R6_std = calc_time_inf_hosp([df], ['All'])

### Hospital to ICU (R7)

In [None]:
# Mean/std of hospital stay before ICU for the single 'All' group (one-element lists).
R7_mean, R7_std = calc_time_hosp_ICU([df], ['All'])

### ICU to Recovered (R8)

In [None]:
# Mean/std of ICU stay before recovery for the single 'All' group (one-element lists).
R8_mean, R8_std = calc_time_ICU_rec([df], ['All'])

### ICU to Death (R10)

In [None]:
# Mean/std of ICU stay before death for the single 'All' group (one-element lists).
R10_mean, R10_std = calc_time_ICU_death([df], ['All'])

In [None]:
# Assemble all estimates into one table: each list below is one parameter
# (one value per age group); the transpose puts age groups on the rows
# and parameters on the columns.
params = pd.DataFrame(
    data=[
        theta, delta,
        R5_mean, R6_mean, R7_mean, R8_mean, R10_mean,
        R5_std, R6_std, R7_std, R8_std, R10_std,
    ],
    index=[
        'theta', 'delta',
        'R5_mean', 'R6_mean', 'R7_mean', 'R8_mean', 'R10_mean',
        'R5_std', 'R6_std', 'R7_std', 'R8_std', 'R10_std',
    ],
    columns=['All'],
).transpose()
display(params)
params.to_csv('params_1age_groups.csv')

# 8 Age Groups

In [None]:
# Divide DF into Age Groups
# Each group collects the rows whose BL_Age category code is in the listed set.
# NOTE(review): nine groups are built here although the section title and the
# output file name say 8 - confirm which is intended.

ages = ['0-4', '4-17', '15-25', '25-35', '35-45', '45-55', '55-65', '65-75', '75+']

df_age_groups = [
    df[df['BL_Age'].isin(codes)]
    for codes in (
        [1, 2],      # Age 0-4
        [3, 4, 5],   # Age 4-17
        [13, 14],    # Age 15-25
        [6],         # Age 25-35
        [7],         # Age 35-45
        [8],         # Age 45-55
        [9],         # Age 55-65
        [10],        # Age 65-75
        [11, 12],    # Age 75+
    )
]

for df_age, age in zip(df_age_groups, ages):
    print('Number of Patients in age group', age + ':\t', len(df_age))

### Theta and Delta

In [None]:
# One theta/delta value per entry of `ages`, using the df_age_groups built in this section.
theta, delta = calc_prob(df_age_groups, ages)

### Hospital to Recovered (R5)

In [None]:
# Mean/std of hospital stay before recovery, one value per entry of `ages`.
R5_mean, R5_std = calc_time_hosp_rec(df_age_groups, ages)

### Infected to Hospital (R6)

In [None]:
# Mean/std of time before hospital admission, one value per entry of `ages`.
R6_mean, R6_std = calc_time_inf_hosp(df_age_groups, ages)

### Hospital to ICU (R7)

In [None]:
# Mean/std of hospital stay before ICU, one value per entry of `ages`.
R7_mean, R7_std = calc_time_hosp_ICU(df_age_groups, ages)

### ICU to Recovered (R8)

In [None]:
# Mean/std of ICU stay before recovery, one value per entry of `ages`.
R8_mean, R8_std = calc_time_ICU_rec(df_age_groups, ages)

### ICU to Death (R10)

In [None]:
# Mean/std of ICU stay before death, one value per entry of `ages`.
R10_mean, R10_std = calc_time_ICU_death(df_age_groups, ages)

In [None]:
# Assemble all estimates into one table: each list below is one parameter
# (one value per age group); the transpose puts age groups on the rows
# and parameters on the columns.
params = pd.DataFrame(
    data=[
        theta, delta,
        R5_mean, R6_mean, R7_mean, R8_mean, R10_mean,
        R5_std, R6_std, R7_std, R8_std, R10_std,
    ],
    index=[
        'theta', 'delta',
        'R5_mean', 'R6_mean', 'R7_mean', 'R8_mean', 'R10_mean',
        'R5_std', 'R6_std', 'R7_std', 'R8_std', 'R10_std',
    ],
    columns=ages,
).transpose()
display(params)

params.to_csv('params_8age_groups.csv')

# 3 Age Groups

In [None]:
# Divide DF into three coarse age groups; each group collects the rows
# whose BL_Age category code is in the listed set.
ages = ['0-25', '25-55', '55+']

df_age_groups = [
    df[df['BL_Age'].isin(codes)]
    for codes in (
        [1, 2, 3, 4, 5, 13, 14],   # Age 0-25
        [6, 7, 8],                 # Age 25-55
        [9, 10, 11, 12],           # Age 55+
    )
]

for df_age, age in zip(df_age_groups, ages):
    print('Number of Patients in age group', age + ':\t', len(df_age))

### Theta and Delta

In [None]:
# One theta/delta value per entry of `ages`, using the df_age_groups built in this section.
theta, delta = calc_prob(df_age_groups, ages)

### Hospital to Recovered (R5)

In [None]:
# Mean/std of hospital stay before recovery, one value per entry of `ages`.
R5_mean, R5_std = calc_time_hosp_rec(df_age_groups, ages)

### Infected to Hospital (R6)

In [None]:
# Mean/std of time before hospital admission, one value per entry of `ages`.
R6_mean, R6_std = calc_time_inf_hosp(df_age_groups, ages)

### Hospital to ICU (R7)

In [None]:
# Mean/std of hospital stay before ICU, one value per entry of `ages`.
R7_mean, R7_std = calc_time_hosp_ICU(df_age_groups, ages)

### ICU to Recovered (R8)

In [None]:
# Mean/std of ICU stay before recovery, one value per entry of `ages`.
R8_mean, R8_std = calc_time_ICU_rec(df_age_groups, ages)

### ICU to Death (R10)

In [None]:
# Mean/std of ICU stay before death, one value per entry of `ages`.
R10_mean, R10_std = calc_time_ICU_death(df_age_groups, ages)

In [None]:
# Assemble all estimates into one table: each list below is one parameter
# (one value per age group); the transpose puts age groups on the rows
# and parameters on the columns.
params = pd.DataFrame(
    data=[
        theta, delta,
        R5_mean, R6_mean, R7_mean, R8_mean, R10_mean,
        R5_std, R6_std, R7_std, R8_std, R10_std,
    ],
    index=[
        'theta', 'delta',
        'R5_mean', 'R6_mean', 'R7_mean', 'R8_mean', 'R10_mean',
        'R5_std', 'R6_std', 'R7_std', 'R8_std', 'R10_std',
    ],
    columns=ages,
).transpose()
display(params)
params.to_csv('params_3age_groups.csv')