# Analysis of outcomes concepts tables - SMC

Look-up tables used for this analysis have been created using:  
- ReadV2:  V2_2_3_MELD_concepts_readv2
- ICD10:  V2_2_3_MELD_icd10_no_translate

In this file we carry out some elementary analysis of the WP02_CONC_READ/ICD10 tables for some specific concepts. In particular, we look at concepts for which either a ReadV2 code list or an ICD10 code list is available. 

In particular we aim to give an answer to the following questions: 
- How many unique patients have been diagnosed with that concept *at least* once? How many more than once? 
- How many total admissions for that concept were recorded in PEDW and WLGP? 
- What is the average age of the individuals when they are diagnosed? 
- Is there a difference between males and females? 
- How many records does an individual have for that concept?

This information is summarised in a CSV file. 

Note that 
1. The outcomes tables used to derive these visualizations *do not* include any sensitive code. 

In [None]:
import SAIL_python
import pandas as pd 
import numpy as np 
import math
import datetime as dt
import matplotlib
import glob
import os
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objs as go
from textwrap import wrap
from IPython.display import Markdown, display, Math, Latex

In [None]:
# Create the connection object and open it; `con.query(...)` is used by the
# cells below to run every SQL statement.
# NOTE(review): SAIL_python is a project-specific module — how connect()
# obtains credentials is not visible here; confirm before running unattended.
con = SAIL_python.SAILConnection()
con.connect()

In [None]:
#functions
def printmd(string):
    """Show *string* in the notebook output, rendered as Markdown."""
    rendered = Markdown(string)
    display(rendered)
    
def round_10_a(array):
    """Return a list holding ``round_10(x)`` for every element ``x`` of *array*.

    NOTE(review): ``round_10`` is not defined in the visible part of this
    file; it must already exist in the namespace when this is called.
    """
    return [round_10(value) for value in array]

In [None]:
# Fixed tables and concept (phenotype) names.
# The two catalogue queries list the per-concept outcome tables that exist in
# the schema; the concept name is whatever follows the fixed table prefix.
q_total = """
            SELECT * FROM SAILW1377V.WP02_COHORT_TABLE
        """
q_wlgp = """
        SELECT * FROM SYSCAT.TABLES WHERE TABSCHEMA = 'SAILW1377V' AND TABNAME LIKE 'WP02_CONC_EXT_WLGP_%' AND type = 'T'
        """
q_pedw = """
        SELECT * FROM SYSCAT.TABLES WHERE TABSCHEMA = 'SAILW1377V' AND TABNAME LIKE 'WP02_CONC_EXT_PEDW_%' AND type = 'T'
         """

# Concepts that are reported as "sensitive" and skipped by the analysis loop.
sens_conc = ['HIVAIDS', 'VIRAL_HEPATITIS']

tot_cohort = con.query(q_total, echo_level = 0)
tables_w = con.query(q_wlgp, echo_level = 0)['TABNAME']
tables_p = con.query(q_pedw, echo_level = 0)['TABNAME']

# Strip the table prefix to recover the concept name for each source.
phen_w = [table_name.removeprefix('WP02_CONC_EXT_WLGP_') for table_name in tables_w]
phen_p = [table_name.removeprefix('WP02_CONC_EXT_PEDW_') for table_name in tables_p]

# Union of both sources, de-duplicated and in alphabetical order.
phen_list = sorted(set(phen_w + phen_p))

lsoa_table = 'WP02_WDSD_GP_BREAK'
cohort_table = 'WP02_COHORT_TABLE'
total_cohort = len(tot_cohort)
# Events before this date are counted as prevalent, on/after it as incident.
cohort_st_dt = dt.date(2000,1,1)

In [None]:
# CSV columns: one accumulator list per output column, one entry appended per
# analysed concept. Every name below is bound to its own, independent list.

# concept name and unique-ALF counts (WLGP_EVENTS, PEDW, either source)
name_list = []
wlgp_alf_nunique_list, pedw_alf_nunique_list, tot_alf_nunique_list = [], [], []

# number of admissions for the concept in WLGP
tot_admission_wlgp_list = []
# PEDW admissions caused by the concept / admissions where the concept was
# recorded but was not the reason for the hospital admission
tot_spell_reason, tot_spell_noreason = [], []

# unique male / female individuals per source (WLGP, PEDW, WLGP or PEDW)
wlgp_male_list, wlgp_female_list = [], []
pedw_male_list, pedw_female_list = [], []
tot_male_list, tot_female_list = [], []

# average age for the concept: overall, female, male -- per source
wlgp_avg_age_list, wlgp_avg_age_f_list, wlgp_avg_age_m_list = [], [], []
pedw_avg_age_list, pedw_avg_age_f_list, pedw_avg_age_m_list = [], [], []
tot_avg_age_list, tot_avg_age_f_list, tot_avg_age_m_list = [], [], []
# median and mode age (WLGP or PEDW only): overall, female, male
tot_med_age_list, tot_med_age_f_list, tot_med_age_m_list = [], [], []
tot_mod_age_list, tot_mod_age_f_list, tot_mod_age_m_list = [], [], []

# number of male / female individuals diagnosed per 10-year age band in WLGP
w_age_group1_m, w_age_group1_f = [], []    # 0-9
w_age_group2_m, w_age_group2_f = [], []    # 10-19
w_age_group3_m, w_age_group3_f = [], []    # 20-29
w_age_group4_m, w_age_group4_f = [], []    # 30-39
w_age_group5_m, w_age_group5_f = [], []    # 40-49
w_age_group6_m, w_age_group6_f = [], []    # 50-59
w_age_group7_m, w_age_group7_f = [], []    # 60-69
w_age_group8_m, w_age_group8_f = [], []    # 70-79
w_age_group9_m, w_age_group9_f = [], []    # 80-89
w_age_group10_m, w_age_group10_f = [], []  # 90-99

# same age bands, PEDW
p_age_group1_m, p_age_group1_f = [], []
p_age_group2_m, p_age_group2_f = [], []
p_age_group3_m, p_age_group3_f = [], []
p_age_group4_m, p_age_group4_f = [], []
p_age_group5_m, p_age_group5_f = [], []
p_age_group6_m, p_age_group6_f = [], []
p_age_group7_m, p_age_group7_f = [], []
p_age_group8_m, p_age_group8_f = [], []
p_age_group9_m, p_age_group9_f = [], []
p_age_group10_m, p_age_group10_f = [], []

# same age bands, WLGP or PEDW
t_age_group1_m, t_age_group1_f = [], []
t_age_group2_m, t_age_group2_f = [], []
t_age_group3_m, t_age_group3_f = [], []
t_age_group4_m, t_age_group4_f = [], []
t_age_group5_m, t_age_group5_f = [], []
t_age_group6_m, t_age_group6_f = [], []
t_age_group7_m, t_age_group7_f = [], []
t_age_group8_m, t_age_group8_f = [], []
t_age_group9_m, t_age_group9_f = [], []
t_age_group10_m, t_age_group10_f = [], []

# prevalent / incident cases per source
wlgp_prev, wlgp_inci = [], []
pedw_prev, pedw_inci = [], []


In [None]:
# Per-concept summary loop.
#
# For every concept in `phen_list` (medication and sensitive concepts are
# skipped) this pulls the WLGP and PEDW outcome tables, when they exist, and
# appends exactly one entry to each CSV accumulator list: unique individuals,
# admissions, gender split, mean/median/mode onset age, 10-year onset-age
# buckets and prevalent/incident counts. When a source has no table for the
# concept, a placeholder string is appended instead so that all accumulator
# lists stay the same length.

_NO_WLGP = 'No WLGP data available'
_NO_PEDW = 'No PEDW data available'

# Bin edges for onset age: [0,10), [10,20), ..., [80,90), [90,100]
# (np.histogram closes the last bin on the right).
age_group = np.array(list(range(0, 110, 10)))

# Bucket accumulators listed in bin order, so that position k in each list of
# lists always receives histogram bin k.
_w_buckets_m = [w_age_group1_m, w_age_group2_m, w_age_group3_m, w_age_group4_m, w_age_group5_m,
                w_age_group6_m, w_age_group7_m, w_age_group8_m, w_age_group9_m, w_age_group10_m]
_w_buckets_f = [w_age_group1_f, w_age_group2_f, w_age_group3_f, w_age_group4_f, w_age_group5_f,
                w_age_group6_f, w_age_group7_f, w_age_group8_f, w_age_group9_f, w_age_group10_f]
_p_buckets_m = [p_age_group1_m, p_age_group2_m, p_age_group3_m, p_age_group4_m, p_age_group5_m,
                p_age_group6_m, p_age_group7_m, p_age_group8_m, p_age_group9_m, p_age_group10_m]
_p_buckets_f = [p_age_group1_f, p_age_group2_f, p_age_group3_f, p_age_group4_f, p_age_group5_f,
                p_age_group6_f, p_age_group7_f, p_age_group8_f, p_age_group9_f, p_age_group10_f]
_t_buckets_m = [t_age_group1_m, t_age_group2_m, t_age_group3_m, t_age_group4_m, t_age_group5_m,
                t_age_group6_m, t_age_group7_m, t_age_group8_m, t_age_group9_m, t_age_group10_m]
_t_buckets_f = [t_age_group1_f, t_age_group2_f, t_age_group3_f, t_age_group4_f, t_age_group5_f,
                t_age_group6_f, t_age_group7_f, t_age_group8_f, t_age_group9_f, t_age_group10_f]


def _first_mode(ages):
    """Return the modal value of *ages* rounded to 1 dp, or NaN if there is none.

    ``Series.mode()`` always returns a Series (sorted, possibly with several
    values), so a single scalar has to be picked for the CSV cell; the first
    (smallest) mode is used.
    """
    modes = ages.mode()
    if modes.empty:
        return np.nan
    return round(modes.iloc[0], 1)


def _append_age_buckets(male_buckets, female_buckets, male_ages, female_ages):
    """Histogram the onset ages into `age_group` bins and append bin k to bucket k."""
    counts_m = round_10_a(np.histogram(male_ages, age_group)[0])
    counts_f = round_10_a(np.histogram(female_ages, age_group)[0])
    for idx, (bucket_m, bucket_f) in enumerate(zip(male_buckets, female_buckets)):
        bucket_m.append(counts_m[idx])
        bucket_f.append(counts_f[idx])


def _append_placeholder(placeholder, targets):
    """Append *placeholder* once to every accumulator list in *targets*."""
    for target in targets:
        target.append(placeholder)


for i in phen_list:
    # we are not processing medications at this point
    if 'ALL_MEDICATIONS' in i or 'PAIN_MEDICATIONS' in i:
        printmd(f'## CONCEPT: {i} \n')
        print('PASS')
        continue

    # sensitive concepts are announced but never analysed
    if i in sens_conc:
        display(Markdown('---'))
        printmd(f'## CONCEPT: {i} \n')
        printmd('Sensitive concept \n ')
        continue

    name_list.append(i)

    wlgp_name = 'WP02_CONC_EXT_WLGP_' + i
    pedw_name = 'WP02_CONC_EXT_PEDW_' + i

    display(Markdown('---'))
    printmd(f'## CONCEPT: {i} \n')

    # ------------------------------------------------------------------ WLGP
    # check if 'i' is available in WLGP (i.e. there is an outcome table in the database)
    if i in phen_w:
        # DEM adds gender and age at event, WIMD adds the deprivation quintile
        # valid at the event date, NUM_EVENT numbers each person's events by date.
        q_wlgp = """
        WITH DEM AS (
        SELECT A.*, B.GNDR_CD,
               YEARS_BETWEEN(A.EVENT_DT, B.WOB) AS AGE_EVENT
        FROM SAILW1377V.%s A
        LEFT JOIN SAILW1377V.%s B
        ON A.ALF_PE = B.ALF_PE
        ),
        WIMD AS (
        SELECT A.*,
               B.WIMD_2019_QUINTILE AS WIMD
        FROM SAILW1377V.%s AS A
        LEFT JOIN SAILW1377V.%s B
        ON A.ALF_PE = B.ALF_PE
        WHERE A.EVENT_DT BETWEEN B.ACTIVEFROM AND B.ACTIVETO
        ),
        TOT AS (
        SELECT DISTINCT T1.*,
               T2.WIMD
        FROM DEM T1
        LEFT JOIN WIMD T2
        ON
            T1.ALF_PE = T2.ALF_PE
            AND T1.EVENT_DT = T2.EVENT_DT
            AND T1.EVENT_CD = T2.EVENT_CD
        )
        SELECT A.*, ROW_NUMBER() OVER (PARTITION BY A.ALF_PE ORDER BY A.ALF_PE, A.EVENT_DT) AS NUM_EVENT
        FROM TOT A
        JOIN SAILW1377V.%s B
        ON A.ALF_PE = B.ALF_PE
        """ % (wlgp_name, cohort_table, wlgp_name, lsoa_table, cohort_table)

        wlgp_table = con.query(q_wlgp, echo_level = 0)
        wlgp_partial = wlgp_table[["ALF_PE", "GNDR_CD", "AGE_EVENT", "NUM_EVENT"]]

        # number of individuals
        wlgp_alf_nunique = round_10(wlgp_table.ALF_PE.nunique())
        wlgp_alf_nunique_list.append(wlgp_alf_nunique)

        # number of total admissions (distinct person/date pairs)
        tot_admission_wlgp = round_10(len(wlgp_table.groupby(['ALF_PE', 'EVENT_DT'])))
        tot_admission_wlgp_list.append(tot_admission_wlgp)

        # gender (GNDR_CD 1 is handled as male, 2 as female)
        wlgp_table_m = wlgp_table[wlgp_table.GNDR_CD == 1]
        wlgp_table_f = wlgp_table[wlgp_table.GNDR_CD == 2]
        wlgp_male_list.append(round_10(wlgp_table_m.ALF_PE.nunique()))
        wlgp_female_list.append(round_10(wlgp_table_f.ALF_PE.nunique()))

        # age at index date: first event of each ALF
        wlgp_index = wlgp_partial[wlgp_partial['NUM_EVENT'] == 1]
        wlgp_index_m = wlgp_index[wlgp_index['GNDR_CD'] == 1]
        wlgp_index_f = wlgp_index[wlgp_index['GNDR_CD'] == 2]

        # average index age
        wlgp_avg_age = round(wlgp_index['AGE_EVENT'].mean(), 1)
        wlgp_avg_age_m = round(wlgp_index_m['AGE_EVENT'].mean(), 1)
        wlgp_avg_age_f = round(wlgp_index_f['AGE_EVENT'].mean(), 1)
        wlgp_avg_age_list.append(wlgp_avg_age)
        wlgp_avg_age_f_list.append(wlgp_avg_age_f)
        wlgp_avg_age_m_list.append(wlgp_avg_age_m)

        _append_age_buckets(_w_buckets_m, _w_buckets_f,
                            wlgp_index_m['AGE_EVENT'], wlgp_index_f['AGE_EVENT'])

        # number of records per individual
        values_w = wlgp_table.ALF_PE.value_counts()
        df_values_wlgp = pd.DataFrame(values_w).reset_index()
        df_values_wlgp.columns = ['ALF_PE', 'count']

        # incident & prevalent cases (one row per person/date)
        wlgp_table2 = wlgp_table.drop_duplicates(['ALF_PE', 'EVENT_DT'], keep = 'first')
        wlgp_table2_n = round_10(len(wlgp_table2))
        wlgp_incident_count = round_10(len(wlgp_table2[wlgp_table2['EVENT_DT'] >= cohort_st_dt]))
        wlgp_prevalent_count = round_10(len(wlgp_table2[wlgp_table2['EVENT_DT'] < cohort_st_dt]))
        wlgp_prev.append(wlgp_prevalent_count)
        wlgp_inci.append(wlgp_incident_count)

    else:
        # Empty frames keep the combined (WLGP or PEDW) section working.
        wlgp_partial = pd.DataFrame(columns = ["ALF_PE", "GNDR_CD", "AGE_EVENT"])
        wlgp_index = pd.DataFrame(columns = ["ALF_PE", "GNDR_CD", "AGE_EVENT"])

        wlgp_alf_nunique = tot_admission_wlgp = _NO_WLGP
        wlgp_table_m = wlgp_table_f = _NO_WLGP
        wlgp_avg_age = wlgp_avg_age_m = wlgp_avg_age_f = _NO_WLGP
        wlgp_table2_n = wlgp_incident_count = wlgp_prevalent_count = _NO_WLGP

        _append_placeholder(_NO_WLGP, [
            wlgp_alf_nunique_list, tot_admission_wlgp_list,
            wlgp_male_list, wlgp_female_list,
            wlgp_avg_age_list, wlgp_avg_age_f_list, wlgp_avg_age_m_list,
            *_w_buckets_m, *_w_buckets_f,
            wlgp_prev, wlgp_inci,
        ])

    # ------------------------------------------------------------------ PEDW
    # check if 'i' is available in PEDW (i.e. there is an outcome table in the database)
    if i in phen_p:
        q_pedw = """
        WITH DEM AS (
        SELECT A.*, B.GNDR_CD,
               YEARS_BETWEEN(A.ADMIS_DT, B.WOB) AS AGE_EVENT
        FROM SAILW1377V.%s A
        LEFT JOIN SAILW1377V.%s B
        ON A.ALF_PE = B.ALF_PE
        ),
        WIMD AS (
        SELECT A.*,
               B.WIMD_2019_QUINTILE AS WIMD
        FROM SAILW1377V.%s AS A
        LEFT JOIN SAILW1377V.%s B
        ON A.ALF_PE = B.ALF_PE
        WHERE A.ADMIS_DT BETWEEN B.ACTIVEFROM AND B.ACTIVETO
        ),
        TOT AS (
        SELECT DISTINCT T1.*,
               T2.WIMD
        FROM DEM T1
        LEFT JOIN WIMD T2
        ON
            T1.ALF_PE = T2.ALF_PE
            AND T1.ADMIS_DT  = T2.ADMIS_DT
            AND T1.DIAG_CD = T2.DIAG_CD
        )
        SELECT *,  ROW_NUMBER() OVER (PARTITION BY ALF_PE ORDER BY ALF_PE, ADMIS_DT) AS NUM_EVENT FROM TOT
        """ % (pedw_name, cohort_table, pedw_name, lsoa_table)

        pedw_table = con.query(q_pedw, echo_level = 0)
        pedw_partial = pedw_table[["ALF_PE", "GNDR_CD", "AGE_EVENT", "NUM_EVENT"]]

        pedw_alf_nunique = round_10(pedw_table.ALF_PE.nunique())
        pedw_alf_nunique_list.append(pedw_alf_nunique)

        # Spells with the concept in episode '01' count as admissions caused by
        # the concept; the remaining spells only mention it in a later episode.
        epi_1 = pedw_table[pedw_table.EPI_NUM == '01']
        epi_1_nunique = round_10(epi_1.SPELL_NUM_PE.nunique())
        spell_1 = epi_1.SPELL_NUM_PE.unique()
        epi_n = pedw_table[~pedw_table.SPELL_NUM_PE.isin(spell_1)]
        epi_n_nunique = round_10(epi_n.SPELL_NUM_PE.nunique())
        tot_spell_reason.append(epi_1_nunique)
        tot_spell_noreason.append(epi_n_nunique)

        pedw_table_m = pedw_table[pedw_table.GNDR_CD == 1]
        pedw_table_f = pedw_table[pedw_table.GNDR_CD == 2]
        pedw_male_list.append(round_10(pedw_table_m.ALF_PE.nunique()))
        # the female count must come from the female subset
        pedw_female_list.append(round_10(pedw_table_f.ALF_PE.nunique()))

        # age at index date: first admission of each ALF
        pedw_index = pedw_partial[pedw_partial['NUM_EVENT'] == 1]
        pedw_index_m = pedw_index[pedw_index['GNDR_CD'] == 1]
        pedw_index_f = pedw_index[pedw_index['GNDR_CD'] == 2]

        # average index age
        pedw_avg_age = round(pedw_index['AGE_EVENT'].mean(), 1)
        pedw_avg_age_m = round(pedw_index_m['AGE_EVENT'].mean(), 1)
        pedw_avg_age_f = round(pedw_index_f['AGE_EVENT'].mean(), 1)
        pedw_avg_age_list.append(pedw_avg_age)
        pedw_avg_age_f_list.append(pedw_avg_age_f)
        pedw_avg_age_m_list.append(pedw_avg_age_m)

        _append_age_buckets(_p_buckets_m, _p_buckets_f,
                            pedw_index_m['AGE_EVENT'], pedw_index_f['AGE_EVENT'])

        # number of records per individual
        values_p = pedw_table.ALF_PE.value_counts()
        df_values_pedw = pd.DataFrame(values_p).reset_index()
        df_values_pedw.columns = ['ALF_PE', 'count']

        mean_pedw = round(df_values_pedw['count'].mean(), 1)
        std_pedw = round(df_values_pedw['count'].std(), 2)

        # incident & prevalent cases (one row per person/admission/episode start)
        pedw_table2 = pedw_table.drop_duplicates(['ALF_PE', 'ADMIS_DT', 'EPI_STR_DT'], keep = 'first')
        pedw_table2_n = round_10(len(pedw_table2))
        pedw_incident_count = round_10(len(pedw_table2[pedw_table2['ADMIS_DT'] >= cohort_st_dt]))
        pedw_prevalent_count = round_10(len(pedw_table2[pedw_table2['ADMIS_DT'] < cohort_st_dt]))
        pedw_prev.append(pedw_prevalent_count)
        pedw_inci.append(pedw_incident_count)

    else:
        pedw_partial = pd.DataFrame(columns = ["ALF_PE", "GNDR_CD", "AGE_EVENT"])
        pedw_index = pd.DataFrame(columns = ["ALF_PE", "GNDR_CD", "AGE_EVENT"])

        pedw_alf_nunique = _NO_PEDW
        epi_1_nunique = epi_n_nunique = _NO_PEDW
        pedw_table_m = pedw_table_f = _NO_PEDW
        pedw_avg_age = pedw_avg_age_m = pedw_avg_age_f = _NO_PEDW
        pedw_table2_n = pedw_incident_count = pedw_prevalent_count = _NO_PEDW

        _append_placeholder(_NO_PEDW, [
            pedw_alf_nunique_list, tot_spell_reason, tot_spell_noreason,
            pedw_male_list, pedw_female_list,
            pedw_avg_age_list, pedw_avg_age_f_list, pedw_avg_age_m_list,
            *_p_buckets_m, *_p_buckets_f,
            pedw_prev, pedw_inci,
        ])

    # ------------------------------------------------------- WLGP or PEDW
    tot_table = pd.concat([wlgp_partial, pedw_partial], ignore_index = True)
    tot_alf_nunique = round_10(tot_table.ALF_PE.nunique())
    tot_alf_nunique_list.append(tot_alf_nunique)

    tot_table_m = tot_table[tot_table.GNDR_CD == 1]
    tot_table_f = tot_table[tot_table.GNDR_CD == 2]
    tot_male_list.append(round_10(tot_table_m.ALF_PE.nunique()))
    tot_female_list.append(round_10(tot_table_f.ALF_PE.nunique()))

    # index date: youngest recorded age per individual across both sources
    tot_index = tot_table.groupby(['ALF_PE', 'GNDR_CD'])['AGE_EVENT'].min().reset_index()
    tot_index_m = tot_index[tot_index['GNDR_CD'] == 1.0]
    tot_index_f = tot_index[tot_index['GNDR_CD'] == 2.0]

    # average index age
    tot_avg_age = round(tot_index['AGE_EVENT'].mean(), 1)
    tot_avg_age_m = round(tot_index_m['AGE_EVENT'].mean(), 1)
    tot_avg_age_f = round(tot_index_f['AGE_EVENT'].mean(), 1)
    tot_avg_age_list.append(tot_avg_age)
    tot_avg_age_f_list.append(tot_avg_age_f)
    tot_avg_age_m_list.append(tot_avg_age_m)

    # median index age
    tot_med_age = round(tot_index['AGE_EVENT'].median(), 1)
    tot_med_age_m = round(tot_index_m['AGE_EVENT'].median(), 1)
    tot_med_age_f = round(tot_index_f['AGE_EVENT'].median(), 1)
    tot_med_age_list.append(tot_med_age)
    tot_med_age_f_list.append(tot_med_age_f)
    tot_med_age_m_list.append(tot_med_age_m)

    # modal index age, stored as a scalar so the CSV cell holds one number
    tot_mod_age = _first_mode(tot_index['AGE_EVENT'])
    tot_mod_age_m = _first_mode(tot_index_m['AGE_EVENT'])
    tot_mod_age_f = _first_mode(tot_index_f['AGE_EVENT'])
    tot_mod_age_list.append(tot_mod_age)
    tot_mod_age_f_list.append(tot_mod_age_f)
    tot_mod_age_m_list.append(tot_mod_age_m)

    _append_age_buckets(_t_buckets_m, _t_buckets_f,
                        tot_index_m['AGE_EVENT'], tot_index_f['AGE_EVENT'])

In [None]:
# Build the summary DataFrame (one row per analysed concept) and write it to CSV.
# Column order is the dict insertion order: headline counts and ages first,
# then the six onset-age columns of every 10-year band.
# NOTE(review): tot_spell_noreason and the prevalent/incident lists are not
# exported by this cell -- confirm that is intended.
data_list = {
    'Concept': name_list,
    'N of individuals (WLGP)': wlgp_alf_nunique_list,
    'N of individuals PEDW': pedw_alf_nunique_list,
    'N of individuals (PEDW or WLGP)': tot_alf_nunique_list,
    'N of male individuals (WLGP)': wlgp_male_list,
    'N of male individuals PEDW': pedw_male_list,
    'N of male individuals (WLGP or PEDW)': tot_male_list,
    'N of female individuals (WLGP)': wlgp_female_list,
    'N of female individuals PEDW': pedw_female_list,
    'N of female individuals (PEDW or WLGP)': tot_female_list,
    'N of admissions for concept (WLGP)': tot_admission_wlgp_list,
    'N of admissions for concept (PEDW)': tot_spell_reason,
    'Average onset age of individual (WLGP)': wlgp_avg_age_list,
    'Average onset age of indiduals PEDW': pedw_avg_age_list,
    'Average onset age of individual (PEDW or WLGP)': tot_avg_age_list,
    'Average onset age of male individual (WLGP)': wlgp_avg_age_m_list,
    'Average onset age of male indiduals PEDW': pedw_avg_age_m_list,
    'Average onset age of male individual (PEDW or WLGP)': tot_avg_age_m_list,
    'Average onset age of female individual (WLGP)': wlgp_avg_age_f_list,
    'Average onset age of female indiduals PEDW': pedw_avg_age_f_list,
    'Average onset age of female individual (PEDW or WLGP)': tot_avg_age_f_list,
    'Median onset age of individual (PEDW or WLGP)': tot_med_age_list,
    'Median onset age of male individual (PEDW or WLGP)': tot_med_age_m_list,
    'Median onset age of female individual (PEDW or WLGP)': tot_med_age_f_list,
    'Mode onset age of individual (PEDW or WLGP)': tot_mod_age_list,
    'Mode onset age of male individual (PEDW or WLGP)': tot_mod_age_m_list,
    'Mode onset age of female individual (PEDW or WLGP)': tot_mod_age_f_list,
}

# One tuple per age band: (label, WLGP m, WLGP f, PEDW m, PEDW f, total m, total f).
age_band_columns = [
    ('0-9', w_age_group1_m, w_age_group1_f, p_age_group1_m, p_age_group1_f, t_age_group1_m, t_age_group1_f),
    ('10-19', w_age_group2_m, w_age_group2_f, p_age_group2_m, p_age_group2_f, t_age_group2_m, t_age_group2_f),
    ('20-29', w_age_group3_m, w_age_group3_f, p_age_group3_m, p_age_group3_f, t_age_group3_m, t_age_group3_f),
    ('30-39', w_age_group4_m, w_age_group4_f, p_age_group4_m, p_age_group4_f, t_age_group4_m, t_age_group4_f),
    ('40-49', w_age_group5_m, w_age_group5_f, p_age_group5_m, p_age_group5_f, t_age_group5_m, t_age_group5_f),
    ('50-59', w_age_group6_m, w_age_group6_f, p_age_group6_m, p_age_group6_f, t_age_group6_m, t_age_group6_f),
    ('60-69', w_age_group7_m, w_age_group7_f, p_age_group7_m, p_age_group7_f, t_age_group7_m, t_age_group7_f),
    ('70-79', w_age_group8_m, w_age_group8_f, p_age_group8_m, p_age_group8_f, t_age_group8_m, t_age_group8_f),
    ('80-89', w_age_group9_m, w_age_group9_f, p_age_group9_m, p_age_group9_f, t_age_group9_m, t_age_group9_f),
    ('90-99', w_age_group10_m, w_age_group10_f, p_age_group10_m, p_age_group10_f, t_age_group10_m, t_age_group10_f),
]
for band, w_m, w_f, p_m, p_f, t_m, t_f in age_band_columns:
    data_list[f'N of male individual with onset age {band} (WLGP)'] = w_m
    data_list[f'N of female individual with onset age {band} (WLGP)'] = w_f
    data_list[f'N of male individual with onset age {band} (PEDW)'] = p_m
    data_list[f'N of female individual with onset age {band} (PEDW)'] = p_f
    data_list[f'N of male individual with onset age {band} (WLGP or PEDW)'] = t_m
    data_list[f'N of female individual with onset age {band} (WLGP or PEDW)'] = t_f

table = pd.DataFrame(data = data_list)
table.to_csv('CONC_outputs/WP02_outcomes_analysis_.csv')