In [1]:
# Preferences of autoformatting & Multiple Output
%load_ext nb_black

from IPython.core.interactiveshell import InteractiveShell

InteractiveShell.ast_node_interactivity = "all"

import warnings

warnings.filterwarnings("ignore")

import pandas as pd  # Used directly in this notebook (pd.set_option, pd.read_excel, ...); do not rely on star imports for it
import researchpy as rp  # For auto-statistics/EDA of dataframe
from tqdm.notebook import tqdm  # For process display
import pingouin as pg

import sys

sys.path.append("../src")

from data import *
from utils import *
from conf import *
from tools import *

import utils as UT
import tools as TS

import random

import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go

from sklearn.preprocessing import MinMaxScaler
from sklearn.pipeline import Pipeline, make_pipeline, FeatureUnion
from mlxtend.evaluate import PredefinedHoldoutSplit
from mlxtend.feature_selection import SequentialFeatureSelector as SFS

from sklearn import set_config

set_config(display="diagram")

pd.set_option("display.max_colwidth", 200)

<IPython.core.display.Javascript object>

In [2]:
# Read the Excel sheet at ../output/CHILD_with_addon.xlsx, using its
# "Subject_Number" column as the row index.
df_child = pd.read_excel("../output/CHILD_with_addon.xlsx", index_col="Subject_Number")
# Bare expression: rendered as this cell's output.
df_child

Unnamed: 0_level_0,Sex,Gest_Days,Weight_0m,Weight_3m,Weight_12m,Weight_36m,Weight_60m,Weight_for_age_0m,Weight_for_age_3m,Weight_for_age_12m,...,Child_Ethnicity,Child_Atopy_1y,Child_Food_1y,Child_Inhalant_1y,Child_Atopy_3y,Child_Food_3y,Child_Inhalant_3y,Child_Atopy_5y,Child_Food_5y,Child_Inhalant_5y
Subject_Number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
20001,F,273.0,3.255,,11.8,20.0,25.5,0.05,,0.09,...,,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
20002,M,275.0,3.195,,,,,-0.31,,,...,,,,,,,,,,
20003,F,279.0,4.490,,11.0,,,2.47,,,...,,,,,,,,,,
20005,F,280.0,2.880,,,,,-0.80,,,...,,,,,,,,,,
20006,F,271.0,3.335,,11.1,17.8,20.1,0.22,,,...,,,,,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
50861,M,275.0,2.520,5.66,9.2,14.2,16.5,-1.85,-1.41,-0.42,...,NonCaucas,0.0,0.0,0.0,,,,1.0,0.0,1.0
50862,M,272.0,3.648,7.04,9.7,14.3,16.8,0.60,0.52,-0.03,...,Caucasian,0.0,0.0,0.0,1.0,0.0,1.0,1.0,1.0,1.0
50863,F,288.0,3.352,6.86,11.6,,,0.26,1.39,1.86,...,Caucasian,0.0,0.0,0.0,,,,,,
50864,F,275.0,4.086,6.53,9.1,14.2,16.1,1.73,0.80,-0.09,...,HalfCaucas,0.0,0.0,0.0,0.0,0.0,0.0,,,


<IPython.core.display.Javascript object>

In [8]:
# List the column names that contain "_24m" (str.contains treats the argument as a regex).
df_child.columns[df_child.columns.str.contains("_24m")]

Index(['BF_24m', 'PSS_24m', 'CESD_24m', 'Cumulative_Wheeze_24m'], dtype='object')

<IPython.core.display.Javascript object>

In [26]:
def _columns_matching(df, pattern):
    """Return the set of ``df`` column names in which the regex ``pattern`` is found."""
    return set(df.columns[df.columns.str.contains(pattern)])


def _groups_from_patterns(df, patterns, remainder_key):
    """Group columns by regex; columns matched by no pattern are stored under ``remainder_key``."""
    groups = {key: _columns_matching(df, pattern) for key, pattern in patterns.items()}
    # The remainder is computed before it is inserted, so it only excludes the regex groups.
    groups[remainder_key] = set(df.columns).difference(*groups.values())
    return groups


def _groups_overview(groups, index_name):
    """Build the display table: one row per group key, one "Features" column holding the sets."""
    return pd.DataFrame(
        [groups.keys(), groups.values()], index=[index_name, "Features"]
    ).T.set_index(index_name)


def _detailed_timepoints(df):
    """Group columns by the time-point suffix in their name; unmatched columns are "at_birth"."""
    patterns = {  ### PAY SPECIAL ATTENTION TO SPACES WITHIN QUOTATION MARK!
        "3m": "_3m|_1m$",  # 3 months + 1 month
        "6m": "_6m",  # 6 months
        "12m": "_9m|_12m|_1y",  # 1 Year + 9 months
        # NOTE(review): "_2y" also matches names such as "Wheeze_2yh"; confirm
        # whether such a column belongs here or in the 30m ("_2hy") group.
        "18m": "_18m",  # 1.5 Years
        "24m": "_24m|_2y|BF_Implied",  # 2 Years
        "30m": "_2hy|_30m",  # 2.5 Years
        "36m": "_36m|_3y",  # 3 Years
        "48m": "_48m|_4y",  # 4 Years
        "60m": "_60m|_5y|Traj_Type",  # 5 Years & Traj_Type
    }
    return _groups_from_patterns(df, patterns, remainder_key="at_birth")


def _detailed_categories(df):
    """Group columns by topic keyword; unmatched columns are "14_birthclinic"."""
    patterns = {
        "1_weight": "^Weight_",
        "2_mother_condition": "^Prenatal_",
        "3_first10min": "10min_",
        "4_breastfeeding": "^BF_",
        "5_home": "^Home",
        "6_mental": "^PSS_|^CESD_",
        "7_parental": "Mother|Father|Dad|Mom|Parental",
        "8_smoke": "Smoke",
        # Intended: contain wheeze but exclude *CLA, *Father, *Mother.
        # NOTE(review): as written the alternation does not achieve that. A name
        # that starts with "Wheeze" always matches: the first alternative admits
        # *Mother/*Father, the second re-admits *CLA. Only names where "Wheeze"
        # is not at the start and is followed by "CLA" are excluded. The pattern
        # is kept unchanged so existing groupings stay the same; the broad
        # category groupings remove Wheeze_Father/Wheeze_Mother explicitly.
        "9_wheeze": "Wheeze(?!.*CLA)|^Wheeze(?!.*Moth)|^Wheeze(?!.*Fath)",
        "10_resp": "Respiratory|^RI",
        "11_antibiotic": "Antibiotic",
        "12_childspt": "Child_Inhalant|Child_Atopy|Child_Food",
        "13_childinfo": "Child_Ethnicity|Sex",
        "15_CLA": "yCLA",
    }
    return _groups_from_patterns(df, patterns, remainder_key="14_birthclinic")


def _modifiability_categories(df):
    """Group columns into modifiable / potentially_modifiable / unmodifiable (the remainder)."""
    patterns = {
        "modifiable": "Home|Smoke|Study_Center",
        # Contain Weight but exclude weight at 0m
        "potentially_modifiable": "^Weight(?!.*_0m)|Antibiotic|^PSS_|^CESD_|delivery|10min|BF_",
    }
    return _groups_from_patterns(df, patterns, remainder_key="unmodifiable")


def _coarse_timepoints(df, split_second_year):
    """Merge the detailed time points into four (or, if ``split_second_year``, five) stages."""
    detailed = _detailed_timepoints(df)
    asthma_cla = _columns_matching(df, "Asthma.*yCLA")

    groups = {
        "at_birth": detailed["at_birth"],
        "6_months": detailed["3m"] | detailed["6m"],
        "1_year": detailed["12m"],
    }
    if split_second_year:
        # Only the "3_years" stage has the Asthma*yCLA columns removed.
        groups["2_years"] = detailed["18m"] | detailed["24m"]
        groups["3_years"] = (detailed["30m"] | detailed["36m"]) - asthma_cla
        union_key = "all_five_timepoints"
    else:
        groups["3_years"] = (
            detailed["18m"] | detailed["24m"] | detailed["30m"] | detailed["36m"]
        ) - asthma_cla
        union_key = "all_four_timepoints"

    # Union of every stage above (48m/60m columns are therefore not included).
    groups[union_key] = set().union(*groups.values())
    return groups


def _broad_categories(df, merge_other):
    """Build genetic/clinic/environmental(/other) groups; ``merge_other`` gives the three-way split."""
    cats = _detailed_categories(df)
    times = _detailed_timepoints(df)

    if merge_other:
        # Three-way split, per the in-code notes "Advised from integration meeting
        # Mar 10,2022": Sex -> genetic, Mode_of_delivery -> clinic,
        # mental + Study_Center + No_of_Pregnancy -> environmental.
        genetic_extra = "Child_Ethnicity|Sex"
        clinic_extra = "Mode_of_delivery|Apgar_Score|Gest_Days|Stay_Duration|Complications_Birth|Jaundice_Birth"
        environmental_extra = "Prenatal_Mother_Condition|Analgesics_usage_delivery|Anesthetic_delivery|Study_Center|No_of_Pregnancy"
    else:
        genetic_extra = "Child_Ethnicity"
        clinic_extra = "Apgar_Score|Gest_Days|Stay_Duration|Complications_Birth|Sex|Jaundice_Birth"
        environmental_extra = "Mode_of_delivery|Prenatal_Mother_Condition|Analgesics_usage_delivery|Anesthetic_delivery"

    # Columns removed from "clinic": parental wheeze, anything at 48m/60m, Asthma*yCLA.
    not_clinic = (
        {"Wheeze_Father", "Wheeze_Mother"}
        | times["48m"]
        | times["60m"]
        | _columns_matching(df, "Asthma.*yCLA")
    )
    environmental_core = (
        cats["2_mother_condition"]
        | cats["4_breastfeeding"]
        | cats["5_home"]
        | cats["8_smoke"]
        | cats["11_antibiotic"]
    )

    groups = {
        "genetic": (cats["7_parental"] | _columns_matching(df, genetic_extra))
        - {"Prenatal_Mother_Condition"},
        "clinic": (
            cats["9_wheeze"]
            | cats["1_weight"]
            | cats["3_first10min"]
            | cats["10_resp"]
            | cats["12_childspt"]
            | cats["15_CLA"]
            | _columns_matching(df, clinic_extra)
        )
        - not_clinic,
    }
    if merge_other:
        groups["environmental"] = (
            environmental_core
            | cats["6_mental"]
            | _columns_matching(df, environmental_extra)
        )
    else:
        groups["environmental"] = environmental_core | _columns_matching(
            df, environmental_extra
        )
        groups["other"] = cats["6_mental"] | _columns_matching(
            df, "Study_Center|No_of_Pregnancy"
        )
    return groups


def feature_grouping_generator(df, group_type="four_timepoints"):
    """
    Group the column names of ``df`` into named feature sets using name-based regex rules.

    df: pandas DataFrame whose column names are strings (only the names are used)
    group_type: str, default: "four_timepoints"
        other available options include "five_timepoints", "four_categories", "three_categories", "modifiability_categories", "detailed_timepoints", "detailed_categories"
    -----------------------------------------------
    return: (groups, overview)
        groups: dict mapping each group key to a set of column names
        overview: DataFrame for display, indexed by group key, with one "Features" column
    raise: ValueError if ``group_type`` is not one of the options above
    side effect: prints the available group keys
    """
    # group_type -> (lazy builder, name of the overview index)
    builders = {
        "four_timepoints": (
            lambda: _coarse_timepoints(df, split_second_year=False),
            "Time_Point",
        ),
        "five_timepoints": (
            lambda: _coarse_timepoints(df, split_second_year=True),
            "Time_Point",
        ),
        "four_categories": (
            lambda: _broad_categories(df, merge_other=False),
            "Category",
        ),
        "three_categories": (
            lambda: _broad_categories(df, merge_other=True),
            "Category",
        ),
        "detailed_timepoints": (lambda: _detailed_timepoints(df), "Time_Point"),
        "detailed_categories": (lambda: _detailed_categories(df), "Type"),
        "modifiability_categories": (
            lambda: _modifiability_categories(df),
            "Category",
        ),
    }

    # Fail loudly: callers unpack two return values, so returning None here
    # would only surface later as an unrelated unpacking error.
    if group_type not in builders:
        raise ValueError(
            f"Incorrect grouping type {group_type!r}, please choose one from: "
            + " | ".join(builders)
        )

    build, index_name = builders[group_type]
    groups = build()
    overview = _groups_overview(groups, index_name)

    print("The available keywords for grouped features are:", groups.keys())

    return groups, overview

<IPython.core.display.Javascript object>

In [31]:
# Build both time-point groupings. Each call prints its available keys and
# returns (dict of feature-name sets, overview DataFrame).
five_time_dict, five_time_df = feature_grouping_generator(df_child, "five_timepoints")
four_time_dict, four_time_df = feature_grouping_generator(df_child, "four_timepoints")

The available keywords for grouped features are: dict_keys(['at_birth', '6_months', '1_year', '2_years', '3_years', 'all_five_timepoints'])
The available keywords for grouped features are: dict_keys(['at_birth', '6_months', '1_year', '3_years', 'all_four_timepoints'])


<IPython.core.display.Javascript object>

In [20]:

# NOTE(review): this widens the column display width set in the first cell
# (200 -> 500) and the option stays in effect for later cells.
pd.set_option("display.max_colwidth", 500)
# NOTE(review): the saved output of this cell is "TypeError: unhashable type: 'set'"
# and its execution count (In [20]) is lower than that of the function
# definition (In [26]); the output looks stale - confirm with Restart & Run All.
five_time_dict.values()

TypeError: unhashable type: 'set'

<IPython.core.display.Javascript object>

In [34]:
# Stage names, leaving out the last key (the "all_five_timepoints" union).
list(five_time_dict)[:-1]
# Overview tables for the five- and four-stage groupings, displayed one after the other.
five_time_df
four_time_df

['at_birth', '6_months', '1_year', '2_years', '3_years']

Unnamed: 0_level_0,Features
Time_Point,Unnamed: 1_level_1
at_birth,"{Jaundice_Birth, Dad_Atopy, Father_Caucasian, Stay_Duration_Hospital, F10min_Free_Flow_Oxygen, F10min_Intubation, Prenatal_Bleeding, Mom_Atopy, Smoke_Prenatal_Secondhand, Apgar_Score_1min, Gest_Days, Sex, F10min_Perineum_suction, Prenatal_Infections, Prenatal_Other_Conditions, Smoke_Prenatal_Maternal, Prenatal_Gestational_Diabetes, F10min_Mask_Ventilation, PSS_36week, Hayfever_Mother, Mother_Asthma, Prenatal_Hypertension, F10min_No_Measure_Needed, Hayfever_Father, Wheeze_Father, Weight_0m, A..."
6_months,"{Weight_for_age_3m, Home_DEP_3m, Weight_3m, Epi_Noncold_Wheeze_3m, Wheeze_6m, BF_1m, Home_DNBP_3m, Wheeze_3m, Home_Furry_Pets_6m, Noncold_Wheeze_3m, CESD_6m, Cumulative_Wheeze_6m, Noncold_Wheeze_6m, Home_DEHP_3m, Cumulative_Wheeze_3m, BF_3m, Home_BzBP_3m, BF_6m, PSS_6m, Epi_Noncold_Wheeze_6m, BF_Status_3m, Home_New_Furnitures_6m, Home_Presence_Smoke_6m, BF_Status_6m, Home_DiBP_3m}"
1_year,"{Wheeze_1y, Time_of_AntibioticsUsage_12m, Cumulative_Wheeze_12m, CESD_12m, Child_Food_1y, BF_9m, Crackles_1yCLA, Prolonged_Expiration_1yCLA, Epi_Noncold_Wheeze_1y, Child_Inhalant_1y, BF_12m, Child_Atopy_1y, Antibiotics_Usage_12m, Number_of_AntibioticsCourse_12m, Weight_12m, Recurrent_Wheeze_1y, Weight_for_age_12m, Noncold_Wheeze_1y, PSS_12m, Wheeze_1yCLA}"
2_years,"{BF_18m, CESD_24m, Noncold_Wheeze_2y, Wheeze_2yh, Cumulative_Wheeze_18m, Wheeze_18m, Cumulative_Wheeze_24m, BF_Implied_Duration, Epi_Noncold_Wheeze_2y, BF_24m, Noncold_Wheeze_18m, PSS_18m, Wheeze_2y, PSS_24m, Epi_Noncold_Wheeze_18m, CESD_18m}"
3_years,"{Systolic_BP_3yCLA, Weight_for_age_36m, Pulse_Rate_3yCLA, Child_Inhalant_3y, Wheeze_3yCLA, Child_Atopy_3y, Noncold_Wheeze_2hy, Child_Food_3y, Weight_36m, Epi_Noncold_Wheeze_3y, Epi_Noncold_Wheeze_2hy, Noncold_Wheeze_3y, Cumulative_Wheeze_30m, Recurrent_Wheeze_3y, Wheeze_3y, Diastolic_BP_3yCLA, Cumulative_Wheeze_36m}"
all_five_timepoints,"{Jaundice_Birth, Father_Caucasian, Stay_Duration_Hospital, Weight_3m, Mom_Atopy, Cumulative_Wheeze_18m, Home_Furry_Pets_6m, Wheeze_1y, Prenatal_Gestational_Diabetes, PSS_36week, Mother_Asthma, Wheeze_Father, AD_Mother, Child_Food_1y, Prolonged_Expiration_1yCLA, Prenatal_Hypotension, CESD_18m, Respiratory_Infections, CESD_24m, Noncold_Wheeze_2y, Epi_Noncold_Wheeze_3m, Mode_of_delivery, Mom_Inhalant, Wheeze_18m, Child_Food_3y, Cumulative_Wheeze_24m, Epi_Noncold_Wheeze_3y, Noncold_Wheeze_3y, Pr..."


Unnamed: 0_level_0,Features
Time_Point,Unnamed: 1_level_1
at_birth,"{Jaundice_Birth, Dad_Atopy, Father_Caucasian, Stay_Duration_Hospital, F10min_Free_Flow_Oxygen, F10min_Intubation, Prenatal_Bleeding, Mom_Atopy, Smoke_Prenatal_Secondhand, Apgar_Score_1min, Gest_Days, Sex, F10min_Perineum_suction, Prenatal_Infections, Prenatal_Other_Conditions, Smoke_Prenatal_Maternal, Prenatal_Gestational_Diabetes, F10min_Mask_Ventilation, PSS_36week, Hayfever_Mother, Mother_Asthma, Prenatal_Hypertension, F10min_No_Measure_Needed, Hayfever_Father, Wheeze_Father, Weight_0m, A..."
6_months,"{Weight_for_age_3m, Home_DEP_3m, Weight_3m, Epi_Noncold_Wheeze_3m, Wheeze_6m, BF_1m, Home_DNBP_3m, Wheeze_3m, Home_Furry_Pets_6m, Noncold_Wheeze_3m, CESD_6m, Cumulative_Wheeze_6m, Noncold_Wheeze_6m, Home_DEHP_3m, Cumulative_Wheeze_3m, BF_3m, Home_BzBP_3m, BF_6m, PSS_6m, Epi_Noncold_Wheeze_6m, BF_Status_3m, Home_New_Furnitures_6m, Home_Presence_Smoke_6m, BF_Status_6m, Home_DiBP_3m}"
1_year,"{Wheeze_1y, Time_of_AntibioticsUsage_12m, Cumulative_Wheeze_12m, CESD_12m, Child_Food_1y, BF_9m, Crackles_1yCLA, Prolonged_Expiration_1yCLA, Epi_Noncold_Wheeze_1y, Child_Inhalant_1y, BF_12m, Child_Atopy_1y, Antibiotics_Usage_12m, Number_of_AntibioticsCourse_12m, Weight_12m, Recurrent_Wheeze_1y, Weight_for_age_12m, Noncold_Wheeze_1y, PSS_12m, Wheeze_1yCLA}"
3_years,"{BF_18m, Weight_for_age_36m, Wheeze_2yh, Cumulative_Wheeze_18m, BF_Implied_Duration, Diastolic_BP_3yCLA, BF_24m, Noncold_Wheeze_18m, Systolic_BP_3yCLA, Noncold_Wheeze_2hy, Epi_Noncold_Wheeze_18m, Weight_36m, Recurrent_Wheeze_3y, Wheeze_3yCLA, CESD_18m, CESD_24m, Noncold_Wheeze_2y, Child_Inhalant_3y, Child_Atopy_3y, Wheeze_18m, Child_Food_3y, Cumulative_Wheeze_24m, Epi_Noncold_Wheeze_3y, Epi_Noncold_Wheeze_2hy, Noncold_Wheeze_3y, Epi_Noncold_Wheeze_2y, PSS_18m, Wheeze_2y, Pulse_Rate_3yCLA, PS..."
all_four_timepoints,"{Jaundice_Birth, Father_Caucasian, Stay_Duration_Hospital, F10min_Free_Flow_Oxygen, Prenatal_Bleeding, Weight_3m, Mom_Atopy, Smoke_Prenatal_Secondhand, Wheeze_2yh, Home_Furry_Pets_6m, Cumulative_Wheeze_18m, Wheeze_1y, Prenatal_Infections, Prenatal_Gestational_Diabetes, PSS_36week, Prenatal_Hypertension, Mother_Asthma, F10min_No_Measure_Needed, Systolic_BP_3yCLA, Wheeze_Father, Cumulative_Wheeze_12m, AD_Mother, CESD_12m, Child_Food_1y, BF_9m, Noncold_Wheeze_2hy, Prolonged_Expiration_1yCLA, Re..."


<IPython.core.display.Javascript object>

In [40]:
# Stage names without the last key (the "all_five_timepoints" union);
# same listing as shown a few cells earlier.
list(five_time_dict)[:-1]

['at_birth', '6_months', '1_year', '2_years', '3_years']

<IPython.core.display.Javascript object>

In [47]:
# Stack the "at_birth" and "1_year" feature names into one Series
# (each part keeps its own 0-based index, so index labels repeat).
pd.concat(
    [pd.Series(list(five_time_dict[stage])) for stage in ("at_birth", "1_year")]
)

0              Jaundice_Birth
1                   Dad_Atopy
2            Father_Caucasian
3      Stay_Duration_Hospital
4     F10min_Free_Flow_Oxygen
               ...           
15        Recurrent_Wheeze_1y
16         Weight_for_age_12m
17          Noncold_Wheeze_1y
18                    PSS_12m
19               Wheeze_1yCLA
Length: 88, dtype: object

<IPython.core.display.Javascript object>

In [60]:
# First row as a Series (displayed on its own) ...
df_child.iloc[0]
# ... then rows 0 and 1 placed side by side as columns and transposed back into rows.
pd.concat([df_child.iloc[pos] for pos in (0, 1)], axis=1).T

Sex                      F
Gest_Days            273.0
Weight_0m            3.255
Weight_3m              NaN
Weight_12m            11.8
                     ...  
Child_Food_3y          0.0
Child_Inhalant_3y      0.0
Child_Atopy_5y         0.0
Child_Food_5y          0.0
Child_Inhalant_5y      0.0
Name: 20001, Length: 179, dtype: object

Unnamed: 0,Sex,Gest_Days,Weight_0m,Weight_3m,Weight_12m,Weight_36m,Weight_60m,Weight_for_age_0m,Weight_for_age_3m,Weight_for_age_12m,...,Child_Ethnicity,Child_Atopy_1y,Child_Food_1y,Child_Inhalant_1y,Child_Atopy_3y,Child_Food_3y,Child_Inhalant_3y,Child_Atopy_5y,Child_Food_5y,Child_Inhalant_5y
20001,F,273.0,3.255,,11.8,20.0,25.5,0.05,,0.09,...,,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
20002,M,275.0,3.195,,,,,-0.31,,,...,,,,,,,,,,


<IPython.core.display.Javascript object>

In [62]:
# Rows at positions 0 and 2, placed side by side as columns and transposed back into rows.
pd.concat([df_child.iloc[pos] for pos in (0, 2)], axis=1).T

Unnamed: 0,Sex,Gest_Days,Weight_0m,Weight_3m,Weight_12m,Weight_36m,Weight_60m,Weight_for_age_0m,Weight_for_age_3m,Weight_for_age_12m,...,Child_Ethnicity,Child_Atopy_1y,Child_Food_1y,Child_Inhalant_1y,Child_Atopy_3y,Child_Food_3y,Child_Inhalant_3y,Child_Atopy_5y,Child_Food_5y,Child_Inhalant_5y
20001,F,273.0,3.255,,11.8,20.0,25.5,0.05,,0.09,...,,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
20003,F,279.0,4.49,,11.0,,,2.47,,,...,,,,,,,,,,


<IPython.core.display.Javascript object>

In [66]:
# The first five rows, each as its own Series, collected in a list.
[df_child.iloc[pos] for pos in range(0, 5)]

[Sex                      F
 Gest_Days            273.0
 Weight_0m            3.255
 Weight_3m              NaN
 Weight_12m            11.8
                      ...  
 Child_Food_3y          0.0
 Child_Inhalant_3y      0.0
 Child_Atopy_5y         0.0
 Child_Food_5y          0.0
 Child_Inhalant_5y      0.0
 Name: 20001, Length: 179, dtype: object,
 Sex                      M
 Gest_Days            275.0
 Weight_0m            3.195
 Weight_3m              NaN
 Weight_12m             NaN
                      ...  
 Child_Food_3y          NaN
 Child_Inhalant_3y      NaN
 Child_Atopy_5y         NaN
 Child_Food_5y          NaN
 Child_Inhalant_5y      NaN
 Name: 20002, Length: 179, dtype: object,
 Sex                      F
 Gest_Days            279.0
 Weight_0m             4.49
 Weight_3m              NaN
 Weight_12m            11.0
                      ...  
 Child_Food_3y          NaN
 Child_Inhalant_3y      NaN
 Child_Atopy_5y         NaN
 Child_Food_5y          NaN
 Child_Inhalant_5y  

<IPython.core.display.Javascript object>

In [69]:
# Re-assemble the first five row Series into a frame: concatenate them as
# columns, then transpose so each subject is a row again.
pd.concat([df_child.iloc[pos] for pos in range(0, 5)], axis=1).T

Unnamed: 0,Sex,Gest_Days,Weight_0m,Weight_3m,Weight_12m,Weight_36m,Weight_60m,Weight_for_age_0m,Weight_for_age_3m,Weight_for_age_12m,...,Child_Ethnicity,Child_Atopy_1y,Child_Food_1y,Child_Inhalant_1y,Child_Atopy_3y,Child_Food_3y,Child_Inhalant_3y,Child_Atopy_5y,Child_Food_5y,Child_Inhalant_5y
20001,F,273.0,3.255,,11.8,20.0,25.5,0.05,,0.09,...,,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
20002,M,275.0,3.195,,,,,-0.31,,,...,,,,,,,,,,
20003,F,279.0,4.49,,11.0,,,2.47,,,...,,,,,,,,,,
20005,F,280.0,2.88,,,,,-0.8,,,...,,,,,,,,,,
20006,F,271.0,3.335,,11.1,17.8,20.1,0.22,,,...,,,,,0.0,0.0,0.0,0.0,0.0,0.0


<IPython.core.display.Javascript object>