# Pandas excercises (Boolean masks)
## data: [assets/NISPUF17.csv](assets/NISPUF17.csv)
- 2017 data on immunizations from the CDC

In [6]:
import pandas as pd
df = pd.read_csv('assets/NISPUF17.csv', index_col = 1)

## `proportion_of_education` 
Function that returns the proportion of children in the dataset who had a mother with the education levels equal to less than high school (<12), high school (12), more than high school but not a college graduate (>12) and college degree.


In [7]:
def proportion_of_education():
    total = len(df.index)
    results = {}
    
    #less than hg
    less_hg_mask = df['EDUC1'] == 1 
    results['less than high school'] = len(df[less_hg_mask].index) / total
    
    #hg
    hg_mask = df['EDUC1'] == 2
    results['high school'] = len(df[hg_mask].index) / total
    
    #more than high school but not college
    more_hg_mask = df['EDUC1'] == 3
    results['more than high school but not college'] = len(df[more_hg_mask].index) / total
    
    #college
    college_mask = df['EDUC1'] == 4
    results['college'] = len(df[college_mask ].index) / total
    
    return results

    

In [8]:
print(proportion_of_education())

{'less than high school': 0.10202002459160373, 'high school': 0.172352011241876, 'more than high school but not college': 0.24588090637625154, 'college': 0.47974705779026877}


## Exploring the relationship between being fed breastmilk as a child and getting a seasonal influenza vaccine

Returns a tuple of the average number of influenza vaccines for those children we know received breastmilk as a child and those who know did not.


In [11]:
def average_influenza_doses():
    #breast fed
    breast_fed_mask = df['CBF_01'] == 1
    population = len(df[breast_fed_mask].index)
    bf_average_influenza_shots = df[breast_fed_mask]['P_NUMFLU'].sum()/population
    
    #not bf
    not_breast_fed_mask = df['CBF_01'] == 2
    population = len(df[not_breast_fed_mask].index)
    not_bf_average_influenza_shots = df[not_breast_fed_mask]['P_NUMFLU'].sum()/population
    
    return(bf_average_influenza_shots, not_bf_average_influenza_shots)


In [12]:
print(average_influenza_doses())

(1.0298833518816206, 0.7804161566707466)


## Is there a link btw vaccine effectiveness and sex of the child?
Calculates the ratio of the number of children who contracted chickenpox but were vaccinated against it (at least one varicella dose) versus those who were vaccinated but did not contract chicken pox. Returns results by sex. 


In [14]:
def chickenpox_by_sex():
    results = {}
    
    #male and vaccinated
    male_vaccinated_mask = (df['SEX'] == 1) & (df['P_NUMVRC']>0) 
    male_vaccinated_df = df[male_vaccinated_mask]
    #contracted
    contracted_population = len(male_vaccinated_df[df['HAD_CPOX'] == 1].index)
    not_contracted_population = len(male_vaccinated_df[df['HAD_CPOX'] == 2].index)
    ratio = contracted_population/ not_contracted_population
    results["male"] = ratio
    
    #female and vaccinated
    female_vaccinated_mask = (df['SEX'] != 1) & (df['P_NUMVRC'] > 0) 
    female_vaccinated_df = df[female_vaccinated_mask]
    #contracted
    contracted_population = len(female_vaccinated_df[df['HAD_CPOX'] == 1].index)
    not_contracted_population = len(female_vaccinated_df[df['HAD_CPOX'] == 2].index)
    ratio = contracted_population/ not_contracted_population
    results["female"] = ratio
    
    return results


In [15]:
print(chickenpox_by_sex())

{'male': 0.009675583380762664, 'female': 0.0077918259335489565}


  contracted_population = len(male_vaccinated_df[df['HAD_CPOX'] == 1].index)
  not_contracted_population = len(male_vaccinated_df[df['HAD_CPOX'] == 2].index)
  contracted_population = len(female_vaccinated_df[df['HAD_CPOX'] == 1].index)
  not_contracted_population = len(female_vaccinated_df[df['HAD_CPOX'] == 2].index)
