In [41]:
import pandas as pd
import psycopg2

conn = psycopg2.connect(dbname = 'opportunity_youth')

sql_str = "SELECT pwgtp, SCH, ESR, puma, agep FROM pums_2017 WHERE SCH = '1' and (ESR = '3' or ESR = '6') and (puma between '11610' and '11615') and (agep between '16' and '24')"
puma_num_totals = pd.read_sql(f"""SELECT puma, SUM(pwgtp) AS TotalPeople FROM ({sql_str}) AS a GROUP BY puma""", conn)

def youth_in_region(conn, conditions = ''):
    """
    Gets the approximate population from the pums 2017 database that are youth in south king county that also follow the given sql conditions

    Parameters
    ----------
    conn: connection to database
    conditions: extra sql conditions for the rows to follow

    Returns
    -------
    returns: sum of the population weights of the desired rows (this represents the population with the given conditions)
    """
    if conditions: conditions = " and " + conditions
    return pd.read_sql(f"""SELECT SUM(pwgtp) FROM pums_2017 WHERE (puma between '11610' and '11615') and (agep between '16' and '24'){conditions}""", conn)


def race_breakdown(conn, race_check_statement = ''):
    """
    Does sql calls that will get information about race breakdown for youth in south king county

    Parameters
    ----------
    race_check_statement: sql statement that will identify a certain race
    conn: Database connection

    Returns
    -------
    returns: dictionary of 4 population sums that match
             https://roadmapproject.org/wp-content/uploads/2018/09/Opportunity-Youth-2016-Data-Brief-v2.pdf
             page 2 of the race table given the race check statement
    """ 
    df_dict = {}
    has_no_ged = """(SCHL = '15' or SCHL = '01' or SCHL = '14' or SCHL = '13' or SCHL = '12' or SCHL = '11' or SCHL = '10' or SCHL = '9')"""
    df_dict['total'] =                      youth_in_region(conn, f"""{race_check_statement}""") # total of a race
    if race_check_statement: race_check_statement += ' and '
    df_dict['opportunity youth'] =          youth_in_region(conn, f"""{race_check_statement}SCH = '1' and (ESR = '3' or ESR = '6')""") # total OY of race
    df_dict['working without diploma'] =    youth_in_region(conn, f"""{race_check_statement}(SCH = '1')
                                                                                             and {has_no_ged}
                                                                                             and (ESR != '3' and ESR != '6')""") # working with no GED
    df_dict['not opportunity youth'] =      youth_in_region(conn, f"""{race_check_statement}((not {has_no_ged}
                                                                                             and (ESR != '3' and ESR != '6'))
                                                                                             or (SCH != '1'))""") # The rest of the age pop

    return df_dict



youth_in_region(conn, """SCH = '1' and (ESR = '3' or ESR = '6')""") # total
hisp = race_breakdown(conn, """RAC1P  = '8' and HISP != '01'""")
black = race_breakdown(conn, """RAC1P = '2'""")
white = race_breakdown(conn, """RAC1P = '1'""")
non_white = race_breakdown(conn, """RAC1P != '1' and RAC1P != '2' and RAC1P != '8' and HISP = '01'""")
all_race = {}
for i in hisp.keys(): all_race[i] = hisp[i] + black[i] + white[i] + non_white[i]


conn.close()

puma_num_totals['totalpeople'].sum()

10614.0

In [255]:
import pandas as pd
import psycopg2

"""
Notes for making use of these methods

Types of methods
Split Method:     Splits a dataframe into multiple data frames with certain conditions. Returns a dictionary of these splits
Filter Method:    Filters a dataframe to follow a certain trait.
Breakdown Method: Takes a dataframe and breaks up the data into their respective categories and sums up the weighted population
                  This is like a split method but sums up the weights.
!!!
Breakdown must make a total key if you want a table with percentage
!!!
All these methods are also usefull outside the breakdow_by_split method

breakdown_by_split will split a given dataframe, filter it (optional), and break it down
"""

def breakdown_by_split(df, split, breakdown, optional_filter = None):
    """
    Gives the oy breakdown by race for hispanic, black, white, and other

    Parameters
    ----------
    df: dataframe to be broken into race and then oportunity youth stats
    split: function that with split the tables into multiple tables
    breakdown: function that will break the tables into subcategories an get the sums
    optional_filter: if a function is given, this will filter each table after being split

    Returns
    -------
    Dictionary of dictionarys where the keys are the race and the values are the breakdown sums
    """
    dct = {}
    groups = split(df)
    for group in groups.keys():
        if optional_filter:
            dct[group] = breakdown(optional_filter(groups[group]))
        else:
            dct[group] = breakdown(groups[group])
    return dct

def filter_oy(df):
    """
    Gets OY from df
    """
    return df[(df['sch'] == '1') & ((df['esr'] == '3') | (df['esr'] == '6'))]

def split_by4_race(df):
    """
    Breaks the dataframe into hispanic, black, white, and other

    Parameters
    ----------
    df: data frame to be split up

    Returns
    -------
    dictionary of daraframes split by race
    """
    dct = {}
    dct['total']            = df.copy()
    dct['hispanic']         = df[(df['hisp'] != '01')]
    dct['african american'] = df[(df['rac1p'] == '2') & (df['hisp'] == '01')]
    dct['white']            = df[(df['rac1p'] == '1') & (df['hisp'] == '01')]
    dct['other']            = df[(df['rac1p'] != '1') & (df['rac1p'] != '2') & (df['hisp'] == '01')]
    return dct

def split_by_age(df):
    """
    Breaks the dataframe into 16-18, 19-21, 22-24

    Parameters
    ----------
    df: data frame to be split up

    Returns
    -------
    dictionary of daraframes split by age
    """
    dct = {}
    dct['total'] = df.copy()
    dct['16-18'] = df[((df['agep'] >= 16) & (df['agep'] <= 18))]
    dct['19-21'] = df[((df['agep'] >= 19) & (df['agep'] <= 21))]
    dct['22-24'] = df[((df['agep'] >= 22) & (df['agep'] <= 24))]
    return dct


def split_by_all_race(df):
    """
    Breaks the dataframe into many races

    Parameters
    ----------
    df: data frame to be split up

    Returns
    -------
    dictionary of daraframes split by race
    """
    dct = {}
    dct['total']                                = df.copy()
    dct['native american']                      = df[(df['rac1p'].isin(['3', '4', '5']) & (df['hisp'] == '01'))]
    dct['pacific islander and native hawaiian'] = df[(df['rac1p'].isin(['7']) & (df['hisp'] == '01'))]
    dct['african american']                     = df[(df['rac1p'].isin(['2']) & (df['hisp'] == '01'))]
    dct['hispanic']                             = df[((df['hisp'] != '01') & (df['rac1p'].isin(['8'])))] 
    dct['asian']                                = df[(df['rac1p'].isin(['6']) & (df['hisp'] == '01'))]
    dct['white']                                = df[(df['rac1p'].isin(['1']) & (df['hisp'] == '01'))]
    dct['two or more']                          = df[(df['rac1p'].isin(['9']) & (df['hisp'] == '01'))]
    dct['other']                                = df[(df['rac1p'].isin(['8']))]
    return dct

def oy_breakdown(df):
    """
    Breaks the dataframe into 4 dataframes and gives the number of people in that data frame the 4 dataframes are total, oy, working without diploma, not oy

    Parameters
    ----------
    df: dataframe to be brokendown

    Returns
    -------
    dictionary of the specified dataframe totals
    """
    dct = {}
    dct['total'] =      df['pwgtp'].sum() # total sum
    dct['oy'] =         filter_oy(df)['pwgtp'].sum() # not working and not in school
    dct['working'] =    df[(df['sch'] == '1')
                            & (df['schl'].isin(['15', '14', '13', '12', '11', '10', '09', '08', '07', '06', '05', '04', '03', '01']))
                            & ((df['esr'] != '3') & (df['esr'] != '6'))]['pwgtp'].sum() # working with no GED
    dct['not oy'] =     df[((~df['schl'].isin(['15', '14', '13', '12', '11', '10', '09', '08', '07', '06', '05', '04', '03', '01']) & ((df['esr'] != '3') & (df['esr'] != '6')))
                            | (df['sch'] != '1'))]['pwgtp'].sum() # either in school or working with a GED
    return dct

def education_breakdown(df):
    """
    Breaks the dataframe into education acheivment totals

    Parameters
    ----------
    df: dataframe to be broken into education achievement

    Returns
    -------
    Dictionary of the total number of people with education achievement level
    """
    dct = {}
    dct['total']        = df.copy()['pwgtp'].sum()
    dct['no diploma']   = df[df['schl'].isin(['15', '14', '13', '12', '11', '10', '09', '08', '07', '06', '05', '04', '03', '02', '01'])]['pwgtp'].sum()
    dct['diploma']      = df[df['schl'].isin(['16', '17'])]['pwgtp'].sum()
    dct['some college'] = df[df['schl'].isin(['18', '19'])]['pwgtp'].sum()
    dct['degree']       = df[df['schl'].isin(['20', '21', '22', '23', '24'])]['pwgtp'].sum()

    return dct





def make_table_with_percentage(dct): # TODO Comment on this
    """
    Makes a table with percentage with the top row being the total of entries bellow
    """
    df = __get_as_df(dct)
    return __add_percentage(df)

def make_table_without_percentage(dct):
    """
    Makes a table from a dictionary of dictionarys
    """
    return __get_as_df(dct)

def __get_as_df(dct):
    """
    Helper for make_table_with_percentage
    """
    df = pd.DataFrame(dct)
    df.columns = df.columns.str.title()
    cols = list(df.columns)
    if 'Total' in cols:
        cols.remove('Total')
        df = df[cols + ['Total']]
    df.index = df.index.str.title()
    return df

def __add_percentage(df):
    """
    Helper for make_table_with_percentage
    """
    new_frame = df.copy()
    for column in new_frame:
        new_column = [str(int(round(100 * element / new_frame[column][0])))
                      + '% ' + str(int(element)) for element in new_frame[column]]
        new_frame[column] = new_column
    return new_frame

def page_2_dicts(df): # TODO Better comments!
    # Makes dictionarys for all the tables on page 2
    four_races     = breakdown_by_split(df, split_by4_race,    oy_breakdown)
    age            = breakdown_by_split(df, split_by_age,      oy_breakdown)
    edu_and_age    = breakdown_by_split(df, split_by_age,      education_breakdown, filter_oy)
    oy_by_all_race = breakdown_by_split(df, split_by_all_race, oy_breakdown)
    
    pop_by_race = {i : oy_by_all_race[i]['total'] for i in oy_by_all_race}  #  
    oy_by_race = {i : oy_by_all_race[i]['oy'] for i in oy_by_all_race}      # This transposes the dictionary
    race = {'Population Total' : pop_by_race, 'OY Total' : oy_by_race}      # 

    return four_races, age, edu_and_age, race

def page_2_tables(df): # TODO COMMENT MORE
    """
    df: dataframe of region and age range
    returns 4 dataframes that match the table on page 2 of the reference 2016 document
    """
    four_race_dict, age_dict, edu_dict, races_dict = page_2_dicts(df)

    four_race = make_table_with_percentage(four_race_dict)
    age       = make_table_with_percentage(age_dict)
    education = make_table_with_percentage(edu_dict)
    all_races = __fix_all_race_table(pd.DataFrame(races_dict))

    return four_race, age, education, all_races

def __fix_all_race_table(all_races): # TODO COMMENT!!!!
    """
    Helper method to add percentages to the all race table
    """
    new_col = ['100% ' + str(int(all_races['Population Total'][0]))]
    for index, element in enumerate(all_races['Population Total'][1:]):
        ratio = all_races['OY Total'][index + 1] / element
        new_col.append(str(int(round(100 * ratio))) + '% ' + str(int(element)))
    all_races['Population Total'] = new_col

    all_races['OY Total'] = [str(int(round(100 * element / all_races['OY Total'][0]))) + '% ' + str(int(element)) for element in all_races['OY Total']]
    all_races.index = all_races.index.str.title()
    return all_races


def get_youth_by_puma(start_puma = 11610, end_puma = 11615, dbname = 'opportunity_youth'):
    """
    Gives a table of youth from a given PUMA Range based on a given database name.
    Defaults are set for each parameter
    """
    conn = psycopg2.connect(dbname = dbname)
    df = pd.read_sql(f"SELECT * FROM pums_2017 WHERE (puma between '{start_puma}' and '{end_puma}') and (agep between '16' and '24')", conn)
    conn.close()
    return df

def get_oy_by_puma(start_puma = 11610, end_puma = 11615, dbname = 'opportunity_youth'):
    """
    Will Give a table of the OY for each PUMA in a range of pumas given from a given database
    Defaults are set for each parameter
    """
    sql_str = f"""SELECT pwgtp, SCH, ESR, puma, agep
                  FROM pums_2017
                  WHERE SCH = '1' and (ESR = '3' or ESR = '6') and (puma between '{start_puma}' and '{end_puma}') and (agep between '16' and '24')"""
    conn = psycopg2.connect(dbname = dbname)
    oy_by_puma = pd.read_sql(f"""SELECT puma, SUM(pwgtp) AS TotalPeople FROM ({sql_str}) AS a GROUP BY puma""", conn)
    conn.close()
    return oy_by_puma


In [256]:
# initialize df with rows of youth in the area
total_youth_unweighted = get_youth_by_puma(start_puma = 11610, end_puma = 11615, dbname = 'opportunity_youth')
oy_by_puma = get_oy_by_puma(start_puma = 11610, end_puma = 11615, dbname = 'opportunity_youth')

In [257]:
oy_by_puma

Unnamed: 0,puma,totalpeople
0,11610,1853.0
1,11611,2038.0
2,11612,1977.0
3,11613,2006.0
4,11614,1530.0
5,11615,1210.0


In [258]:
four_race, age, education, all_races = page_2_tables(total_youth_unweighted)
four_race_dct, age_dct, education_dct, all_races_dct = page_2_dicts(total_youth_unweighted)

In [259]:
four_race

Unnamed: 0,Hispanic,African American,White,Other,Total
Total,100% 15942,100% 8630,100% 38759,100% 22552,100% 85883
Oy,13% 2133,15% 1315,11% 4392,12% 2774,12% 10614
Working,10% 1542,3% 287,3% 1165,2% 486,4% 3480
Not Oy,77% 12267,81% 7028,86% 33202,86% 19292,84% 71789


In [260]:
age

Unnamed: 0,16-18,19-21,22-24,Total
Total,100% 30141,100% 25486,100% 30256,100% 85883
Oy,6% 1815,15% 3902,16% 4897,12% 10614
Working,1% 449,5% 1400,5% 1631,4% 3480
Not Oy,92% 27877,79% 20184,78% 23728,84% 71789


In [261]:
education

Unnamed: 0,16-18,19-21,22-24,Total
Total,100% 1815,100% 3902,100% 4897,100% 10614
No Diploma,50% 916,28% 1112,28% 1349,32% 3377
Diploma,43% 781,56% 2176,44% 2135,48% 5092
Some College,7% 118,13% 521,20% 1000,15% 1639
Degree,0% 0,2% 93,8% 413,5% 506


In [264]:
all_races

Unnamed: 0,Population Total,OY Total
Total,100% 85883,100% 10614
Native American,41% 851,3% 347
Pacific Islander And Native Hawaiian,20% 1814,3% 360
African American,15% 8630,12% 1315
Hispanic,14% 7003,9% 949
Asian,9% 13239,11% 1189
White,11% 38759,41% 4392
Two Or More,14% 6353,8% 862
Other,13% 7298,9% 965


In [265]:
df = total_youth_unweighted.copy()
dct = breakdown_by_split(df, lambda x: {'male' : x[x['sex'] == '1'], 'female' : x[x['sex'] == '2']}, oy_breakdown)
make_table_with_percentage(dct)

Unnamed: 0,Male,Female
Total,100% 45286,100% 40597
Oy,11% 4949,14% 5665
Working,5% 2135,3% 1345
Not Oy,84% 38202,83% 33587


In [266]:
dct = breakdown_by_split(df, lambda x: {'male' : x[x['sex'] == '1'], 'female' : x[x['sex'] == '2']}, education_breakdown)
make_table_with_percentage(dct)

Unnamed: 0,Male,Female
Total,100% 45286,100% 40597
No Diploma,37% 16603,36% 14808
Diploma,28% 12799,25% 10269
Some College,24% 10796,23% 9405
Degree,11% 5088,15% 6115


In [267]:
dct = breakdown_by_split(df, lambda x: {'self care difficulty' : x[x['ddrs'] == '1'], 'self care able' : x[x['ddrs'] == '2']}, oy_breakdown)
make_table_with_percentage(dct)

Unnamed: 0,Self Care Difficulty,Self Care Able
Total,100% 680,100% 85203
Oy,37% 250,12% 10364
Working,8% 52,4% 3428
Not Oy,56% 378,84% 71411


In [268]:
dct = breakdown_by_split(df, lambda x: {'hearing difficulty' : x[x['dear'] == '1'], 'hearing able' : x[x['dear'] == '2']}, oy_breakdown)
make_table_with_percentage(dct)

Unnamed: 0,Hearing Difficulty,Hearing Able
Total,100% 723,100% 85160
Oy,19% 137,12% 10477
Working,14% 103,4% 3377
Not Oy,67% 483,84% 71306


In [269]:
dct = breakdown_by_split(df, lambda x: {'vision difficulty' : x[x['deye'] == '1'], 'vision able' : x[x['deye'] == '2']}, oy_breakdown)
make_table_with_percentage(dct)

Unnamed: 0,Vision Difficulty,Vision Able
Total,100% 921,100% 84962
Oy,7% 68,12% 10546
Working,1% 9,4% 3471
Not Oy,92% 844,84% 70945


In [270]:
dct = breakdown_by_split(df, lambda x: {'independent living difficulty' : x[x['dout'] == '1'], 'independent living able' : x[x['dout'] == '2']}, oy_breakdown)
make_table_with_percentage(dct)

Unnamed: 0,Independent Living Difficulty,Independent Living Able
Total,100% 2355,100% 83528
Oy,42% 997,12% 9617
Working,4% 100,4% 3380
Not Oy,53% 1258,84% 70531


In [271]:
dct = breakdown_by_split(df, lambda x: {'ambulatory difficulty' : x[x['dphy'] == '1'], 'ambulatory able' : x[x['dphy'] == '2']}, oy_breakdown)
make_table_with_percentage(dct)

Unnamed: 0,Ambulatory Difficulty,Ambulatory Able
Total,100% 633,100% 85250
Oy,37% 234,12% 10380
Working,0% 0,4% 3480
Not Oy,63% 399,84% 71390
