In [669]:
import pandas as pd
import psycopg2

# Connect to the 'opportunity_youth' database (all other connection settings
# are left to psycopg2's defaults).
conn = psycopg2.connect(dbname = 'opportunity_youth')

# Inner query: pums_2017 rows with SCH = '1' and ESR of '3' or '6', limited to
# PUMAs 11610-11615 and ages 16-24. The functions below treat this same filter
# as "opportunity youth".
sql_str = "SELECT pwgtp, SCH, ESR, puma, agep FROM pums_2017 WHERE SCH = '1' and (ESR = '3' or ESR = '6') and (puma between '11610' and '11615') and (agep between '16' and '24')"
# Sum of the person weights (pwgtp) of those rows, grouped per PUMA.
oum_num_totals = pd.read_sql(f"""SELECT puma, SUM(pwgtp) AS TotalPeople FROM ({sql_str}) AS a GROUP BY puma""", conn)

def youth_in_region(conn, conditions = ''):
    """
    Sums the person weights (pwgtp) of the pums_2017 rows for youth (ages 16-24)
    in south king county (PUMAs 11610-11615) that also satisfy the given sql conditions

    Parameters
    ----------
    conn: open database connection, handed directly to pandas.read_sql
    conditions: optional sql boolean expression that the rows must also satisfy.
                It is wrapped in parentheses before being AND-ed onto the region/age
                filter, so a top-level `or` inside it cannot bypass that filter.
                The text is interpolated into the query verbatim, so it must come
                from trusted code, never from user input.

    Returns
    -------
    returns: one-row, one-column DataFrame holding SUM(pwgtp) for the matching rows
             (this represents the population with the given conditions)
    """
    region_and_age = "(puma between '11610' and '11615') and (agep between '16' and '24')"

    # Treat None / whitespace-only input the same as "no extra conditions".
    conditions = (conditions or '').strip()

    # Parenthesise the caller's fragment: without this, "a or b" would parse as
    # "(region and age and a) or b" and count rows outside the region.
    extra = f" and ({conditions})" if conditions else ''

    return pd.read_sql(f"""SELECT SUM(pwgtp) FROM pums_2017 WHERE {region_and_age}{extra}""", conn)


def race_breakdown(conn, race_check_statement = ''):
    """
    Does sql calls that will get information about race breakdown for youth in south king county

    Parameters
    ----------
    conn: Database connection
    race_check_statement: sql boolean expression that identifies a certain race;
                          an empty string means "all races"

    Returns
    -------
    returns: dictionary of 4 population sums that match
             https://roadmapproject.org/wp-content/uploads/2018/09/Opportunity-Youth-2016-Data-Brief-v2.pdf
             page 2 of the race table given the race check statement.
             Keys: 'total', 'opportunity youth', 'working without diploma',
             'not opportunity youth'. Each value is whatever youth_in_region
             returns for that filter.
    """
    # SCHL codes are zero-padded two-character strings, so the unpadded '9' can
    # never match. Codes '01'-'15' are the levels treated as "no diploma"
    # (the same range education_breakdown uses in this notebook).
    no_diploma_codes = ", ".join(f"'{code:02d}'" for code in range(1, 16))
    has_no_ged = f"(SCHL IN ({no_diploma_codes}))"

    not_in_school = "(SCH = '1')"
    not_working = "(ESR = '3' or ESR = '6')"
    working = "(ESR != '3' and ESR != '6')"

    # Parenthesise the race filter so an `or` inside it cannot swallow the
    # clauses that are AND-ed after it.
    race_prefix = f"({race_check_statement}) and " if race_check_statement else ''

    return {
        # total of a race
        'total': youth_in_region(conn, race_check_statement),
        # total OY of race: not in school and not working
        'opportunity youth': youth_in_region(
            conn, f"{race_prefix}{not_in_school} and {not_working}"),
        # not in school, working, but without a diploma / GED
        'working without diploma': youth_in_region(
            conn, f"{race_prefix}{not_in_school} and {has_no_ged} and {working}"),
        # the rest of the age pop: working with a diploma, or in school
        'not opportunity youth': youth_in_region(
            conn, f"{race_prefix}((not {has_no_ged} and {working}) or (SCH != '1'))"),
    }


# Total opportunity youth across all races. This is not the last expression of
# the cell, so its value is not displayed.
youth_in_region(conn, """SCH = '1' and (ESR = '3' or ESR = '6')""") # total

# The four groups must be mutually exclusive and exhaustive so that their sums
# add up to the whole population: anyone with HISP != '01' is counted as
# hispanic, and the remaining (HISP = '01') rows are split by RAC1P.
hisp = race_breakdown(conn, """HISP != '01'""")
black = race_breakdown(conn, """RAC1P = '2' and HISP = '01'""")
white = race_breakdown(conn, """RAC1P = '1' and HISP = '01'""")
non_white = race_breakdown(conn, """RAC1P != '1' and RAC1P != '2' and HISP = '01'""")

# Element-wise sum of the four groups for every breakdown key.
all_race = {key: hisp[key] + black[key] + white[key] + non_white[key] for key in hisp}

conn.close()

In [664]:
import pandas as pd
import psycopg2

# Connect to the 'opportunity_youth' database; the pd.read_sql calls further
# down in the notebook read through this connection.
conn = psycopg2.connect(dbname = 'opportunity_youth')

def get_oy(df):
    """
    Selects the opportunity youth (OY) rows of a dataframe

    Parameters
    ----------
    df: dataframe with 'sch' and 'esr' columns holding string codes

    Returns
    -------
    dataframe of the rows where sch is '1' and esr is either '3' or '6'
    """
    not_in_school = df['sch'] == '1'
    not_working = df['esr'].isin(['3', '6'])
    return df[not_in_school & not_working]

def oy_breakdown(df):
    """
    Sums the person weights (pwgtp) of the dataframe for 4 groups: total, oy, working without diploma, not oy

    The last three groups are mutually exclusive, so 'oy' + 'working' + 'not oy'
    equals 'total' whenever every row has non-missing sch and esr codes.

    Parameters
    ----------
    df: dataframe to be brokendown; needs the columns 'pwgtp', 'sch', 'schl' and 'esr'
        (the last three as string codes)

    Returns
    -------
    dictionary of the specified dataframe totals, keyed 'total', 'oy', 'working', 'not oy'
    """
    # Education codes '01'-'15' are treated as "no diploma / GED", the same
    # range education_breakdown uses. Generating the list once keeps the two
    # filters below in sync and avoids skipping a code by hand.
    no_diploma_codes = [f'{code:02d}' for code in range(1, 16)]

    not_in_school = df['sch'] == '1'
    no_diploma = df['schl'].isin(no_diploma_codes)
    working = (df['esr'] != '3') & (df['esr'] != '6')

    weights = df['pwgtp']
    return {
        # total sum
        'total': weights.sum(),
        # not working and not in school
        'oy': get_oy(df)['pwgtp'].sum(),
        # not in school, working, with no GED
        'working': weights[not_in_school & no_diploma & working].sum(),
        # either in school or working with a GED
        'not oy': weights[(~no_diploma & working) | (df['sch'] != '1')].sum(),
    }

def split_by4_race(df):
    """
    Partitions a dataframe into hispanic, black, white and other rows

    Rows whose 'hisp' code is not '01' are hispanic; the remaining rows are
    assigned by 'rac1p': '2' is black, '1' is white, anything else is other.

    Parameters
    ----------
    df: data frame to be split up (needs 'hisp' and 'rac1p' columns)

    Returns
    -------
    dictionary of dataframes keyed 'total' (a copy of df), 'hispanic', 'black',
    'white' and 'other'
    """
    race = df['rac1p']
    not_hispanic = df['hisp'] == '01'

    return {
        'total': df.copy(),
        'hispanic': df[df['hisp'] != '01'],
        'black': df[(race == '2') & not_hispanic],
        'white': df[(race == '1') & not_hispanic],
        'other': df[(race != '1') & (race != '2') & not_hispanic],
    }

def split_by_age(df):
    """
    Partitions a dataframe into the age bands 16-18, 19-21 and 22-24

    Parameters
    ----------
    df: data frame to be split up; 'agep' is compared against integers

    Returns
    -------
    dictionary of dataframes keyed 'total' (a copy of df), '16-18', '19-21' and '22-24'
    """
    ages = df['agep']
    bands = {'total': df.copy()}
    # Both ends of every band are inclusive.
    for low, high in ((16, 18), (19, 21), (22, 24)):
        bands[f'{low}-{high}'] = df[(ages >= low) & (ages <= high)]
    return bands


def breakdown_by_split(df, split, breakdown, optional_get = None):
    """
    Splits a dataframe into groups and computes a breakdown for every group

    Parameters
    ----------
    df: dataframe to be split into groups and then broken down
    split: function that takes df and returns a dictionary of group name -> table
    breakdown: function that takes one table and returns its subcategory sums
    optional_get: if a function is given, each table is passed through it after the
                  split and before the breakdown

    Returns
    -------
    Dictionary of dictionaries where the keys are the group names produced by split
    and the values are the results of breakdown for that group
    """
    results = {}
    for label, table in split(df).items():
        selected = optional_get(table) if optional_get else table
        results[label] = breakdown(selected)
    return results


def education_breakdown(df):
    """
    Sums the person weights (pwgtp) of a dataframe per education achievement level

    Parameters
    ----------
    df: dataframe to be broken into education achievement; needs 'schl' (zero-padded
        string codes) and 'pwgtp' columns

    Returns
    -------
    Dictionary with the weight sum of all rows ('total') and of the rows in each level:
    'no diploma' (codes 01-15), 'diploma' (16-17), 'some college' (18-19), 'degree' (20-24)
    """
    # Inclusive (first, last) schl code of every level, in output order.
    level_ranges = (
        ('no diploma', 1, 15),
        ('diploma', 16, 17),
        ('some college', 18, 19),
        ('degree', 20, 24),
    )

    totals = {'total': df['pwgtp'].sum()}
    for level, first, last in level_ranges:
        codes = [f'{code:02d}' for code in range(first, last + 1)]
        totals[level] = df[df['schl'].isin(codes)]['pwgtp'].sum()

    return totals


def split_by_all_race(df): # TODO
    """
    Placeholder for splitting the dataframe into many races (not implemented yet)

    Parameters
    ----------
    df: data frame to be split up (currently ignored)

    Returns
    -------
    dictionary of dataframes split by race; for now always an empty dictionary
    """
    # TODO: build one entry per race once the grouping is decided.
    return {}

In [665]:
# Opportunity-youth rows (SCH = '1' and ESR '3' or '6') for PUMAs 11610-11615, ages 16-24.
sql_str = """SELECT pwgtp, SCH, ESR, puma, agep FROM pums_2017 WHERE SCH = '1' and (ESR = '3' or ESR = '6') and (puma between '11610' and '11615') and (agep between '16' and '24')"""

# Every column of every pums_2017 row in the region/age range. The rows are not
# weighted yet; the breakdown functions sum pwgtp themselves.
total_youth_unweighted = pd.read_sql("SELECT * FROM pums_2017 WHERE (puma between '11610' and '11615') and (agep between '16' and '24')", conn)

# Weighted opportunity-youth count per PUMA, and their grand total.
oy_by_puma = pd.read_sql(f"""SELECT puma, SUM(pwgtp) AS TotalPeople FROM ({sql_str}) AS a GROUP BY puma""", conn)
# The column is read back in lowercase even though the alias is written TotalPeople.
total_oy = oy_by_puma['totalpeople'].sum()

oy_by_4race = breakdown_by_split(total_youth_unweighted, split_by4_race, oy_breakdown)
oy_by_age = breakdown_by_split(total_youth_unweighted, split_by_age, oy_breakdown)
# Education levels of opportunity youth only (get_oy filters each age band first).
oy_education_by_age = breakdown_by_split(total_youth_unweighted, split_by_age, education_breakdown, get_oy)
# TODO: enable once split_by_all_race is implemented.
# oy_by_all_race = breakdown_by_split(total_youth_unweighted, split_by_all_race, oy_breakdown)

# Display the race breakdown. The original displayed `oy_by_race`, a name that is
# never assigned in this notebook and fails with NameError on a fresh kernel.
oy_by_4race

{'total': {'total': 85883.0,
  'oy': 10614.0,
  'working': 3480.0,
  'not oy': 71789.0},
 'hispanic': {'total': 15942.0,
  'oy': 2133.0,
  'working': 1542.0,
  'not oy': 12267.0},
 'black': {'total': 8630.0, 'oy': 1315.0, 'working': 287.0, 'not oy': 7028.0},
 'white': {'total': 38759.0,
  'oy': 4392.0,
  'working': 1165.0,
  'not oy': 33202.0},
 'other': {'total': 22552.0,
  'oy': 2774.0,
  'working': 486.0,
  'not oy': 19292.0}}

In [463]:
# Close the database connection.
# NOTE(review): this cell's execution count is lower than the cells above it —
# confirm it is run last, otherwise the pd.read_sql calls will hit a closed connection.
conn.close()