# Creating Wage Percentile Table

In [None]:
import pandas as pd

Use SQL to create the table:
    
    -- Holds one row per (year, quarter, percentile) computed from the
    -- per-person quarterly aggregates of kcmo_lehd.mo_wage.
    CREATE TABLE kcmo_lehd.mo_wage_percentile
    (
      year bigint,
      quarter bigint,
      pctile integer,                     -- percentile label (1-100)
      job_qtile double precision,         -- percentile_cont of per-person job count
      avg_wage_qtile double precision,    -- percentile_cont of per-person average wage
      sum_wage_qtile double precision     -- percentile_cont of per-person total wage
    )

In [None]:
# define year range: 2006 through 2016 inclusive (range's stop is exclusive)
years = range(2006, 2017, 1)
print(years)
# define percentile break points as the fractions 0.01, 0.02, ..., 1.00.
# They are built from integer percentiles rather than a float-stepped arange so
# each value is the exact float for its two-decimal fraction (a float step
# accumulates drift such as 0.060000000000000005). This also avoids the
# `pd.np` alias, which recent pandas releases no longer provide.
# Dividing by 100.0 keeps the result a float under Python 2 as well.
quantiles = [pct / 100.0 for pct in range(1, 101)]

In [None]:
# Write one INSERT statement per year to a .sql script, each followed by a
# psql \echo line so progress is visible while the script runs.

# Both lists are the same for every year, so build them once up front.
# Fractions handed to percentile_cont, e.g. "0.01,0.02,...".
pctile_fractions = ','.join(str(q) for q in quantiles)
# Integer labels stored in the pctile column, in the same order as the
# fractions. Round instead of truncating: 0.29 * 100 evaluates to
# 28.999999999999996, so int() alone would label the 29th percentile as 28.
pctile_labels = ','.join(str(int(round(q * 100))) for q in quantiles)

with open('update_mo_wage_percentiles.sql', 'w') as qry_out:
    for y in years:
        # create query for this year
        qry = """INSERT INTO kcmo_lehd.mo_wage_percentile
        SELECT year, quarter, unnest(array[{labels}]) pctile,
        unnest(percentile_cont(array[{fractions}]) WITHIN GROUP (ORDER BY job_count)) job_qtile,
        unnest(percentile_cont(array[{fractions}]) WITHIN GROUP (ORDER BY avg_wage)) avg_wage_qtile,
        unnest(percentile_cont(array[{fractions}]) WITHIN GROUP (ORDER BY sum_wage)) sum_wage_qtile
        FROM ( --subquery to aggregate individuals' jobs to single row per quarter
            SELECT ssn, name_first, name_middle, name_last, count(*) job_count,
                avg(wage) avg_wage, sum(wage) sum_wage, year, quarter
            FROM kcmo_lehd.mo_wage
            WHERE year = {year}
            GROUP BY ssn, name_first, name_middle, name_last, year, quarter
        ) q
        GROUP BY year, quarter
        ORDER BY year, quarter;
        """.format(fractions=pctile_fractions, labels=pctile_labels, year=y)
        qry_out.write(qry)
        qry_out.write('\n')
        # The backslash is escaped so Python does not parse "\e" as an
        # (invalid) escape sequence; the text written is still: \echo "..."
        qry_out.write('\\echo "completed year {}"'.format(y))
        qry_out.write('\n')

Then run the .sql file using psql (from the terminal):

    psql -h stuffed -d appliedda -f update_mo_wage_percentiles.sql &
    
> Note: for some reason (possibly because it was run in the background) the job died after loading 2010. The 2006-2010 data looks correct, so a separate .sql file was made for the remaining years (2011-2016):
    
    psql -h stuffed -d appliedda -f update_mo_wage_percentiles_2011-16.sql
    
Lastly, change the table's owner to the schema admin role so collaborators can edit the table if needed, and run VACUUM ANALYZE:

    ALTER TABLE kcmo_lehd.mo_wage_percentile OWNER TO kcmo_lehd_admin;
    
    VACUUM ANALYZE kcmo_lehd.mo_wage_percentile;