# Data Preparation

## Python Setup

In [None]:
import pandas as pd
import numpy as np
import psycopg2

In [None]:
# Connection settings for the database that holds the wage/employer schemas.
db_name = "appliedda"
hostname = "10.10.2.10"

# Single psycopg2 connection reused by every SQL cell below.
conn = psycopg2.connect(host=hostname, database=db_name)

## SQL Query

In [None]:
# SQL that materializes ada_kcmo.dashboard_wages_mo (skipped when the table
# already exists): every row of kcmo_lehd.mo_wage, left-joined to
# kcmo_lehd.mo_qcew_employers on EIN, unit (seinunit = run), UI account
# (empr_no = ui_acct), year and quarter.
# - The subquery adds a constant `flag = 1` to each employer row, so
#   `merge_status` is 1 when an employer row matched and 0 when the left join
#   produced NULLs.
# - Wage rows without an employer match are kept; their address, county and
#   NAICS columns are NULL.
# - NOTE(review): if the employer table can hold more than one row per join
#   key, wage rows will be duplicated -- confirm key uniqueness.
# The script then transfers ownership to ada_kcmo_admin and commits.
script = '''
create table if not exists ada_kcmo.dashboard_wages_mo as
select 
    a.ssn
    , a.wage
    , a.ein
    , a.seinunit as run
    , a.empr_no as ui_acct
    , a.year
    , a.quarter as qtr
    , b.pl_addr1
    , b.pl_city
    , b.pl_zip
    , b.cnty
    , b.naics
    , case when b.flag = 1 then 1 else 0 end as merge_status
from kcmo_lehd.mo_wage as a
left join (select *, 1 as flag from kcmo_lehd.mo_qcew_employers) as b
on a.ein = b.ein and a.seinunit = b.run and a.empr_no = b.ui_acct and a.year = b.year and a.quarter = b.qtr;

alter table ada_kcmo.dashboard_wages_mo owner to ada_kcmo_admin;
commit;
'''

In [None]:
# Execute the SQL held in `script` on the shared connection.
# - The cursor is opened in a `with` block so it is closed even when the
#   statement fails (previously it was never closed).
# - On a database error the connection is rolled back before re-raising;
#   otherwise it stays in an aborted transaction and every later cell fails.
# - No explicit conn.commit() is needed: the script ends with its own `commit;`.
try:
    with conn.cursor() as c:
        c.execute(script)
except psycopg2.Error:
    conn.rollback()
    raise

In [None]:
# SQL that materializes ada_kcmo.dashboard_wage_buckets_mo (skipped when the
# table already exists): an aggregate of ada_kcmo.dashboard_wages_mo grouped by
# year, quarter, county, 2-character NAICS prefix and wage bucket, with the
# number of rows (`nb_jobs`) and the mean wage (`avg_wage`) per group.
# - `(wage/1000)*1000` presumably relies on integer division to truncate wages
#   to a multiple of 1000; if `wage` is numeric/float the expression returns
#   the wage unchanged -- TODO confirm the column type.
# - Rows whose county or NAICS is NULL (no employer match upstream) form their
#   own NULL groups.
# The script then transfers ownership to ada_kcmo_admin and commits.
script = '''
create table if not exists ada_kcmo.dashboard_wage_buckets_mo as 
select
    year
    , qtr
    , cnty
    , left(naics, 2) as naics
    , (wage/1000)*1000 as wage_bucket
    , count(*) as nb_jobs
    , avg(wage) as avg_wage
from ada_kcmo.dashboard_wages_mo
group by year, qtr, cnty, left(naics, 2), (wage/1000)*1000
order by year, qtr, cnty, left(naics, 2), (wage/1000)*1000;

alter table ada_kcmo.dashboard_wage_buckets_mo owner to ada_kcmo_admin;
commit;
'''

In [None]:
# Execute the SQL held in `script` on the shared connection.
# - The cursor is opened in a `with` block so it is closed even when the
#   statement fails (previously it was never closed).
# - On a database error the connection is rolled back before re-raising;
#   otherwise it stays in an aborted transaction and every later cell fails.
# - No explicit conn.commit() is needed: the script ends with its own `commit;`.
try:
    with conn.cursor() as c:
        c.execute(script)
except psycopg2.Error:
    conn.rollback()
    raise

## IL Border Data

In [None]:
# SQL that materializes ada_kcmo.dashboard_wages_il (skipped when the table
# already exists): rows of il_des_kcmo.il_wage joined to
# il_des_kcmo.il_qcew_employers on EIN, seinunit, empr_no, year and quarter,
# restricted to employers whose county code is in the list in the WHERE clause.
# Output columns are renamed (run, ui_acct, qtr, cnty) to line up with the MO
# wages table.
# - NOTE(review): the WHERE clause filters on `b.county`, a column of the
#   right-hand table. Wage rows with no employer match have NULL there and are
#   dropped, so the LEFT JOIN behaves as an INNER JOIN and `merge_status` is
#   always 1 in this table (unlike the MO table). Confirm this is intended.
# - NOTE(review): the county codes are presumably the IL border counties named
#   by the section heading -- verify the list against the source.
# The script then transfers ownership to ada_kcmo_admin and commits.
script = '''
create table if not exists ada_kcmo.dashboard_wages_il as
select 
    a.ssn
    , a.wage
    , a.ein
    , a.seinunit as run
    , a.empr_no as ui_acct
    , a.year
    , a.quarter as qtr
    , b.address_street1
    , b.address_city
    , b.address_zip5
    , b.county as cnty
    , b.naics
    , case when b.flag = 1 then 1 else 0 end as merge_status
from il_des_kcmo.il_wage as a
left join (select *, 1 as flag from il_des_kcmo.il_qcew_employers) as b
on a.ein = b.ein and a.seinunit = b.seinunit and a.empr_no = b.empr_no and a.year = b.year and a.quarter = b.quarter
where b.county in ('067', '001', '149', '013', '083', '119', '163', '133', '157', '077', '181', '003');

alter table ada_kcmo.dashboard_wages_il owner to ada_kcmo_admin;
commit;
'''

In [None]:
# Execute the SQL held in `script` on the shared connection.
# - The cursor is opened in a `with` block so it is closed even when the
#   statement fails (previously it was never closed).
# - On a database error the connection is rolled back before re-raising;
#   otherwise it stays in an aborted transaction and every later cell fails.
# - No explicit conn.commit() is needed: the script ends with its own `commit;`.
try:
    with conn.cursor() as c:
        c.execute(script)
except psycopg2.Error:
    conn.rollback()
    raise

In [None]:
# SQL that materializes ada_kcmo.dashboard_wage_buckets_il (skipped when the
# table already exists): an aggregate of ada_kcmo.dashboard_wages_il grouped by
# year, quarter, county, 2-character NAICS prefix and wage bucket, with the
# number of rows (`nb_jobs`) and the mean wage (`avg_wage`) per group.
# - `(wage/1000)*1000` presumably relies on integer division to truncate wages
#   to a multiple of 1000; if `wage` is numeric/float the expression returns
#   the wage unchanged -- TODO confirm the column type.
# The script then transfers ownership to ada_kcmo_admin and commits.
script = '''
create table if not exists ada_kcmo.dashboard_wage_buckets_il as 
select
    year
    , qtr
    , cnty
    , left(naics, 2) as naics
    , (wage/1000)*1000 as wage_bucket
    , count(*) as nb_jobs
    , avg(wage) as avg_wage
from ada_kcmo.dashboard_wages_il
group by year, qtr, cnty, left(naics, 2), (wage/1000)*1000
order by year, qtr, cnty, left(naics, 2), (wage/1000)*1000;

alter table ada_kcmo.dashboard_wage_buckets_il owner to ada_kcmo_admin;
commit;
'''

In [None]:
# Execute the SQL held in `script` on the shared connection.
# - The cursor is opened in a `with` block so it is closed even when the
#   statement fails (previously it was never closed).
# - On a database error the connection is rolled back before re-raising;
#   otherwise it stays in an aborted transaction and every later cell fails.
# - No explicit conn.commit() is needed: the script ends with its own `commit;`.
try:
    with conn.cursor() as c:
        c.execute(script)
except psycopg2.Error:
    conn.rollback()
    raise