# EIN Lookup Table

In [None]:
import pandas as pd
import psycopg2

## IL Tables

In [None]:
# Connection settings used by the IL cells.
db_name = "appliedda"
hostname = "10.10.2.10"

# Database connection; the IL query cells below all run on this session.
conn = psycopg2.connect(host=hostname, database=db_name)

In [None]:
# Schema-qualified employer table substituted for {table} in the IL queries.
empr_table = "il_des_kcmo.il_qcew_employers"

### Number of Branches

In [None]:
# Build the temp table ein_nb_branch with one row per EIN.
# The inner query counts the rows of the employer table for each
# (ein, year, quarter) and calls that count nb_branches; the outer query keeps
# the largest count per EIN as max_nb_branches.
# The statement batch ends with an explicit `commit;`.
query = '''
CREATE TEMP TABLE ein_nb_branch AS
SELECT ein, max(nb_branches) as max_nb_branches
FROM (
    SELECT ein, year, quarter as qtr, count(*) as nb_branches
    from {table}
    group by ein, year, qtr
) as a
group by ein;

commit;
'''.format(table=empr_table)

# The context manager closes the cursor once the statement has run, instead of
# leaving one open cursor behind per cell.
with conn.cursor() as cursor:
    cursor.execute(query)

### Legal Name

In [None]:
# Build the temp table ein_legal_name: for each EIN, the non-null name_legal
# value that appears on the most rows of the employer table (plus that count).
#
# PostgreSQL only guarantees which row DISTINCT ON keeps when the ORDER BY sits
# on the same query level, so the ordering is applied to the outer SELECT
# rather than inside the subquery. legal_name is the last sort key so that
# ties on count resolve the same way on every run.
query = '''
CREATE TEMP TABLE ein_legal_name AS
select distinct on (ein) ein, legal_name, count
from (
    SELECT ein, name_legal as legal_name, count(*) as count
    from {table}
    where name_legal is not null
    group by ein, name_legal
) as a
order by ein, count desc, legal_name;
'''.format(table=empr_table)

# The context manager closes the cursor once the statement has run.
with conn.cursor() as cursor:
    cursor.execute(query)

### NAICS

In [None]:
# Build the temp table ein_naics: for each EIN, the naics value that appears on
# the most rows of the employer table (plus that count). NULLs and the literal
# string 'nan' are excluded before counting.
#
# PostgreSQL only guarantees which row DISTINCT ON keeps when the ORDER BY sits
# on the same query level, so the ordering is applied to the outer SELECT
# rather than inside the subquery. naics is the last sort key so that ties on
# count resolve the same way on every run.
query = '''
CREATE TEMP TABLE ein_naics AS
select distinct on (ein) ein, naics, count
from (
    SELECT ein, naics, count(*) as count
    from {table}
    where naics is not null and naics != 'nan'
    group by ein, naics
) as a
order by ein, count desc, naics;
'''.format(table=empr_table)

# The context manager closes the cursor once the statement has run.
with conn.cursor() as cursor:
    cursor.execute(query)

### Zipcode

In [None]:
# Build the temp table ein_pl_zip: for each EIN, the non-null address_zip5
# value (exposed as pl_zip) that appears on the most rows of the employer
# table, plus that count.
#
# PostgreSQL only guarantees which row DISTINCT ON keeps when the ORDER BY sits
# on the same query level, so the ordering is applied to the outer SELECT
# rather than inside the subquery. pl_zip is the last sort key so that ties on
# count resolve the same way on every run.
query = '''
CREATE TEMP TABLE ein_pl_zip AS
select distinct on (ein) ein, pl_zip, count
from (
    SELECT ein, address_zip5 as pl_zip, count(*) as count
    from {table}
    where address_zip5 is not null
    group by ein, address_zip5
) as a
order by ein, count desc, pl_zip;
'''.format(table=empr_table)

# The context manager closes the cursor once the statement has run.
with conn.cursor() as cursor:
    cursor.execute(query)

### County

In [None]:
# Build the temp table ein_cnty: for each EIN, the non-null county value
# (exposed as cnty) that appears on the most rows of the employer table, plus
# that count.
#
# PostgreSQL only guarantees which row DISTINCT ON keeps when the ORDER BY sits
# on the same query level, so the ordering is applied to the outer SELECT
# rather than inside the subquery. cnty is the last sort key so that ties on
# count resolve the same way on every run.
query = '''
CREATE TEMP TABLE ein_cnty AS
select distinct on (ein) ein, cnty, count
from (
    SELECT ein, county as cnty, count(*) as count
    from {table}
    where county is not null
    group by ein, county
) as a
order by ein, count desc, cnty;
'''.format(table=empr_table)

# The context manager closes the cursor once the statement has run.
with conn.cursor() as cursor:
    cursor.execute(query)

### First Appearance

In [None]:
# Build the temp table ein_start: for each EIN, the earliest (year, quarter)
# found in the employer table, exposed as start_year / start_qtr.
#
# PostgreSQL only guarantees which row DISTINCT ON keeps when the ORDER BY sits
# on the same query level, so the ordering is applied to the outer SELECT
# rather than inside the subquery.
query = '''
CREATE TEMP TABLE ein_start AS
SELECT distinct on (ein) ein, year as start_year, qtr as start_qtr
FROM (
    SELECT ein, year, quarter as qtr
    from {table}
) as a
order by ein, year, qtr;
'''.format(table=empr_table)

# The context manager closes the cursor once the statement has run.
with conn.cursor() as cursor:
    cursor.execute(query)

### Combine all tables

In [None]:
# Assemble the permanent IL lookup table from the temp tables created above.
# ein_nb_branch drives the result (one row per EIN); the other temp tables are
# left-joined on ein, so an EIN with no legal name / naics / zip / county still
# gets a row. pl_zip and cnty are only kept when max_nb_branches = 1 and are
# NULL otherwise.
#
# NOTE: because of IF NOT EXISTS, an already existing
# ada_18_uchi.il_ein_lookup is left as it is rather than rebuilt; drop it first
# if a refresh is intended.
query = '''
CREATE TABLE IF NOT EXISTS ada_18_uchi.il_ein_lookup AS
select b.legal_name, a.*, 
    c.naics,
    case when a.max_nb_branches = 1 then d.pl_zip else null end as pl_zip,
    case when a.max_nb_branches = 1 then e.cnty else null end as cnty,
    f.start_year, f.start_qtr
from ein_nb_branch as a
left join ein_legal_name as b on a.ein = b.ein
left join ein_naics as c on a.ein = c.ein
left join ein_pl_zip as d on a.ein = d.ein 
left join ein_cnty as e on a.ein = e.ein 
left join ein_start as f on a.ein = f.ein;

alter table ada_18_uchi.il_ein_lookup owner to ada_18_uchi_admin;

commit;
'''

# The context manager closes the cursor once the statements have run.
with conn.cursor() as cursor:
    cursor.execute(query)

## MO Tables

In [None]:
# Connection settings used by the MO cells.
db_name = "appliedda"
hostname = "10.10.2.10"

# Rebinding `conn` would otherwise leave the previous connection open on the
# server with nothing referencing it, so close it first. Closing also ends that
# session, which removes its temp tables; the MO cells below recreate tables
# with the same names on the new session.
try:
    conn.close()
except NameError:
    # No earlier connection exists (e.g. the MO section is run on its own).
    pass

conn = psycopg2.connect(database=db_name, host=hostname)  # database connection

In [None]:
# Schema-qualified employer table substituted for {table} in the MO queries.
empr_table = "kcmo_lehd.mo_qcew_employers"

### Number of Branches

In [None]:
# Build the temp table ein_nb_branch with one row per EIN.
# The inner query counts the rows of the employer table for each
# (ein, year, qtr) and calls that count nb_branches; the outer query keeps the
# largest count per EIN as max_nb_branches.
# The statement batch ends with an explicit `commit;`.
query = '''
CREATE TEMP TABLE ein_nb_branch AS
SELECT ein, max(nb_branches) as max_nb_branches
FROM (
    SELECT ein, year, qtr, count(*) as nb_branches
    from {table}
    group by ein, year, qtr
) as a
group by ein;

commit;
'''.format(table=empr_table)

# The context manager closes the cursor once the statement has run, instead of
# leaving one open cursor behind per cell.
with conn.cursor() as cursor:
    cursor.execute(query)

### Legal Name

In [None]:
# Build the temp table ein_legal_name: for each EIN, the non-null legal_name
# value that appears on the most rows of the employer table (plus that count).
#
# PostgreSQL only guarantees which row DISTINCT ON keeps when the ORDER BY sits
# on the same query level, so the ordering is applied to the outer SELECT
# rather than inside the subquery. legal_name is the last sort key so that
# ties on count resolve the same way on every run.
query = '''
CREATE TEMP TABLE ein_legal_name AS
select distinct on (ein) ein, legal_name, count
from (
    SELECT ein, legal_name, count(*) as count
    from {table}
    where legal_name is not null
    group by ein, legal_name
) as a
order by ein, count desc, legal_name;
'''.format(table=empr_table)

# The context manager closes the cursor once the statement has run.
with conn.cursor() as cursor:
    cursor.execute(query)

### NAICS

In [None]:
# Build the temp table ein_naics: for each EIN, the naics value that appears on
# the most rows of the employer table (plus that count). NULLs and the literal
# string 'nan' are excluded before counting.
#
# PostgreSQL only guarantees which row DISTINCT ON keeps when the ORDER BY sits
# on the same query level, so the ordering is applied to the outer SELECT
# rather than inside the subquery. naics is the last sort key so that ties on
# count resolve the same way on every run.
query = '''
CREATE TEMP TABLE ein_naics AS
select distinct on (ein) ein, naics, count
from (
    SELECT ein, naics, count(*) as count
    from {table}
    where naics is not null and naics != 'nan'
    group by ein, naics
) as a
order by ein, count desc, naics;
'''.format(table=empr_table)

# The context manager closes the cursor once the statement has run.
with conn.cursor() as cursor:
    cursor.execute(query)

### Zipcode

In [None]:
# Build the temp table ein_pl_zip: for each EIN, the non-null pl_zip value that
# appears on the most rows of the employer table (plus that count).
#
# PostgreSQL only guarantees which row DISTINCT ON keeps when the ORDER BY sits
# on the same query level, so the ordering is applied to the outer SELECT
# rather than inside the subquery. pl_zip is the last sort key so that ties on
# count resolve the same way on every run.
query = '''
CREATE TEMP TABLE ein_pl_zip AS
select distinct on (ein) ein, pl_zip, count
from (
    SELECT ein, pl_zip, count(*) as count
    from {table}
    where pl_zip is not null
    group by ein, pl_zip
) as a
order by ein, count desc, pl_zip;
'''.format(table=empr_table)

# The context manager closes the cursor once the statement has run.
with conn.cursor() as cursor:
    cursor.execute(query)

### County

In [None]:
# Build the temp table ein_cnty: for each EIN, the non-null cnty value that
# appears on the most rows of the employer table (plus that count).
#
# PostgreSQL only guarantees which row DISTINCT ON keeps when the ORDER BY sits
# on the same query level, so the ordering is applied to the outer SELECT
# rather than inside the subquery. cnty is the last sort key so that ties on
# count resolve the same way on every run.
query = '''
CREATE TEMP TABLE ein_cnty AS
select distinct on (ein) ein, cnty, count
from (
    SELECT ein, cnty, count(*) as count
    from {table}
    where cnty is not null
    group by ein, cnty
) as a
order by ein, count desc, cnty;
'''.format(table=empr_table)

# The context manager closes the cursor once the statement has run.
with conn.cursor() as cursor:
    cursor.execute(query)

### First Appearance

In [None]:
# Build the temp table ein_start: for each EIN, the earliest (year, qtr) found
# in the employer table, exposed as start_year / start_qtr.
#
# PostgreSQL only guarantees which row DISTINCT ON keeps when the ORDER BY sits
# on the same query level, so the ordering is applied to the outer SELECT
# rather than inside the subquery.
query = '''
CREATE TEMP TABLE ein_start AS
SELECT distinct on (ein) ein, year as start_year, qtr as start_qtr
FROM (
    SELECT ein, year, qtr
    from {table}
) as a
order by ein, year, qtr;
'''.format(table=empr_table)

# The context manager closes the cursor once the statement has run.
with conn.cursor() as cursor:
    cursor.execute(query)

### Combine all tables

In [None]:
# Assemble the permanent MO lookup table from the temp tables created above.
# ein_nb_branch drives the result (one row per EIN); the other temp tables are
# left-joined on ein, so an EIN with no legal name / naics / zip / county still
# gets a row. pl_zip and cnty are only kept when max_nb_branches = 1 and are
# NULL otherwise.
#
# NOTE: because of IF NOT EXISTS, an already existing
# ada_18_uchi.mo_ein_lookup is left as it is rather than rebuilt; drop it first
# if a refresh is intended.
query = '''
CREATE TABLE IF NOT EXISTS ada_18_uchi.mo_ein_lookup AS
select b.legal_name, a.*, 
    c.naics,
    case when a.max_nb_branches = 1 then d.pl_zip else null end as pl_zip,
    case when a.max_nb_branches = 1 then e.cnty else null end as cnty,
    f.start_year, f.start_qtr
from ein_nb_branch as a
left join ein_legal_name as b on a.ein = b.ein
left join ein_naics as c on a.ein = c.ein
left join ein_pl_zip as d on a.ein = d.ein 
left join ein_cnty as e on a.ein = e.ein 
left join ein_start as f on a.ein = f.ein;

alter table ada_18_uchi.mo_ein_lookup owner to ada_18_uchi_admin;

commit;
'''

# The context manager closes the cursor once the statements have run.
with conn.cursor() as cursor:
    cursor.execute(query)