In [9]:
# Dependencies
import requests
from requests import Session
import csv
import copy
import pandas as pd
import sqlalchemy
from sqlalchemy import create_engine

In [10]:
# Build the CSV download URLs on data.wa.gov from one shared template.
# First dataset id -> immunization data, second -> Washington county data.
immu_url, wa_county_url = (
    f"https://data.wa.gov/api/views/{dataset_id}/rows.csv?accessType=DOWNLOAD"
    for dataset_id in ("kck7-yb2v", "tecv-qzfm")
)

In [11]:
# Extract immunization data
# The CSV is downloaded from immu_url and parsed with pandas' default type inference.
# parse_dates is intentionally not passed: as a bool it only applies to the index, and no
# index_col is requested here, so it would do nothing. School_Year holds spans such as
# "2016-17", which stay as plain strings.
immunization_df = pd.read_csv(immu_url)
immunization_df.head()

Unnamed: 0,School_Name,School_Year,Reported,Reported_enrollment,Percent_complete_for_all_immunizations,Percent_conditional,Percent_out_of_compliance,Percent_with_any_exemption,Percent_with_medical_exemption,Percent_with_personal_exemption,...,Number_incomplete_for_HepatitisB,Number_incomplete_for_varicella,School_Type,School_District,County,ESD,Grade_Levels,Has_kindergarten,Has_6thGrade,Location 1
0,ADNA ELEMENTARY SCHOOL,2016-17,Y,33.0,87.9,3.0,3.0,6.1,0.0,3.0,...,3.0,2.0,PUBLIC SCHOOL,ADNA SCHOOL DISTRICT,LEWIS,EDUCATIONAL SERVICE DISTRICT 113,PK-5,1.0,0.0,P.O. BOX 28\nADNA
1,ADNA ELEMENTARY/MIDDLE/HIGH SCHOOL,2016-17,Y,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,PUBLIC SCHOOL,ADNA SCHOOL DISTRICT,LEWIS,EDUCATIONAL SERVICE DISTRICT 113,K-12,1.0,1.0,P.O. BOX 148\nADNA
2,ALMIRA ELEMENTARY SCHOOL,2016-17,Y,13.0,76.9,0.0,0.0,23.1,0.0,23.1,...,3.0,2.0,PUBLIC SCHOOL,ALMIRA SCHOOL DISTRICT,LINCOLN,EDUCATIONAL SERVICE DISTRICT 101,K -8,1.0,1.0,PO BOX 217\nALMIRA
3,ARCADIA ELEMENTARY,2016-17,Y,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,PUBLIC SCHOOL,DEER PARK SCHOOL DISTRICT,SPOKANE,EDUCATIONAL SERVICE DISTRICT 101,3-5,0.0,0.0,E. 1120 &quot;D&quot; STREET\nDEER PARK
4,ARTZ FOX ELEMENTARY,2016-17,Y,60.0,95.0,0.0,3.3,1.7,1.7,0.0,...,0.0,2.0,PUBLIC SCHOOL,MABTON SCHOOL DISTRICT,YAKIMA,EDUCATIONAL SERVICE DISTRICT 105,K -6,1.0,1.0,P.O. BOX 40\nMABTON


In [12]:
# Extract Washington county data
# The CSV is downloaded from wa_county_url and parsed with pandas' default type inference.
# parse_dates is intentionally not passed: as a bool it only applies to the index, and no
# index_col is requested here, so it would do nothing.
wa_county_df = pd.read_csv(wa_county_url)
wa_county_df.head()

Unnamed: 0,SEQUENCE,FILTER,COUNTY,JURISDICTION,POP_1990,POP_1991,POP_1992,POP_1993,POP_1994,POP_1995,...,POP_2009,POP_2010,POP_2011,POP_2012,POP_2013,POP_2014,POP_2015,POP_2016,POP_2017,POP_2018
0,1,1,Adams,Adams County,13603.0,13823.0,14063.0,14335.0,14679.0,15030.0,...,18421,18728,18950,19050,19200,19400,19410,19510,19870,20020
1,2,2,Adams,Unincorporated Adams County,6466.0,6698.0,6776.0,7009.0,7162.0,7303.0,...,8799,8818,8960,8980,9040,9135,9085,9105,9165,9220
2,3,3,Adams,Incorporated Adams County,7137.0,7125.0,7287.0,7326.0,7517.0,7727.0,...,9622,9910,9990,10070,10160,10265,10325,10405,10705,10800
3,4,4,Adams,Hatton,71.0,80.0,81.0,82.0,83.0,84.0,...,98,101,100,105,110,110,110,110,110,110
4,5,4,Adams,Lind,472.0,400.0,523.0,435.0,452.0,451.0,...,550,564,560,565,570,565,560,550,550,550


In [13]:
# Inspect the column labels of the immunization frame
immunization_df.columns

Index(['School_Name', 'School_Year', 'Reported', 'Reported_enrollment',
       'Percent_complete_for_all_immunizations', 'Percent_conditional',
       'Percent_out_of_compliance', 'Percent_with_any_exemption',
       'Percent_with_medical_exemption', 'Percent_with_personal_exemption',
       'Percent_with_religious_exemption',
       'Percent_with_religious_membership_exemption',
       'Percent_complete_for_diphtheria_tetanus',
       'Percent_complete_for_pertussis',
       'Percent_complete_for_measles_mumps_rubella',
       'Percent_complete_for_polio', 'Percent_complete_for_HepatitisB',
       'Percent_complete_for_varicella',
       'Number_complete_for_all_immunizations', 'Number_conditional',
       'Number_out_of_compliance', 'Number_with_any_exemption',
       'Number_with_medical_exemption', 'Number_with_personal_exemption',
       'Number_with_religious_exemption',
       'Number_with_religious_membership_exemption',
       'Number_incomplete_for_diphtheria_tetanus',
      

In [14]:
# Keep only the county, school year, reported-enrollment and completed-immunization
# columns, under snake_case names (source label -> new label, in output column order).
immunization_columns = {
    'County': 'county',
    'School_Year': 'school_year',
    'Reported_enrollment': 'number_reported',
    'Number_complete_for_all_immunizations': 'number_completed',
}

# Rename first, then select by the *new* labels. rename() silently ignores labels that are
# not present, so re-running this cell on the already-trimmed frame is a no-op instead of
# raising KeyError on 'County'.
immunization_df = (
    immunization_df
    .rename(columns=immunization_columns)
    .loc[:, list(immunization_columns.values())]
    .copy()
)

# Take a peek
immunization_df.head()

Unnamed: 0,county,school_year,number_reported,number_completed
0,LEWIS,2016-17,33.0,29.0
1,LEWIS,2016-17,0.0,0.0
2,LINCOLN,2016-17,13.0,10.0
3,SPOKANE,2016-17,0.0,0.0
4,YAKIMA,2016-17,60.0,57.0


In [15]:
# Inspect the column labels of the Washington county frame
wa_county_df.columns

Index(['SEQUENCE', 'FILTER', 'COUNTY', 'JURISDICTION', 'POP_1990', 'POP_1991',
       'POP_1992', 'POP_1993', 'POP_1994', 'POP_1995', 'POP_1996', 'POP_1997',
       'POP_1998', 'POP_1999', 'POP_2000', 'POP_2001', 'POP_2002', 'POP_2003',
       'POP_2004', 'POP_2005', 'POP_2006', 'POP_2007', 'POP_2008', 'POP_2009',
       'POP_2010', 'POP_2011', 'POP_2012', 'POP_2013', 'POP_2014', 'POP_2015',
       'POP_2016', 'POP_2017', 'POP_2018'],
      dtype='object')

In [16]:
# Keep only the county name and its 2016 population, under snake_case names
# (source label -> new label, in output column order).
county_columns = {'COUNTY': 'county', 'POP_2016': 'pop_2016'}

# Rename first, then select by the *new* labels. rename() silently ignores labels that are
# not present, so re-running this cell on the already-trimmed frame is a no-op instead of
# raising KeyError on 'COUNTY'.
# NOTE(review): the source has several rows per county (county total, unincorporated,
# incorporated, individual towns), so 'county' is not unique here and summing pop_2016
# per county would over-count -- confirm downstream keeps only the county-total rows.
# NOTE(review): county names are title case here ("Adams") but upper case in the
# immunization data ("LEWIS") -- normalize before joining the two frames.
wa_county_df = (
    wa_county_df
    .rename(columns=county_columns)
    .loc[:, list(county_columns.values())]
    .copy()
)

wa_county_df.head()

Unnamed: 0,county,pop_2016
0,Adams,19510
1,Adams,9105
2,Adams,10405
3,Adams,110
4,Adams,550
