In [4]:
# dependencies
import pandas as pd 
import requests 
import us
import json
from pprint import pprint 
# api keys
from api_keys import census


In [5]:
# Built URL using Census variables website https://api.census.gov/data/timeseries/poverty/saipe/variables.html
url="https://api.census.gov/data/timeseries/poverty/saipe/schdist?get=GEOID,SD_NAME,SAEPOV5_17RV_PT&for=school+district+(Secondary)&YEAR=2017"

# Build query URL
query_url = url + "&key=" + census 

# Get poverty data
pov_response = requests.get(query_url)
pov_json = pov_response.json()


In [9]:
pov_response

<Response [200]>

In [10]:
pov_json[0]

['GEOID',
 'SD_NAME',
 'SAEPOV5_17RV_PT',
 'YEAR',
 'state',
 'school district (secondary)']

In [11]:
pov_df=pd.DataFrame(columns=pov_json[0], data=pov_json)
pov_df

Unnamed: 0,GEOID,SD_NAME,SAEPOV5_17RV_PT,YEAR,state,school district (secondary)
0,GEOID,SD_NAME,SAEPOV5_17RV_PT,YEAR,state,school district (secondary)
1,0400082,Colorado River Union High School District,580,2017,04,00082
2,0400450,Agua Fria Union High School District,868,2017,04,00450
3,0400720,Antelope Union High School District,72,2017,04,00720
4,0401160,Bicentennial Union High School District,45,2017,04,01160
...,...,...,...,...,...,...
399,5513290,Westosha Central High School District,67,2017,55,13290
400,5515150,Union Grove Union High School District,38,2017,55,15150
401,5515480,Big Foot Union High School District,46,2017,55,15480
402,5515600,Waterford Union High School District,39,2017,55,15600


In [15]:
# Drop first row
pov_df.drop(index=pov_df.index[0], axis=0,inplace=True)


In [16]:
# Reset the index
pov_df.reset_index(inplace=True, drop=True)
pov_df

Unnamed: 0,GEOID,SD_NAME,SAEPOV5_17RV_PT,YEAR,state,school district (secondary),State
0,0400450,Agua Fria Union High School District,868,2017,04,00450,Arizona
1,0400720,Antelope Union High School District,72,2017,04,00720,Arizona
2,0401160,Bicentennial Union High School District,45,2017,04,01160,Arizona
3,0401410,Buckeye Union High School District,559,2017,04,01410,Arizona
4,0401740,Casa Grande Union High School District,913,2017,04,01740,Arizona
...,...,...,...,...,...,...,...
397,5513290,Westosha Central High School District,67,2017,55,13290,Wisconsin
398,5515150,Union Grove Union High School District,38,2017,55,15150,Wisconsin
399,5515480,Big Foot Union High School District,46,2017,55,15480,Wisconsin
400,5515600,Waterford Union High School District,39,2017,55,15600,Wisconsin


In [17]:
# Change state fips codes to State name
fips_to_name = us.states.mapping("fips", "name")
pov_df["State"] = pov_df["state"].map(fips_to_name)
pov_df


Unnamed: 0,GEOID,SD_NAME,SAEPOV5_17RV_PT,YEAR,state,school district (secondary),State
0,0400450,Agua Fria Union High School District,868,2017,04,00450,Arizona
1,0400720,Antelope Union High School District,72,2017,04,00720,Arizona
2,0401160,Bicentennial Union High School District,45,2017,04,01160,Arizona
3,0401410,Buckeye Union High School District,559,2017,04,01410,Arizona
4,0401740,Casa Grande Union High School District,913,2017,04,01740,Arizona
...,...,...,...,...,...,...,...
397,5513290,Westosha Central High School District,67,2017,55,13290,Wisconsin
398,5515150,Union Grove Union High School District,38,2017,55,15150,Wisconsin
399,5515480,Big Foot Union High School District,46,2017,55,15480,Wisconsin
400,5515600,Waterford Union High School District,39,2017,55,15600,Wisconsin


In [18]:
# Change state fips codes to State Abbreviation so that this can match the other dataset if needed
fips_to_name = us.states.mapping("fips", "abbr")
pov_df["State Abbr"] = pov_df["state"].map(fips_to_name)
pov_df


Unnamed: 0,GEOID,SD_NAME,SAEPOV5_17RV_PT,YEAR,state,school district (secondary),State,State Abbr
0,0400450,Agua Fria Union High School District,868,2017,04,00450,Arizona,AZ
1,0400720,Antelope Union High School District,72,2017,04,00720,Arizona,AZ
2,0401160,Bicentennial Union High School District,45,2017,04,01160,Arizona,AZ
3,0401410,Buckeye Union High School District,559,2017,04,01410,Arizona,AZ
4,0401740,Casa Grande Union High School District,913,2017,04,01740,Arizona,AZ
...,...,...,...,...,...,...,...,...
397,5513290,Westosha Central High School District,67,2017,55,13290,Wisconsin,WI
398,5515150,Union Grove Union High School District,38,2017,55,15150,Wisconsin,WI
399,5515480,Big Foot Union High School District,46,2017,55,15480,Wisconsin,WI
400,5515600,Waterford Union High School District,39,2017,55,15600,Wisconsin,WI


In [19]:
# Rename state colomn to be State Fips code
pov_df.rename(columns={'state':'State Fips'}, inplace = True)
# Rename Census data code to appropriate name
pov_df.rename(columns={'SAEPOV5_17RV_PT':'Ages 5-17 in Families in Poverty, Count Est'}, inplace = True)
# Rename SD to School District Name
pov_df.rename(columns={'SD_NAME':'School Dist Name'}, inplace = True)
# Rename school district (secondary) to School District Code
pov_df.rename(columns={'school district (secondary)':'School Dist Code'}, inplace = True)
# Reorder columns 
pov_df=pov_df[['GEOID','State Fips','State','State Abbr','School Dist Code','School Dist Name','Ages 5-17 in Families in Poverty, Count Est','YEAR']]
pov_df

Unnamed: 0,GEOID,State Fips,State,State Abbr,School Dist Code,School Dist Name,"Ages 5-17 in Families in Poverty, Count Est",YEAR
0,0400450,04,Arizona,AZ,00450,Agua Fria Union High School District,868,2017
1,0400720,04,Arizona,AZ,00720,Antelope Union High School District,72,2017
2,0401160,04,Arizona,AZ,01160,Bicentennial Union High School District,45,2017
3,0401410,04,Arizona,AZ,01410,Buckeye Union High School District,559,2017
4,0401740,04,Arizona,AZ,01740,Casa Grande Union High School District,913,2017
...,...,...,...,...,...,...,...,...
397,5513290,55,Wisconsin,WI,13290,Westosha Central High School District,67,2017
398,5515150,55,Wisconsin,WI,15150,Union Grove Union High School District,38,2017
399,5515480,55,Wisconsin,WI,15480,Big Foot Union High School District,46,2017
400,5515600,55,Wisconsin,WI,15600,Waterford Union High School District,39,2017


In [20]:
# drop duplicates if any
pov_df.drop_duplicates(subset=None, keep='first', inplace=False, ignore_index=False)

Unnamed: 0,GEOID,State Fips,State,State Abbr,School Dist Code,School Dist Name,"Ages 5-17 in Families in Poverty, Count Est",YEAR
0,0400450,04,Arizona,AZ,00450,Agua Fria Union High School District,868,2017
1,0400720,04,Arizona,AZ,00720,Antelope Union High School District,72,2017
2,0401160,04,Arizona,AZ,01160,Bicentennial Union High School District,45,2017
3,0401410,04,Arizona,AZ,01410,Buckeye Union High School District,559,2017
4,0401740,04,Arizona,AZ,01740,Casa Grande Union High School District,913,2017
...,...,...,...,...,...,...,...,...
397,5513290,55,Wisconsin,WI,13290,Westosha Central High School District,67,2017
398,5515150,55,Wisconsin,WI,15150,Union Grove Union High School District,38,2017
399,5515480,55,Wisconsin,WI,15480,Big Foot Union High School District,46,2017
400,5515600,55,Wisconsin,WI,15600,Waterford Union High School District,39,2017


In [21]:
# drop null values if any
pov_df.dropna(axis=0, how='any', thresh=None, subset=None, inplace=False)

Unnamed: 0,GEOID,State Fips,State,State Abbr,School Dist Code,School Dist Name,"Ages 5-17 in Families in Poverty, Count Est",YEAR
0,0400450,04,Arizona,AZ,00450,Agua Fria Union High School District,868,2017
1,0400720,04,Arizona,AZ,00720,Antelope Union High School District,72,2017
2,0401160,04,Arizona,AZ,01160,Bicentennial Union High School District,45,2017
3,0401410,04,Arizona,AZ,01410,Buckeye Union High School District,559,2017
4,0401740,04,Arizona,AZ,01740,Casa Grande Union High School District,913,2017
...,...,...,...,...,...,...,...,...
397,5513290,55,Wisconsin,WI,13290,Westosha Central High School District,67,2017
398,5515150,55,Wisconsin,WI,15150,Union Grove Union High School District,38,2017
399,5515480,55,Wisconsin,WI,15480,Big Foot Union High School District,46,2017
400,5515600,55,Wisconsin,WI,15600,Waterford Union High School District,39,2017


In [22]:
pov_df.sample(100)

Unnamed: 0,GEOID,State Fips,State,State Abbr,School Dist Code,School Dist Name,"Ages 5-17 in Families in Poverty, Count Est",YEAR
400,5515600,55,Wisconsin,WI,15600,Waterford Union High School District,39,2017
356,3408190,34,New Jersey,NJ,08190,Lakeland Regional School District,61,2017
369,3412510,34,New Jersey,NJ,12510,Pascack Valley Regional School District,52,2017
195,2300053,23,Maine,ME,00053,Five Town Community School District,45,2017
371,3412600,34,New Jersey,NJ,12600,Passaic County Manchester Regional School Dist...,226,2017
...,...,...,...,...,...,...,...,...
31,0611820,06,California,CA,11820,East Side Union High School District,3075,2017
8,0406330,04,Arizona,AZ,06330,Phoenix Union High School District,12330,2017
249,3005310,30,Montana,MT,05310,Butte High School District,174,2017
154,1726880,17,Illinois,IL,26880,J. Sterling Morton High School District 201,1651,2017


In [24]:
# Reset the index
pov_df.reset_index(inplace=True, drop=True)
pov_df

Unnamed: 0,GEOID,State Fips,State,State Abbr,School Dist Code,School Dist Name,"Ages 5-17 in Families in Poverty, Count Est",YEAR
0,0400450,04,Arizona,AZ,00450,Agua Fria Union High School District,868,2017
1,0400720,04,Arizona,AZ,00720,Antelope Union High School District,72,2017
2,0401160,04,Arizona,AZ,01160,Bicentennial Union High School District,45,2017
3,0401410,04,Arizona,AZ,01410,Buckeye Union High School District,559,2017
4,0401740,04,Arizona,AZ,01740,Casa Grande Union High School District,913,2017
...,...,...,...,...,...,...,...,...
397,5513290,55,Wisconsin,WI,13290,Westosha Central High School District,67,2017
398,5515150,55,Wisconsin,WI,15150,Union Grove Union High School District,38,2017
399,5515480,55,Wisconsin,WI,15480,Big Foot Union High School District,46,2017
400,5515600,55,Wisconsin,WI,15600,Waterford Union High School District,39,2017


In [25]:
# Save to csv
pov_df.to_csv("School_Poverty_data2017.csv",index=False, header=True)