# Great Schools API Call
https://www.greatschools.org/

In [1]:
import os

import requests
from bs4 import BeautifulSoup
import pandas as pd

In [2]:
# GreatSchools API key, appended to every request URL in this notebook.
# Read it from the GREATSCHOOLS_API_KEY environment variable so the secret never
# has to be typed into (and saved with) the notebook; when the variable is unset
# this falls back to the empty string the cell originally contained.
api_key = os.environ.get('GREATSCHOOLS_API_KEY', '')

### Method: School Search
Documentation: https://www.greatschools.org/api/docs/schoolSearch.page

    e.g. https://api.greatschools.org/search/schools?key=[yourAPIKey]&state=CA&q=Alameda
    Searches for schools using the query string "Alameda" in California

In [34]:
SD_County_Schools = pd.read_csv('./VOSD San Diego County Schools Dataset 2017.csv')
print 'Number of unique cities included in SD County Schools Dataset: ', SD_County_Schools['City'].nunique()
cities = list(SD_County_Schools['City'].unique())

Number of unique cities included in SD County Schools Dataset:  45


In [35]:
schools_dict = {'gsid':list(), 'name':list(), 'type':list(), 'graderange':list(), 'enrollment':list(), 
                'gsrating':list(), 'city':list(), 'state':list(), 'districtid':list(), 'district':list(), 
                'districtncesid':list(), 'address':list(), 'phone':list(), 'fax':list(), 'website':list(), 
                'ncesid':list(), 'lat':list(), 'lon':list(), 'overviewlink':list(), 'ratingslink':list(), 
                'reviewslink':list(), 'schoolstatslink':list()}  

for city in cities:
    URL = 'https://api.greatschools.org/search/schools?key='+ api_key + '&state=CA&q='+city+ '&limit=1000'
    request = requests.get(URL)
    response = BeautifulSoup(request.content, 'html.parser')
    
    content = response.schools.contents
    for key in schools_dict.keys():
        for i in range(len(content)):
            try:
                schools_dict.get(key).append(content[i].find(key).text)
            except:
                schools_dict.get(key).append(None)

In [53]:
school_search = pd.DataFrame.from_dict(schools_dict, orient='columns')
print 'Prior to dropping duplicates, the data shape is: ', school_search.shape
school_search = school_search.drop_duplicates()
print 'After dropping duplicates, the data shape is: ', school_search.shape

Prior to dropping duplicates, the data shape is:  (1999, 22)
After dropping duplicates, the data shape is:  (1717, 22)


### Method: City Overview
Documentation: https://www.greatschools.org/api/docs/city-overview/

    e.g. https://api.greatschools.org/cities/CA/San-Francisco?key=[yourAPIKey]
    Returns city information for San Francisco, CA

In [304]:
# Column-oriented accumulator for the City Overview results: one list per field,
# one entry per city (text of the first matching tag, or None when absent).
city_overview_dict = {'name':list(), 'rating':list(), 'totalschools':list(), 'elementaryschools':list(), 
                      'middleschools':list(), 'highschools':list(), 'publicschools':list(), 'charterschools':list(), 
                      'privateschools':list()}

for city in cities:
    # NOTE(review): the GreatSchools example URL writes a multi-word city as
    # 'San-Francisco'; city names are passed through unchanged here -- confirm the
    # API resolves names that contain spaces.
    URL = 'https://api.greatschools.org/cities/CA/'+ city +'?key='+ api_key
    request = requests.get(URL)
    response = BeautifulSoup(request.content, 'html.parser')

    for key in city_overview_dict.keys():
        # find() returns None when the tag is missing; test for that explicitly
        # instead of relying on a bare except that would hide unrelated errors.
        tag = response.find(key)
        city_overview_dict[key].append(tag.text if tag is not None else None)

In [307]:
city_overview = pd.DataFrame.from_dict(city_overview_dict, orient='columns')
print 'The shape of the data is: ', city_overview.shape

The shape of the data is:  (45, 9)


### Method: School Profile
Documentation: https://www.greatschools.org/api/docs/schoolProfile.page

    e.g. https://api.greatschools.org/schools/CA/1?key=[yourkey]
    Returns school profile for school in California with GreatSchools ID of 1
    
This API method will not be used because the information is already included via the School Search method.

### Method: School Census Data
Documentation: https://www.greatschools.org/api/docs/schoolCensusData.page

    e.g. https://api.greatschools.org/school/census/ND/20?key=[yourkey]
    Returns census and profile data for school in North Dakota with GreatSchools ID of 20

In [499]:
gsids = list(school_search['gsid'].astype(str))
print 'Number of schools searched: ', len(gsids)

school_census_data_responses = []
count = 1
for gsid in gsids:
    URL = 'https://api.greatschools.org/school/census/CA/'+ gsid + '?key=' + api_key
    request = requests.get(URL)
    response = BeautifulSoup(request.content, 'html.parser')
    school_census_data_responses.append(response)
#     print count
#     count += 1

In [495]:
# Accumulators for the census table, one list per column and one entry per school.
#   census_dict_1: school-level fields plus the gsid the response was fetched for.
#   census_dict_2: the first <year> in the response plus one column per ethnicity
#                  name; each column holds the <value> reported under that name.
census_dict_1 = {'gsid':list(), 'headofficialname':list(), 'headofficialemail':list(), 'freeandreducedpricelunch':list()}
census_dict_2 = {'year':list(), 'Asian':list(), 'Native American or Native Alaskan':list(), 'Pacific Islander':list(), 
               'Hispanic':list(), 'Black, non-Hispanic':list(), 'Multiracial':list(), 'Filipino':list(), 
               'White, non-Hispanic':list()}

for r, response in enumerate(school_census_data_responses):
    # The school-level fields are looked up on the second top-level node of the
    # parsed document; a shorter document simply yields None for every field.
    content_1 = response.contents[1] if len(response.contents) > 1 else None
    for key in census_dict_1.keys():
        if key == 'gsid':
            census_dict_1[key].append(gsids[r])
        else:
            try:
                census_dict_1[key].append(content_1.find(key).text)
            except AttributeError:
                # Tag missing, or content_1 is not an element.
                census_dict_1[key].append(None)

    # Pair every <name> under <ethnicities> with the <value> at the same position and
    # look columns up by name. The previous code indexed the values with `i`, a
    # variable this cell never assigns (it leaked from an earlier cell's loop), so
    # every ethnicity column received the same positional value or None.
    ethnicities = response.ethnicities
    if ethnicities is None:
        value_by_name = {}
    else:
        names = ethnicities.find_all('name')
        values = ethnicities.find_all('value')
        value_by_name = dict((n.text.strip(), v.text) for n, v in zip(names, values))

    year_tag = response.find('year')
    for k in census_dict_2.keys():
        if k == 'year':
            census_dict_2[k].append(year_tag.text if year_tag is not None else None)
        else:
            # None when the response does not report this ethnicity.
            census_dict_2[k].append(value_by_name.get(k))

In [496]:
census_1 = pd.DataFrame.from_dict(census_dict_1, orient='columns')
census_2 = pd.DataFrame.from_dict(census_dict_2, orient='columns')
school_census_data = pd.concat([census_1, census_2], axis=1)
print 'The data shape is: ', school_census_data.shape

The data shape is:  (1717, 13)


### Method: School Test Scores
Documentation: https://www.greatschools.org/api/docs/school-test-scores/

    e.g. https://api.greatschools.org/school/tests/CA/1?key=[yourkey]
    Returns test scores for school in California with GreatSchools ID of 1

In [521]:
gsids = list(school_search['gsid'].astype(str))
print 'Number of schools searched: ', len(gsids)

test_score_responses = []
count = 1
for gsid in gsids:
    URL = 'https://api.greatschools.org/school/tests/CA/'+ gsid + '?key=' + api_key
    request = requests.get(URL)
    response = BeautifulSoup(request.content, 'html.parser')
    test_score_responses.append(response)
#     print count
#     count += 1

In [515]:
ts_dict_1 = {'gsid':list(), 'name':list(), 'rating':list(), 'id':list(), 'description':list(), 'abbreviation':list(), 
             'scale':list(), 'levelcode':list()}
ts_dict_2 = {'breakdownname':list(), 'gradename':list(), 'levelcodename':list(), 'numbertested':list(), 
             'proficiencybandname':list(), 'score':list(), 'subjectname':list(), 'testid':list(), 'year':list()}

for g in range(len(test_score_responses)):
    content = test_score_response.find_all('test')
    for c in range(len(content)):    
        results = content[c].find_all('testresult')
        for r in range(len(results)):
            for key in ts_dict_2.keys():
                try:
                    ts_dict_2.get(key).append(results[r].find(key).text)
                except:
                    ts_dict_2.get(key).append(None)
        for k in ts_dict_1.keys():
            if k == 'gsid':
                ts_dict_1.get(k).extend([gsids[g]]*len(results))
            else:
                try:
                    ts_dict_1.get(k).extend([content[c].find(k).text]*len(results))
                except:
                    ts_dict_1.get(k).extend([None]*len(results))

In [519]:
ts_1 = pd.DataFrame.from_dict(ts_dict_1, orient='columns')
ts_2 = pd.DataFrame.from_dict(ts_dict_2, orient='columns')
test_scores = pd.concat([ts_1, ts_2], axis=1)
print 'The shape of the data is: ', test_scores.shape

The shape of the data is:  (22321, 17)


### Export Data for Further Analysis

In [498]:
# Write the census table to the working directory as UTF-8, without the row index.
school_census_data.to_csv('school_census_data.csv', index=False, encoding='utf-8')

In [54]:
# Write the School Search table to the working directory as UTF-8, without the row index.
school_search.to_csv('school_search.csv', index=False, encoding='utf-8')

In [308]:
# Write the City Overview table to the working directory as UTF-8, without the row index.
city_overview.to_csv('city_overview.csv', index=False, encoding='utf-8')

In [520]:
# Write the test-score table to the working directory as UTF-8, without the row index.
test_scores.to_csv('test_scores.csv', index=False, encoding='utf-8')