In [1]:
import os
import json
import hashlib
import requests
import pandas as pd
import time
from concurrent.futures import ThreadPoolExecutor, as_completed

In [2]:
BASE_URL = "https://educationdata.urban.org/api/v1/"
CACHE_DIR = "cache"
os.makedirs(CACHE_DIR, exist_ok=True)

YEARS = [2011,2013,2015,2017,2020]
FIPS_LIST = [55, 27, 17, 18, 39, 19, 31, 8, 37, 26, 13, 41, 48]
RACE_MAP = {
    1: "White", 2: "Black", 3: "Hispanic", 4: "Asian",
    5: "American Indian/Alaska Native", 6: "Native Hawaiian/Pacific Islander",
    7: "Two or more races", 8: "Nonresident alien"
}


SEX_MAP = {
    1: "Male",
    2: "Female"
}

VALID_RACES = set(RACE_MAP.keys())
VALID_SEXES = set(SEX_MAP.keys())

city_state_to_county = {
    ("madison", "WI"): "Dane County, WI",
    ("st. paul", "MN"): "Ramsey County, MN",
    ("springfield", "IL"): "Sangamon County, IL",
    ("indianapolis", "IN"): "Marion County, IN",
    ("columbus", "OH"): "Franklin County, OH",
    ("des moines", "IA"): "Polk County, IA",
    ("lincoln", "NE"): "Lancaster County, NE",
    ("boulder", "CO"): "Boulder County, CO",
    ("raleigh", "NC"): "Wake County, NC",
    ("champaign", "IL"): "Champaign County, IL",
    ("green bay", "WI"): "Brown County, WI",
    ("milwaukee", "WI"): "Milwaukee County, WI",
    ("ann arbor", "MI"): "Washtenaw County, MI",
    ("athens", "GA"): "Clarke County, GA",
    ("eugene", "OR"): "Lane County, OR",
    ("rockwall", "TX"): "Rockwall County, TX"
}

target_counties = {
    "Dane County, WI", "Ramsey County, MN", "Sangamon County, IL", "Marion County, IN",
    "Franklin County, OH", "Polk County, IA", "Lancaster County, NE", "Boulder County, CO",
    "Wake County, NC", "Champaign County, IL", "Brown County, WI", "Milwaukee County, WI",
    "Washtenaw County, MI", "Clarke County, GA", "Lane County, OR", "Rockwall County, TX"
}



In [3]:
VALID_RACES = set(range(1, 9))  # 1–9
VALID_SEXES = {1, 2}            # Male, Female


years = [2011,2013,2015,2017,2020]
fips = [55, 27, 17, 18, 39, 19, 31, 8, 37, 26, 13, 41, 48]
records = []

for year in years:
     for fip in fips:
         print(f"Fetching {year} for {fip}")
         url = f"{BASE_URL}schools/crdc/sat-act-participation/{year}/race/sex?fips={fip}"
        
         try:
             resp = requests.get(url)
             resp.raise_for_status()
             for row in resp.json().get("results", []):
                 leaid = row.get("leaid")
                 race = row.get("race")
                 sex = row.get("sex")
                 student_participation = row.get("students_SAT_ACT")
                 fips_val = row.get("fips")


                 if (
                     leaid is not None and
                     student_participation is not None and
                     student_participation not in (-1, -2, -3) and
                     race in VALID_RACES and
                     sex in VALID_SEXES
                 ):
                     records.append({
                         "leaid": leaid,
                         "race": race,
                         "sex": sex,
                         "student_participation": student_participation,  
                         "year": year,
                         "fips": fips_val
                     })
         except Exception as e:
             print(f"Error fetching {year}: {e}")


df = pd.DataFrame(records)

Fetching 2011 for 55
Fetching 2011 for 27
Fetching 2011 for 17
Fetching 2011 for 18
Fetching 2011 for 39
Fetching 2011 for 19
Fetching 2011 for 31
Fetching 2011 for 8
Fetching 2011 for 37
Fetching 2011 for 26
Fetching 2011 for 13
Fetching 2011 for 41
Fetching 2011 for 48
Fetching 2013 for 55
Fetching 2013 for 27
Fetching 2013 for 17
Fetching 2013 for 18
Fetching 2013 for 39
Fetching 2013 for 19
Fetching 2013 for 31
Fetching 2013 for 8
Fetching 2013 for 37
Fetching 2013 for 26
Fetching 2013 for 13
Fetching 2013 for 41
Fetching 2013 for 48
Fetching 2015 for 55
Fetching 2015 for 27
Fetching 2015 for 17
Fetching 2015 for 18
Fetching 2015 for 39
Fetching 2015 for 19
Fetching 2015 for 31
Fetching 2015 for 8
Fetching 2015 for 37
Fetching 2015 for 26
Fetching 2015 for 13
Fetching 2015 for 41
Fetching 2015 for 48
Fetching 2017 for 55
Fetching 2017 for 27
Fetching 2017 for 17
Fetching 2017 for 18
Fetching 2017 for 39
Fetching 2017 for 19
Fetching 2017 for 31
Fetching 2017 for 8
Fetching 2017 for

In [4]:
df


Unnamed: 0,leaid,race,sex,student_participation,year,fips
0,5500016,1,1,5,2011,55
1,5500016,2,1,0,2011,55
2,5500016,3,1,0,2011,55
3,5500016,4,1,0,2011,55
4,5500016,5,1,0,2011,55
...,...,...,...,...,...,...
137763,4817960,2,1,1,2020,48
137764,4817960,3,1,0,2020,48
137765,4817960,6,1,0,2020,48
137766,4817960,7,1,0,2020,48


In [5]:
leaid_county_map = []
fips = [55, 27, 17, 18, 39, 19, 31, 8, 37, 26, 13, 41, 48]
for year in YEARS:
    for fip in fips:
        print(f"Fetching {year} for {fip}")
        url = f"{BASE_URL}schools/ccd/directory/{year}/?fips={fip}"

        try:
            response = requests.get(url)
            response.raise_for_status()
            results = response.json().get("results", [])
    
            for record in results:
                leaid = record.get("leaid")
                city = record.get("city_mailing", "").lower().strip()
                state = record.get("state_mailing", "").strip()
                county_full = city_state_to_county.get((city, state))
    
                if county_full and leaid:
                    leaid_county_map.append({
                        "leaid": leaid,
                        "county_full": county_full,
                        "year": year
                    })
    
        except Exception as e:
            print(f"Failed for {year}: {e}")


df_mapping = pd.DataFrame(leaid_county_map)


df_mapping = df_mapping.sort_values("year").drop_duplicates(subset="leaid", keep="last")

Fetching 2011 for 55
Fetching 2011 for 27
Fetching 2011 for 17
Fetching 2011 for 18
Fetching 2011 for 39
Fetching 2011 for 19
Fetching 2011 for 31
Fetching 2011 for 8
Fetching 2011 for 37
Fetching 2011 for 26
Fetching 2011 for 13
Fetching 2011 for 41
Fetching 2011 for 48
Fetching 2013 for 55
Fetching 2013 for 27
Fetching 2013 for 17
Fetching 2013 for 18
Fetching 2013 for 39
Fetching 2013 for 19
Fetching 2013 for 31
Fetching 2013 for 8
Fetching 2013 for 37
Fetching 2013 for 26
Fetching 2013 for 13
Fetching 2013 for 41
Fetching 2013 for 48
Fetching 2015 for 55
Fetching 2015 for 27
Fetching 2015 for 17
Fetching 2015 for 18
Fetching 2015 for 39
Fetching 2015 for 19
Fetching 2015 for 31
Fetching 2015 for 8
Fetching 2015 for 37
Fetching 2015 for 26
Fetching 2015 for 13
Fetching 2015 for 41
Fetching 2015 for 48
Fetching 2017 for 55
Fetching 2017 for 27
Fetching 2017 for 17
Fetching 2017 for 18
Fetching 2017 for 39
Fetching 2017 for 19
Fetching 2017 for 31
Fetching 2017 for 8
Fetching 2017 for

In [6]:
leaid_map = df_mapping
leaid_map = leaid_map[['leaid', 'county_full']]
df2 = df.merge(leaid_map, on="leaid", how="inner")
df2 = df2.sort_values(by='county_full')

df2['race'] = df2['race'].map(RACE_MAP)
df2['sex'] = df2['sex'].map(SEX_MAP)
df2

Unnamed: 0,leaid,race,sex,student_participation,year,fips,county_full
1157,0802490,Hispanic,Female,20,2011,8,"Boulder County, CO"
1163,0802490,Black,Male,0,2011,8,"Boulder County, CO"
1162,0802490,White,Male,2,2011,8,"Boulder County, CO"
1161,0802490,Two or more races,Female,2,2011,8,"Boulder County, CO"
1160,0802490,Native Hawaiian/Pacific Islander,Female,0,2011,8,"Boulder County, CO"
...,...,...,...,...,...,...,...
1416,2600315,Hispanic,Male,0,2011,26,"Washtenaw County, MI"
1415,2600315,Black,Male,0,2011,26,"Washtenaw County, MI"
1414,2600315,White,Male,2,2011,26,"Washtenaw County, MI"
1412,2600164,Native Hawaiian/Pacific Islander,Female,0,2011,26,"Washtenaw County, MI"


In [12]:

grouped_df = df2.groupby(['county_full', 'race', 'sex', 'year'])['student_participation'].mean().reset_index()

grouped_df

Unnamed: 0,county_full,race,sex,year,student_participation
0,"Boulder County, CO",American Indian/Alaska Native,Female,2011,0.615385
1,"Boulder County, CO",American Indian/Alaska Native,Female,2013,0.769231
2,"Boulder County, CO",American Indian/Alaska Native,Female,2015,0.307692
3,"Boulder County, CO",American Indian/Alaska Native,Female,2017,0.230769
4,"Boulder County, CO",American Indian/Alaska Native,Male,2011,0.615385
...,...,...,...,...,...
891,"Washtenaw County, MI",White,Male,2011,11.800000
892,"Washtenaw County, MI",White,Male,2013,6.200000
893,"Washtenaw County, MI",White,Male,2015,21.333333
894,"Washtenaw County, MI",White,Male,2017,18.750000
