In [2]:
import pandas as pd
import requests

# Load the mass-shootings CSV export and capitalise its `state` column in one
# chained expression (no in-place mutation), so re-running the cell is idempotent.
# NOTE(review): the rename presumably aligns this frame with the "State" column
# used by the Census frames built later in the notebook — confirm against the merge cell.
df = pd.read_csv(
    r"C:\Users\claud\OneDrive\Escritorio\Ironhack\Labs\week 3\Usa-Shootings-and-State-Parties\gva_mass_shootings-2025-10-21.csv"
).rename(columns={"state": "State"})

In [3]:
# Survey years to request from the Census API: 2014 through 2024 inclusive
# (the upper bound of range() is exclusive).
years = [*range(2014, 2025)]
def get_edu(year, timeout=30):
    """Fetch per-state educational-attainment counts for one ACS 1-year dataset.

    Requests ten ``B15003_*`` estimate columns plus ``NAME`` for every state
    from ``https://api.census.gov/data/{year}/acs/acs1`` and reshapes the
    answer into one row per state.

    Parameters
    ----------
    year : int
        Dataset year, inserted into the endpoint URL and stored in the
        ``year`` column of the result.
    timeout : float, optional
        Seconds to wait for the API before giving up (default 30), so a
        stalled connection cannot hang the notebook.

    Returns
    -------
    pandas.DataFrame
        Columns ``year``, ``State``, ``Population_25plus``,
        ``Without Min Studies``, ``High_School_Graduate``,
        ``Bachelor_Degree``, ``Master_Degree``, ``Doctorate_Degree``.
        ``Without Min Studies`` is the row-wise sum of the five lowest
        attainment columns requested. An empty DataFrame is returned (and a
        warning printed) when the request fails, the status is not 200, the
        body does not look like a JSON table, or the JSON cannot be decoded.
    """
    # Single source of truth for the requested variables and their labels; the
    # key order defines the order of the "get" query parameter.
    # NOTE(review): the Census variable list for table B15003 appears to define
    # _003E.._006E as nursery school, kindergarten, 1st grade and 2nd grade
    # (with 9th grade at _013E). If so, the labels below - and therefore
    # "Without Min Studies" - do not cover the grades their names suggest.
    # Codes are left unchanged here; confirm against the variable list.
    renames = {
        "NAME": "State",
        "B15003_001E": "Population_25plus",
        "B15003_002E": "No_Schooling_Completed",
        "B15003_003E": "Nursery_to_4th_Grade",
        "B15003_004E": "5th_to_6th_Grade",
        "B15003_005E": "7th_to_8th_Grade",
        "B15003_006E": "9th_Grade",
        "B15003_017E": "High_School_Graduate",
        "B15003_022E": "Bachelor_Degree",
        "B15003_023E": "Master_Degree",
        "B15003_025E": "Doctorate_Degree",
    }
    col_min_studies = [
        "No_Schooling_Completed",
        "Nursery_to_4th_Grade",
        "5th_to_6th_Grade",
        "7th_to_8th_Grade",
        "9th_Grade",
    ]

    url_edu = f"https://api.census.gov/data/{year}/acs/acs1"
    params = {"get": ",".join(renames), "for": "state:*"}

    # A connection error or timeout is treated like any other unavailable
    # year: warn and return an empty frame instead of aborting the caller's loop.
    try:
        response_1 = requests.get(url_edu, params=params, timeout=timeout)
    except requests.RequestException as exc:
        print(f"⚠️ {year}: request failed ({exc}) – skipping year")
        return pd.DataFrame()

    if response_1.status_code != 200:
        print(f"⚠️ {year}: HTTP {response_1.status_code} – skipping year")
        return pd.DataFrame()
    # The API answers with a JSON array of arrays (header row + data rows).
    if not response_1.text.strip().startswith('[['):
        print(f"⚠️ {year}: invalid or empty response – skipping year")
        return pd.DataFrame()

    # Decode exactly once; JSON decoding errors are ValueError subclasses.
    try:
        data = response_1.json()
    except ValueError:
        print(f"⚠️ {year}: cannot decode JSON – skipping year")
        return pd.DataFrame()

    # First row is the header, the remaining rows are one record per state.
    df_edu = pd.DataFrame(data[1:], columns=data[0])
    # Values arrive as strings; anything non-numeric becomes NaN.
    num_col = [column for column in df_edu.columns if column.startswith("B15003_")]
    df_edu[num_col] = df_edu[num_col].apply(pd.to_numeric, errors="coerce")

    df_edu = df_edu.rename(columns=renames)
    df_edu["Without Min Studies"] = df_edu[col_min_studies].sum(axis=1)
    df_edu["year"] = year

    # Selecting the final columns also discards the five component columns
    # and the API's numeric `state` code.
    return df_edu[[
        "year", "State", "Population_25plus",
        "Without Min Studies", "High_School_Graduate",
        "Bachelor_Degree", "Master_Degree", "Doctorate_Degree"
    ]]
# Download every requested year; years the API cannot serve come back as an
# empty frame from get_edu and are left out of the result.
frame = []
for year in years:
    print(f"Fetching {year}...")
    df_year = get_edu(year)
    if not df_year.empty:
        frame.append(df_year)

# pd.concat raises ValueError on an empty list, so fall back to an empty frame
# when every year was skipped (e.g. the API is unreachable).
df_edu = pd.concat(frame, ignore_index=True) if frame else pd.DataFrame()
df_edu



Fetching 2014...
Fetching 2015...
Fetching 2016...
Fetching 2017...
Fetching 2018...
Fetching 2019...
Fetching 2020...
⚠️ 2020: HTTP 404 – skipping year
Fetching 2021...
Fetching 2022...
Fetching 2023...
Fetching 2024...


Unnamed: 0,year,State,Population_25plus,Without Min Studies,High_School_Graduate,Bachelor_Degree,Master_Degree,Doctorate_Degree
0,2014,Alabama,3256766,48873,847523,478058,201391,33082
1,2014,Alaska,465149,4144,108523,85634,33103,4304
2,2014,Arizona,4436226,77042,899108,765784,328400,53578
3,2014,Arkansas,1968414,25814,568437,272411,104636,19380
4,2014,California,25654292,814956,4768478,5120162,2035257,394265
...,...,...,...,...,...,...,...,...
515,2024,Washington,5631460,105363,975478,1372926,683345,113323
516,2024,West Virginia,1268689,14987,420214,187714,88027,13183
517,2024,Wisconsin,4172638,48054,1057997,943201,362471,61628
518,2024,Wyoming,409344,4684,90634,82918,35122,8475


In [4]:
def total_pop(year, timeout=30):
    """Fetch the ``B01003_001E`` estimate for every state for one ACS 1-year dataset.

    Queries ``https://api.census.gov/data/{year}/acs/acs1`` for ``NAME`` and
    ``B01003_001E`` with ``for=state:*``.

    Parameters
    ----------
    year : int
        Dataset year, inserted into the endpoint URL and stored in the
        ``year`` column of the result.
    timeout : float, optional
        Seconds to wait for the API before giving up (default 30).

    Returns
    -------
    pandas.DataFrame
        Columns ``year``, ``State``, ``State_Code``, ``Total_Population``
        (numeric; unparseable values become NaN). An empty DataFrame is
        returned, without printing anything, when the request fails, the
        status is not 200, the body does not look like a JSON table, or the
        JSON cannot be decoded.
    """
    url = f"https://api.census.gov/data/{year}/acs/acs1"
    params = {"get": "NAME,B01003_001E", "for": "state:*"}

    # Connection errors and timeouts are reported the same way as any other
    # unavailable year: an empty frame that callers already test for.
    try:
        r = requests.get(url, params=params, timeout=timeout)
    except requests.RequestException:
        return pd.DataFrame()

    # The API answers with a JSON array of arrays (header row + data rows).
    if r.status_code != 200 or not r.text.strip().startswith("[["):
        return pd.DataFrame()

    # JSON decoding errors are ValueError subclasses.
    try:
        data = r.json()
    except ValueError:
        return pd.DataFrame()

    # First row is the header, the remaining rows are one record per state.
    df_pop = pd.DataFrame(data[1:], columns=data[0]).rename(columns={
        "NAME": "State",
        "state": "State_Code",
        "B01003_001E": "Total_Population",
    })
    df_pop["year"] = year
    df_pop["Total_Population"] = pd.to_numeric(df_pop["Total_Population"], errors="coerce")
    return df_pop[["year", "State", "State_Code", "Total_Population"]]


In [5]:
# Attach each year's total population to that year's education table.
# A year is kept only when both API calls returned data.
frames = []
for year in years:
    edu = get_edu(year)
    pop = total_pop(year)
    if edu.empty or pop.empty:
        continue
    # Left join keeps every education row; a state without a population
    # match gets NaN in Total_Population.
    merged_edu = pd.merge(edu, pop, on=["year", "State"], how="left")
    frames.append(merged_edu)

# pd.concat raises ValueError on an empty list, and dropping State_Code from an
# empty frame would raise KeyError, so only concat/drop when something was fetched.
if frames:
    df_eduPop = pd.concat(frames, ignore_index=True).drop(columns=["State_Code"])
else:
    df_eduPop = pd.DataFrame()
df_eduPop

⚠️ 2020: HTTP 404 – skipping year


Unnamed: 0,year,State,Population_25plus,Without Min Studies,High_School_Graduate,Bachelor_Degree,Master_Degree,Doctorate_Degree,Total_Population
0,2014,Alabama,3256766,48873,847523,478058,201391,33082,4849377
1,2014,Alaska,465149,4144,108523,85634,33103,4304,736732
2,2014,Arizona,4436226,77042,899108,765784,328400,53578,6731484
3,2014,Arkansas,1968414,25814,568437,272411,104636,19380,2966369
4,2014,California,25654292,814956,4768478,5120162,2035257,394265,38802500
...,...,...,...,...,...,...,...,...,...
515,2024,Washington,5631460,105363,975478,1372926,683345,113323,7958180
516,2024,West Virginia,1268689,14987,420214,187714,88027,13183,1769979
517,2024,Wisconsin,4172638,48054,1057997,943201,362471,61628,5960975
518,2024,Wyoming,409344,4684,90634,82918,35122,8475,587618
