In [17]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Turn on pandas' "mode.copy_on_write" option for the whole notebook.
pd.options.mode.copy_on_write = True

In [18]:
# Load the 2000–2010 county population file and keep county-level rows only
# (SUMLEV == 50); every other summary level is discarded.
population_00_10 = (
    pd.read_csv("population_data/co-est00int-tot.csv", encoding="latin1")
    .loc[lambda df: df["SUMLEV"].eq(50)]
)

# Retain the identifier columns plus every yearly POPESTIMATE* column.
id_cols = ["STATE", "COUNTY", "STNAME", "CTYNAME"]
year_cols = [name for name in population_00_10.columns if name.startswith("POPESTIMATE")]
keep_cols = id_cols + year_cols
population_00_10 = population_00_10.loc[:, keep_cols]

# Reshape wide -> long (one row per county per year), then derive the two
# helper columns in a single step.
population_00_10_long = pd.melt(
    population_00_10,
    id_vars=id_cols,
    value_vars=year_cols,
    var_name="year",
    value_name="population",
).assign(
    # The melted "year" still holds the source column name
    # (e.g. "POPESTIMATE2000"); keep only its digits, as an integer.
    year=lambda df: df["year"].str.extract(r"(\d+)", expand=False).astype(int),
    # 5-character FIPS string: state code padded to 2 digits followed by the
    # county code padded to 3 digits.
    fips=lambda df: (
        df["STATE"].astype(str).str.zfill(2)
        + df["COUNTY"].astype(str).str.zfill(3)
    ),
)

In [19]:
# Preview the long-format 2000–2010 table.
# STATE and COUNTY hold the numeric state and county codes; `fips` is the
# 5-character string built from them (STATE zero-padded to 2 digits followed
# by COUNTY zero-padded to 3 digits).
population_00_10_long

Unnamed: 0,STATE,COUNTY,STNAME,CTYNAME,year,population,fips
0,1,1,Alabama,Autauga County,2000,44021,01001
1,1,3,Alabama,Baldwin County,2000,141342,01003
2,1,5,Alabama,Barbour County,2000,29015,01005
3,1,7,Alabama,Bibb County,2000,19913,01007
4,1,9,Alabama,Blount County,2000,51107,01009
...,...,...,...,...,...,...,...
34568,56,37,Wyoming,Sweetwater County,2010,43621,56037
34569,56,39,Wyoming,Teton County,2010,21302,56039
34570,56,41,Wyoming,Uinta County,2010,21137,56041
34571,56,43,Wyoming,Washakie County,2010,8556,56043


In [20]:
# Load the 2010–2019 county population file and keep county-level rows only
# (SUMLEV == 50).
population_10_19 = (
    pd.read_csv("population_data/co-est2019-alldata.csv", encoding="latin1")
    .loc[lambda df: df["SUMLEV"].eq(50)]
)

# Retain the identifier columns plus every yearly POPESTIMATE* column.
id_cols = ["STATE", "COUNTY", "STNAME", "CTYNAME"]
year_cols = [name for name in population_10_19.columns if name.startswith("POPESTIMATE")]
keep_cols = id_cols + year_cols
population_10_19 = population_10_19.loc[:, keep_cols]

# Reshape wide -> long (one row per county per year) and derive the integer
# year and the 5-character FIPS string.
population_10_19_long = pd.melt(
    population_10_19,
    id_vars=id_cols,
    value_vars=year_cols,
    var_name="year",
    value_name="population",
).assign(
    # Strip the "POPESTIMATE" prefix by keeping only the digits of the name.
    year=lambda df: df["year"].str.extract(r"(\d+)", expand=False).astype(int),
    # State code padded to 2 digits + county code padded to 3 digits.
    fips=lambda df: (
        df["STATE"].astype(str).str.zfill(2)
        + df["COUNTY"].astype(str).str.zfill(3)
    ),
)


In [21]:
# Preview the long-format 2010–2019 table (same columns as the 2000–2010 one).
population_10_19_long

Unnamed: 0,STATE,COUNTY,STNAME,CTYNAME,year,population,fips
0,1,1,Alabama,Autauga County,2010,54773,01001
1,1,3,Alabama,Baldwin County,2010,183112,01003
2,1,5,Alabama,Barbour County,2010,27327,01005
3,1,7,Alabama,Bibb County,2010,22870,01007
4,1,9,Alabama,Blount County,2010,57376,01009
...,...,...,...,...,...,...,...
31415,56,37,Wyoming,Sweetwater County,2019,42343,56037
31416,56,39,Wyoming,Teton County,2019,23464,56039
31417,56,41,Wyoming,Uinta County,2019,20226,56041
31418,56,43,Wyoming,Washakie County,2019,7805,56043


In [22]:
# Load the 2020–2024 county population file and keep county-level rows only
# (SUMLEV == 50).
population_20_24 = (
    pd.read_csv("population_data/co-est2024-alldata.csv", encoding="latin1")
    .loc[lambda df: df["SUMLEV"].eq(50)]
)

# Retain the identifier columns plus every yearly POPESTIMATE* column.
id_cols = ["STATE", "COUNTY", "STNAME", "CTYNAME"]
year_cols = [name for name in population_20_24.columns if name.startswith("POPESTIMATE")]
keep_cols = id_cols + year_cols
population_20_24 = population_20_24.loc[:, keep_cols]

# Reshape wide -> long (one row per county per year) and derive the integer
# year and the 5-character FIPS string.
population_20_24_long = pd.melt(
    population_20_24,
    id_vars=id_cols,
    value_vars=year_cols,
    var_name="year",
    value_name="population",
).assign(
    # Strip the "POPESTIMATE" prefix by keeping only the digits of the name.
    year=lambda df: df["year"].str.extract(r"(\d+)", expand=False).astype(int),
    # State code padded to 2 digits + county code padded to 3 digits.
    fips=lambda df: (
        df["STATE"].astype(str).str.zfill(2)
        + df["COUNTY"].astype(str).str.zfill(3)
    ),
)


In [23]:
# Preview the long-format 2020–2024 table (same columns as the earlier ones).
population_20_24_long

Unnamed: 0,STATE,COUNTY,STNAME,CTYNAME,year,population,fips
0,1,1,Alabama,Autauga County,2020,58909,01001
1,1,3,Alabama,Baldwin County,2020,233244,01003
2,1,5,Alabama,Barbour County,2020,24975,01005
3,1,7,Alabama,Bibb County,2020,22176,01007
4,1,9,Alabama,Blount County,2020,59110,01009
...,...,...,...,...,...,...,...
15715,56,37,Wyoming,Sweetwater County,2024,41273,56037
15716,56,39,Wyoming,Teton County,2024,23272,56039
15717,56,41,Wyoming,Uinta County,2024,20621,56041
15718,56,43,Wyoming,Washakie County,2024,7662,56043


In [24]:
# Final merge: stack the three long-format tables into one 2000–2024 panel
# with one row per county (fips) per year.
#
# The 2000–2010 table and the 2010–2019 table BOTH contain year 2010, so a
# plain concat leaves two rows per county for 2010, which would double-count
# in any later merge or aggregation keyed on (fips, year). Duplicates are
# dropped before sorting; keep="last" retains the row coming from the later
# table in the concat order below (i.e. the 2010–2019 file's 2010 value).
# NOTE(review): confirm that the 2010–2019 file is the preferred source for
# 2010; switch to keep="first" to use the 2000–2010 file's value instead.
population_2000_2024 = (
    pd.concat(
        [population_00_10_long, population_10_19_long, population_20_24_long],
        ignore_index=True,
    )
    .drop_duplicates(subset=["fips", "year"], keep="last")
    .sort_values(["fips", "year"])
)



In [25]:
# Preview the combined table, ordered by fips and then by year.
population_2000_2024

Unnamed: 0,STATE,COUNTY,STNAME,CTYNAME,year,population,fips
0,1,1,Alabama,Autauga County,2000,44021,01001
3143,1,1,Alabama,Autauga County,2001,44889,01001
6286,1,1,Alabama,Autauga County,2002,45909,01001
9429,1,1,Alabama,Autauga County,2003,46800,01001
12572,1,1,Alabama,Autauga County,2004,48366,01001
...,...,...,...,...,...,...,...
69136,56,45,Wyoming,Weston County,2020,6817,56045
72280,56,45,Wyoming,Weston County,2021,6747,56045
75424,56,45,Wyoming,Weston County,2022,6872,56045
78568,56,45,Wyoming,Weston County,2023,6828,56045


In [None]:
# Check for missing values: number of NaN entries in each column
# (all zeros means nothing is missing).
population_2000_2024.isna().sum()


STATE         0
COUNTY        0
STNAME        0
CTYNAME       0
year          0
population    0
fips          0
dtype: int64

In [27]:
# Restrict the panel to the states required for the analysis.
# The same selection could be made on state FIPS codes instead of names:
# "01", "12", "13", "37", "45", "53".
states_to_keep = [
    "Alabama", "Florida", "Georgia",
    "North Carolina", "South Carolina", "Washington",
]

# Boolean-mask the rows whose STNAME is in the list; .copy() gives an
# independent frame for downstream work.
pop_filtered = population_2000_2024.loc[
    lambda df: df["STNAME"].isin(states_to_keep)
].copy()

pop_filtered.head()


Unnamed: 0,STATE,COUNTY,STNAME,CTYNAME,year,population,fips
0,1,1,Alabama,Autauga County,2000,44021,1001
3143,1,1,Alabama,Autauga County,2001,44889,1001
6286,1,1,Alabama,Autauga County,2002,45909,1001
9429,1,1,Alabama,Autauga County,2003,46800,1001
12572,1,1,Alabama,Autauga County,2004,48366,1001


In [None]:
# Check states: list the distinct STNAME values left after filtering
# (should match states_to_keep).
pop_filtered["STNAME"].unique()

array(['Alabama', 'Florida', 'Georgia', 'North Carolina',
       'South Carolina', 'Washington'], dtype=object)

In [29]:
# Check years: distinct year values in ascending order, to confirm the
# range is continuous with no gaps.
sorted(pop_filtered["year"].unique())

[2000,
 2001,
 2002,
 2003,
 2004,
 2005,
 2006,
 2007,
 2008,
 2009,
 2010,
 2011,
 2012,
 2013,
 2014,
 2015,
 2016,
 2017,
 2018,
 2019,
 2020,
 2021,
 2022,
 2023,
 2024]

In [None]:
# Check that fips codes are well-formed: list the distinct string lengths
# found in the column. A single value of 5 means every code has the full
# 2-digit state + 3-digit county width (no lost leading zeros).
pop_filtered["fips"].str.len().unique()

array([5], dtype=int64)