In [1]:
import pandas as pd

# Land Area, Population Estimate, Population 
- Land Area (2010) - census
- Population Estimate (2016, 2017, 2018, 2019) - usda
- Population Census (2020) - usda

Acknowledgment to Ken Noppinger:<br>
https://github.com/knoppin1/DATA-606-Capstone

## 1.1 Read land area

In [2]:
df_land = pd.read_excel('https://www2.census.gov/library/publications/2011/compendia/usa-counties/excel/LND01.xls')
df_land.head()

Unnamed: 0,Areaname,STCOU,LND010190F,LND010190D,LND010190N1,LND010190N2,LND010200F,LND010200D,LND010200N1,LND010200N2,...,LND110210N1,LND110210N2,LND210190F,LND210190D,LND210190N1,LND210190N2,LND210200F,LND210200D,LND210200N1,LND210200N2
0,UNITED STATES,0,0,3787425.08,0,0,0,3794083.06,0,0,...,0,0,0,251083.35,0,0,0,256644.62,0,0
1,ALABAMA,1000,0,52422.94,0,0,0,52419.02,0,0,...,0,0,0,1672.71,0,0,0,1675.01,0,0
2,"Autauga, AL",1001,0,604.49,0,0,0,604.45,0,0,...,0,0,0,8.48,0,0,0,8.48,0,0
3,"Baldwin, AL",1003,0,2027.08,0,0,0,2026.93,0,0,...,0,0,0,430.55,0,0,0,430.58,0,0
4,"Barbour, AL",1005,0,904.59,0,0,0,904.52,0,0,...,0,0,0,19.59,0,0,0,19.61,0,0


## 1.2 Cleanup columns
Extract the needed columns: <br>
- Areaname - county and state
- STCOU - FIPS code for state and county
- LND110210D - LND - land area; 110- subgroup; 210 - 2(a year in the 21st century); 10(last two digits of the year); D - data. Therefore, this is a data gathered from 2010 in sqm.

In [3]:
df_land_area = df_land[['Areaname', 'STCOU', 'LND110210D']]
df_land_area.head(5)

Unnamed: 0,Areaname,STCOU,LND110210D
0,UNITED STATES,0,3531905.43
1,ALABAMA,1000,50645.33
2,"Autauga, AL",1001,594.44
3,"Baldwin, AL",1003,1589.78
4,"Barbour, AL",1005,884.88


In [4]:
df_land_area.info()   

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3198 entries, 0 to 3197
Data columns (total 3 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Areaname    3198 non-null   object 
 1   STCOU       3198 non-null   int64  
 2   LND110210D  3198 non-null   float64
dtypes: float64(1), int64(1), object(1)
memory usage: 75.1+ KB


In [5]:
# Rename STCOU to FIPStxt
df_land_area.rename(columns = {'STCOU': "FIPStxt"}, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


In [6]:
us_land_area = df_land_area[df_land_area.Areaname == 'UNITED STATES']
us_land_area

Unnamed: 0,Areaname,FIPStxt,LND110210D
0,UNITED STATES,0,3531905.43


In [7]:
# Remove the All states data from the State column
states = ['Alabama', 'Alaska', 'Arizona', 'Arkansas', 'California', 'Colorado',
          'Connecticut','Delaware', 'Florida', 'Georgia',
          'Hawaii', 'Idaho', 'Illinois', 'Indiana', 'Iowa', 'Kansas', 'Kentucky',
          'Louisiana', 'Maine', 'Maryland', 'Massachusetts', 'Michigan', 
          'Minnesota', 'Mississippi', 'Missouri', 'Montana', 'Nebraska', 
          'Nevada', 'New Hampshire', 'New Jersey', 'New Mexico', 'New York', 
          'North Carolina', 'North Dakota', 'Ohio', 'Oklahoma', 'Oregon', 'Pennsylvania', 
          'Rhode Island', 'South Carolina', 'South Dakota', 'Tennessee', 'Texas', 'Utah', 
          'Vermont', 'Virginia', 'Washington', 'West Virginia', 'Wisconsin', 'Wyoming']
len(states)

50

In [8]:
# Convert states to upper case
states1 = [x.upper() for x in states]
print("This is the first five states in the list that's capitalized")
states1[0:5]

This is the first five states in the list that's capitalized


['ALABAMA', 'ALASKA', 'ARIZONA', 'ARKANSAS', 'CALIFORNIA']

In [9]:
all_states_land_area = df_land_area[df_land_area['Areaname'].isin(states1)]
print("Number of all states: ", len(all_states_land_area['Areaname'].unique()))

Number of all states:  50


In [10]:
print("This dataset still includes District of Columbia")
df_land_area[df_land_area.Areaname == 'DISTRICT OF COLUMBIA']

This dataset still includes District of Columbia


Unnamed: 0,Areaname,FIPStxt,LND110210D
328,DISTRICT OF COLUMBIA,11000,61.05


In [11]:
# Drop the rows with US and State level data
df_land_area = df_land_area[df_land_area.Areaname != 'UNITED STATES']
df_land_area = df_land_area[~df_land_area['Areaname'].isin(states1)].reset_index(drop=True)
df_land_area.head()

Unnamed: 0,Areaname,FIPStxt,LND110210D
0,"Autauga, AL",1001,594.44
1,"Baldwin, AL",1003,1589.78
2,"Barbour, AL",1005,884.88
3,"Bibb, AL",1007,622.58
4,"Blount, AL",1009,644.78


In [12]:
[row for row in df_land_area.duplicated(keep='last') if row == True]

[]

## 1.3 Verify Missing Data

In [13]:
df_land_area.info()  

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3147 entries, 0 to 3146
Data columns (total 3 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Areaname    3147 non-null   object 
 1   FIPStxt     3147 non-null   int64  
 2   LND110210D  3147 non-null   float64
dtypes: float64(1), int64(1), object(1)
memory usage: 73.9+ KB


## 2.1 - Read Population Estimate (2016 - 2019)

In [14]:
df_pop_est = pd.read_excel('https://www.ers.usda.gov/webdocs/DataFiles/48747/PopulationEstimates.xls', header=2)
df_pop_est.head(5)

Unnamed: 0,FIPStxt,State,Area_Name,Rural-urban_Continuum Code_2003,Rural-urban_Continuum Code_2013,Urban_Influence_Code_2003,Urban_Influence_Code_2013,Economic_typology_2015,CENSUS_2010_POP,ESTIMATES_BASE_2010,...,R_DOMESTIC_MIG_2019,R_NET_MIG_2011,R_NET_MIG_2012,R_NET_MIG_2013,R_NET_MIG_2014,R_NET_MIG_2015,R_NET_MIG_2016,R_NET_MIG_2017,R_NET_MIG_2018,R_NET_MIG_2019
0,0,US,United States,,,,,,308745538,308758105,...,,,,,,,,,,
1,1000,AL,Alabama,,,,,,4779736,4780125,...,1.917501,0.578434,1.186314,1.522549,0.563489,0.626357,0.745172,1.090366,1.773786,2.483744
2,1001,AL,Autauga County,2.0,2.0,2.0,2.0,0.0,54571,54597,...,4.84731,6.018182,-6.226119,-3.902226,1.970443,-1.712875,4.777171,0.849656,0.540916,4.560062
3,1003,AL,Baldwin County,4.0,3.0,5.0,2.0,5.0,182265,182265,...,24.017829,16.64187,17.488579,22.751474,20.184334,17.725964,21.279291,22.398256,24.727215,24.380567
4,1005,AL,Barbour County,6.0,6.0,6.0,6.0,3.0,27457,27455,...,-5.690302,0.292676,-6.897817,-8.132185,-5.140431,-15.724575,-18.238016,-24.998528,-8.754922,-5.165664


In [15]:
df_pop_est.columns[0:20]

Index(['FIPStxt', 'State', 'Area_Name', 'Rural-urban_Continuum Code_2003',
       'Rural-urban_Continuum Code_2013', 'Urban_Influence_Code_2003',
       'Urban_Influence_Code_2013', 'Economic_typology_2015',
       'CENSUS_2010_POP', 'ESTIMATES_BASE_2010', 'POP_ESTIMATE_2010',
       'POP_ESTIMATE_2011', 'POP_ESTIMATE_2012', 'POP_ESTIMATE_2013',
       'POP_ESTIMATE_2014', 'POP_ESTIMATE_2015', 'POP_ESTIMATE_2016',
       'POP_ESTIMATE_2017', 'POP_ESTIMATE_2018', 'POP_ESTIMATE_2019'],
      dtype='object')

## 2.2 Cleanup the columns
Extract only the needed columns:
- FIPStxt - FIPS code for state and county
- State - state abbreviation
- Area_Name - county
- POP_ESTIMATE_2016 - population estimate for 2016
- POP_ESTIMATE_2017 - population estimate for 2017
- POP_ESTIMATE_2018 - population estimate for 2018
- POP_ESTIMATE_2019 - population estimate for 2019

In [16]:
df_pop_16_19 = df_pop_est[['FIPStxt', 'State', 'Area_Name','POP_ESTIMATE_2016',
       'POP_ESTIMATE_2017', 'POP_ESTIMATE_2018', 'POP_ESTIMATE_2019']]
df_pop_16_19.head()

Unnamed: 0,FIPStxt,State,Area_Name,POP_ESTIMATE_2016,POP_ESTIMATE_2017,POP_ESTIMATE_2018,POP_ESTIMATE_2019
0,0,US,United States,322941311,324985539,326687501,328239523
1,1000,AL,Alabama,4863525,4874486,4887681,4903185
2,1001,AL,Autauga County,55243,55390,55533,55869
3,1003,AL,Baldwin County,207601,212521,217855,223234
4,1005,AL,Barbour County,25806,25157,24872,24686


In [17]:
# Check the number of states
print("These are the number of states", len(df_pop_16_19['State'].unique()))
print("These are the states: ")
print(df_pop_16_19['State'].unique())

These are the number of states 53
These are the states: 
['US' 'AL' 'AK' 'AZ' 'AR' 'CA' 'CO' 'CT' 'DE' 'DC' 'FL' 'GA' 'HI' 'ID'
 'IL' 'IN' 'IA' 'KS' 'KY' 'LA' 'ME' 'MD' 'MA' 'MI' 'MN' 'MS' 'MO' 'MT'
 'NE' 'NV' 'NH' 'NJ' 'NM' 'NY' 'NC' 'ND' 'OH' 'OK' 'OR' 'PA' 'RI' 'SC'
 'SD' 'TN' 'TX' 'UT' 'VT' 'VA' 'WA' 'WV' 'WI' 'WY' 'PR']


In [18]:
# Save the US and All states in seperate files from the county files 
us_pop_16_19 = df_pop_16_19[df_pop_16_19.State == 'US']
us_pop_16_19

Unnamed: 0,FIPStxt,State,Area_Name,POP_ESTIMATE_2016,POP_ESTIMATE_2017,POP_ESTIMATE_2018,POP_ESTIMATE_2019
0,0,US,United States,322941311,324985539,326687501,328239523


In [19]:
all_states_pop_16_19 = df_pop_16_19[df_pop_16_19['Area_Name'].isin(states)]
all_states_pop_16_19.sample(5)

Unnamed: 0,FIPStxt,State,Area_Name,POP_ESTIMATE_2016,POP_ESTIMATE_2017,POP_ESTIMATE_2018,POP_ESTIMATE_2019
558,15000,HI,Hawaii,1427559,1424393,1420593,1415872
398,13000,GA,Georgia,10301890,10410330,10511131,10617423
905,20000,KS,Kansas,2910844,2908718,2911359,2913314
2357,45000,SC,South Carolina,4957968,5021268,5084156,5148714
99,4000,AZ,Arizona,6941072,7044008,7158024,7278717


In [20]:
# Drop the rows with US, one District of Columbia and State level data
df_pop_16_19 = df_pop_16_19[df_pop_16_19.State != 'US']
df_pop_16_19= df_pop_16_19[df_pop_16_19.State != 'PR']
df_pop_16_19 = df_pop_16_19[~df_pop_16_19['Area_Name'].isin(states)].reset_index(drop=True)
df_pop_16_19.head()

Unnamed: 0,FIPStxt,State,Area_Name,POP_ESTIMATE_2016,POP_ESTIMATE_2017,POP_ESTIMATE_2018,POP_ESTIMATE_2019
0,1001,AL,Autauga County,55243,55390,55533,55869
1,1003,AL,Baldwin County,207601,212521,217855,223234
2,1005,AL,Barbour County,25806,25157,24872,24686
3,1007,AL,Bibb County,22586,22550,22367,22394
4,1009,AL,Blount County,57494,57787,57771,57826


In [21]:
# Find and drop duplicate rows
[row for row in df_pop_16_19.duplicated(keep='last') if row == True]

[]

## 2.3 Verify Missing Data

In [22]:
df_pop_16_19.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3143 entries, 0 to 3142
Data columns (total 7 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   FIPStxt            3143 non-null   int64 
 1   State              3143 non-null   object
 2   Area_Name          3143 non-null   object
 3   POP_ESTIMATE_2016  3143 non-null   int64 
 4   POP_ESTIMATE_2017  3143 non-null   int64 
 5   POP_ESTIMATE_2018  3143 non-null   int64 
 6   POP_ESTIMATE_2019  3143 non-null   int64 
dtypes: int64(5), object(2)
memory usage: 172.0+ KB


In [23]:
print("There are ", len(df_pop_16_19['State'].unique()), " states.")
df_pop_16_19.State.unique()   

There are  51  states.


array(['AL', 'AK', 'AZ', 'AR', 'CA', 'CO', 'CT', 'DE', 'DC', 'FL', 'GA',
       'HI', 'ID', 'IL', 'IN', 'IA', 'KS', 'KY', 'LA', 'ME', 'MD', 'MA',
       'MI', 'MN', 'MS', 'MO', 'MT', 'NE', 'NV', 'NH', 'NJ', 'NM', 'NY',
       'NC', 'ND', 'OH', 'OK', 'OR', 'PA', 'RI', 'SC', 'SD', 'TN', 'TX',
       'UT', 'VT', 'VA', 'WA', 'WV', 'WI', 'WY'], dtype=object)

This record contains 51 states which includes District of Columbia; Puerto Rico is excluded

## 3.1 Read Population (2020)

In [24]:
df_pop_20 = pd.read_csv('https://www.ers.usda.gov/webdocs/DataFiles/48747/PopulationEstimates.csv?v=390.6')
df_pop_20.head(10)

Unnamed: 0,FIPStxt,State,Area name,Rural-urban continuum code 2013,Attribute,Value
0,0,US,United States,,Population 1990,248790925
1,0,US,United States,,Population 2000,281424600
2,0,US,United States,,Population 2010,308745538
3,0,US,United States,,Population 2020,331449281
4,1000,AL,Alabama,,Population 1990,4040389
5,1000,AL,Alabama,,Population 2000,4447207
6,1000,AL,Alabama,,Population 2010,4779736
7,1000,AL,Alabama,,Population 2020,5024279
8,1001,AL,Autauga County,2.0,Population 1990,34222
9,1001,AL,Autauga County,2.0,Population 2000,43751


## 3.2 Cleanup the columns

In [25]:
# Identify the columns
df_pop_20.columns

Index(['FIPStxt', 'State', 'Area name', 'Rural-urban continuum code 2013',
       'Attribute', 'Value'],
      dtype='object')

In [26]:
# Identify the unique contents of Attribute
df_pop_20['Attribute'].unique()

array(['Population 1990', 'Population 2000', 'Population 2010',
       'Population 2020'], dtype=object)

In [27]:
# Before cleaning, the attribute column will only have Population 2000
df_pop_20 = df_pop_20[df_pop_20.Attribute == 'Population 2020'].reset_index(drop=True)
df_pop_20

Unnamed: 0,FIPStxt,State,Area name,Rural-urban continuum code 2013,Attribute,Value
0,0,US,United States,,Population 2020,331449281
1,1000,AL,Alabama,,Population 2020,5024279
2,1001,AL,Autauga County,2.0,Population 2020,58805
3,1003,AL,Baldwin County,3.0,Population 2020,231767
4,1005,AL,Barbour County,6.0,Population 2020,25223
...,...,...,...,...,...,...
3269,72145,PR,Vega Baja Municipio,1.0,Population 2020,54414
3270,72147,PR,Vieques Municipio,7.0,Population 2020,8249
3271,72149,PR,Villalba Municipio,2.0,Population 2020,22093
3272,72151,PR,Yabucoa Municipio,1.0,Population 2020,30426


In [28]:
# Keep FIPStxt, State, Area_name, and value. 
df_pop_20 = df_pop_20[['FIPStxt', 'State', 'Area name', 'Value']]

In [29]:
# Rename Value to Pop2020
df_pop_20.rename(columns = {'Value': 'Pop_2020'}, inplace = True)
df_pop_20.head(5)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


Unnamed: 0,FIPStxt,State,Area name,Pop_2020
0,0,US,United States,331449281
1,1000,AL,Alabama,5024279
2,1001,AL,Autauga County,58805
3,1003,AL,Baldwin County,231767
4,1005,AL,Barbour County,25223


In [30]:
# US population for 2020 
us_pop_20 = df_pop_20[df_pop_20.State == 'US']
us_pop_20

Unnamed: 0,FIPStxt,State,Area name,Pop_2020
0,0,US,United States,331449281


In [31]:
# Keep the all state population for 2020
all_states_pop_20 = df_pop_20[df_pop_20['Area name'].isin(states)]
len(all_states_pop_20)

50

In [32]:
# Remove the United States, all state, Puerto Rico, and duplicates
# Drop the rows with US, one District of Columbia and State level data
df_pop_20 = df_pop_20[df_pop_20.State != 'US']
df_pop_20 = df_pop_20[df_pop_20.State != 'PR']
df_pop_20 = df_pop_20[~df_pop_20['Area name'].isin(states)].reset_index(drop=True)
df_pop_20.head()

Unnamed: 0,FIPStxt,State,Area name,Pop_2020
0,1001,AL,Autauga County,58805
1,1003,AL,Baldwin County,231767
2,1005,AL,Barbour County,25223
3,1007,AL,Bibb County,22293
4,1009,AL,Blount County,59134


In [33]:
# Find and drop duplicate rows
[row for row in df_pop_20.duplicated(keep='last') if row == True]

[]

## 3.3 Verify Missing Data

In [34]:
df_pop_20.info()   

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3144 entries, 0 to 3143
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   FIPStxt    3144 non-null   int64 
 1   State      3144 non-null   object
 2   Area name  3144 non-null   object
 3   Pop_2020   3144 non-null   int64 
dtypes: int64(2), object(2)
memory usage: 98.4+ KB


## 4 Merge Land Area, Population 2016 to 2019, Population 2020
### 4.1 Clean FIPS Codes
Compare all the datasets FIPS cpde before joining to the dataframe

In [35]:
# Land area data's FIPS code
print('This is the number of unique FIPS code in land area: ', len(df_land_area['FIPStxt'].unique()))

This is the number of unique FIPS code in land area:  3147


In [36]:
# Population 2016-2019 data's FIPS code
print('This is the number of unique FIPS code in population 2016-2019: ', len(df_pop_16_19.FIPStxt.unique()))

This is the number of unique FIPS code in population 2016-2019:  3143


In [37]:
# Population 2020 data's FIPS code
print('This is the number of unique FIPS code in population 2020: ', len(df_pop_20.FIPStxt.unique()))

This is the number of unique FIPS code in population 2020:  3144


In [38]:
# Check the value in the Population 2020 but not in Population 2016-2019
missing_pop_16_19 = df_pop_20[~df_pop_20.FIPStxt.isin(df_pop_16_19.FIPStxt.values)]
missing_pop_16_19

Unnamed: 0,FIPStxt,State,Area name,Pop_2020
72,2063,AK,Chugach Census Area,7102
73,2066,AK,Copper River Census Area,2617


In [39]:
# Check the value in the Population 2016-2019 but not in land area
missing_pop_16_19_land = df_pop_16_19[~df_pop_16_19.FIPStxt.isin(df_land_area.FIPStxt.values)]
missing_pop_16_19_land

Unnamed: 0,FIPStxt,State,Area_Name,POP_ESTIMATE_2016,POP_ESTIMATE_2017,POP_ESTIMATE_2018,POP_ESTIMATE_2019
81,2158,AK,Kusilvak Census Area,8181,8236,8323,8314
2413,46102,SD,Oglala Lakota County,14425,14384,14331,14177


In [40]:
# Check the value in the land but not in Population 2020
missing_land_pop_20 = df_land_area[~df_land_area.FIPStxt.isin(df_pop_20.FIPStxt.values)]
missing_land_pop_20

Unnamed: 0,Areaname,FIPStxt,LND110210D
91,"Valdez-Cordova, AK",2261,34239.88
92,"Wade Hampton, AK",2270,17081.43
1655,"Yellowstone National Park, MT",30113,0.0
2419,"Shannon, SD",46113,2093.9
2918,"Bedford, VA",51515,6.88
2923,"Clifton Forge, VA",51560,0.0
2951,"South Boston, VA",51780,0.0


In [41]:
# Check the value in the Population 2016-2019 but not in land area
missing_pop_20_land = df_pop_20[~df_pop_20.FIPStxt.isin(df_land_area.FIPStxt.values)]
missing_pop_20_land

Unnamed: 0,FIPStxt,State,Area name,Pop_2020
72,2063,AK,Chugach Census Area,7102
73,2066,AK,Copper River Census Area,2617
83,2158,AK,Kusilvak Census Area,8368
2414,46102,SD,Oglala Lakota County,13672


In [42]:
# Check the value in the land but not in Population 2020
missing_land_pop_16_19 = df_land_area[~df_land_area.FIPStxt.isin(df_pop_16_19.FIPStxt.values)]
missing_land_pop_16_19

Unnamed: 0,Areaname,FIPStxt,LND110210D
92,"Wade Hampton, AK",2270,17081.43
1655,"Yellowstone National Park, MT",30113,0.0
2419,"Shannon, SD",46113,2093.9
2918,"Bedford, VA",51515,6.88
2923,"Clifton Forge, VA",51560,0.0
2951,"South Boston, VA",51780,0.0


Based on the records, the results doesn't have land, incomplete population, nor non-county (area).
## 3.2 Merge Dataframes
The technique used for merging of three dataframe was suggested by cwharland and can be found in https://stackoverflow.com/questions/23668427/pandas-three-way-joining-multiple-dataframes-on-columns

In [45]:
df = df_pop_20.merge(df_pop_16_19, on='FIPStxt').merge(df_land_area, on='FIPStxt').drop(['State_x', 'Area_Name', 'Areaname'], axis=1)
df.sample(5)

Unnamed: 0,FIPStxt,Area name,Pop_2020,State_y,POP_ESTIMATE_2016,POP_ESTIMATE_2017,POP_ESTIMATE_2018,POP_ESTIMATE_2019,LND110210D
2066,39049,Franklin County,1323807,OH,1274450,1295510,1307698,1316756,532.19
2327,45025,Chesterfield County,43273,SC,46120,45979,45881,45650,799.08
1426,28053,Humphreys County,7785,MS,8588,8333,8256,8064,418.49
2432,47013,Campbell County,39272,TN,39784,39791,39795,39842,480.19
2892,51157,Rappahannock County,7348,VA,7380,7393,7345,7370,266.23


In [46]:
df.shape

(3140, 9)

In [47]:
df.columns

Index(['FIPStxt', 'Area name', 'Pop_2020', 'State_y', 'POP_ESTIMATE_2016',
       'POP_ESTIMATE_2017', 'POP_ESTIMATE_2018', 'POP_ESTIMATE_2019',
       'LND110210D'],
      dtype='object')

In [48]:
# Rename all the columns
df.rename(columns = {'FIPStxt': 'FIPS', 'Area name': 'county', "State_y": 'state',
                     'POP_ESTIMATE_2016':'pop_2016', 'POP_ESTIMATE_2017':'pop_2017',
                     'POP_ESTIMATE_2018': 'pop_2018', 'POP_ESTIMATE_2019': 'pop_2019',
                     'Pop_2020':'pop_2020', 'LND110210D': 'land_area'}, inplace= True)

In [49]:
# Reindex all the columns
df = df.reindex(columns= ['FIPS', 'county', 'state', 'land_area', 'pop_2016', 'pop_2017', 
                     'pop_2018', 'pop_2019', 'pop_2020'])
df.head(5)

Unnamed: 0,FIPS,county,state,land_area,pop_2016,pop_2017,pop_2018,pop_2019,pop_2020
0,1001,Autauga County,AL,594.44,55243,55390,55533,55869,58805
1,1003,Baldwin County,AL,1589.78,207601,212521,217855,223234,231767
2,1005,Barbour County,AL,884.88,25806,25157,24872,24686,25223
3,1007,Bibb County,AL,622.58,22586,22550,22367,22394,22293
4,1009,Blount County,AL,644.78,57494,57787,57771,57826,59134


In [50]:
df.to_csv('Pop_Land.csv', index=False)