# Population Density Prep

This notebook uses the 'estimatePop.xlsx' and 'LND02.xls' (for land area) to calculate population density for 2018.

# Setup

## Imports

In [2]:
import pandas as pd
import numpy as np

## Parameters

In [3]:
EST_POP_DATASET = "../../../data/RQ3/raw/estimatedPop.xlsx"
LAND_AREA_DATASET = "../../../data/RQ3/raw/LND02.xls"
PROCESSED_STATE_POP_DEN_DATASET = "../../../data/RQ3/processed/state_pop_den_df.csv"
PROCESSED_MSA_POP_DEN_DATASET = "../../../data/RQ3/processed/msa_pop_den_df.csv"

# Loading the Dataset

In [4]:
est_pop_df = pd.read_excel(EST_POP_DATASET, header=3, skipfooter=6) 
land_area_df = pd.read_excel(LAND_AREA_DATASET)

In [5]:
est_pop_df

Unnamed: 0.1,Unnamed: 0,Census,Estimates Base,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
0,United States,308745538,308758105,309321666,311556874,313830990,315993715,318301008,320635163,322941311,324985539,326687501,328239523
1,".Autauga County, Alabama",54571,54597,54773,55227,54954,54727,54893,54864,55243,55390,55533,55869
2,".Baldwin County, Alabama",182265,182265,183112,186558,190145,194885,199183,202939,207601,212521,217855,223234
3,".Barbour County, Alabama",27457,27455,27327,27341,27169,26937,26755,26283,25806,25157,24872,24686
4,".Bibb County, Alabama",22915,22915,22870,22745,22667,22521,22553,22566,22586,22550,22367,22394
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3138,".Sweetwater County, Wyoming",43806,43806,43574,43986,45002,45157,44948,44719,44222,43464,42858,42343
3139,".Teton County, Wyoming",21294,21298,21296,21414,21624,22315,22773,23047,23234,23384,23269,23464
3140,".Uinta County, Wyoming",21118,21121,21089,20896,20996,20951,20822,20763,20682,20431,20292,20226
3141,".Washakie County, Wyoming",8533,8528,8530,8449,8409,8413,8273,8278,8165,8010,7877,7805


In [6]:
land_area_df

Unnamed: 0,Areaname,STCOU,LND010190F,LND010190D,LND010190N1,LND010190N2,LND010200F,LND010200D,LND010200N1,LND010200N2,...,LND110210N1,LND110210N2,LND210190F,LND210190D,LND210190N1,LND210190N2,LND210200F,LND210200D,LND210200N1,LND210200N2
0,UNITED STATES,0,0,3787425.08,0,0,0,3794083.06,0,0,...,0,0,0,251083.35,0,0,0,256644.62,0,0
1,ALABAMA,1000,0,52422.94,0,0,0,52419.02,0,0,...,0,0,0,1672.71,0,0,0,1675.01,0,0
2,"Autauga, AL",1001,0,604.49,0,0,0,604.45,0,0,...,0,0,0,8.48,0,0,0,8.48,0,0
3,"Baldwin, AL",1003,0,2027.08,0,0,0,2026.93,0,0,...,0,0,0,430.55,0,0,0,430.58,0,0
4,"Barbour, AL",1005,0,904.59,0,0,0,904.52,0,0,...,0,0,0,19.59,0,0,0,19.61,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3193,"Sweetwater, WY",56037,0,10491.73,0,0,0,10491.17,0,0,...,0,0,0,65.86,0,0,0,65.87,0,0
3194,"Teton, WY",56039,0,4221.96,0,0,0,4221.80,0,0,...,0,0,0,214.02,0,0,0,214.04,0,0
3195,"Uinta, WY",56041,0,2087.66,0,0,0,2087.56,0,0,...,0,0,0,5.91,0,0,0,5.90,0,0
3196,"Washakie, WY",56043,0,2242.85,0,0,0,2242.75,0,0,...,0,0,0,2.69,0,0,0,2.69,0,0


### First get the estimated population for 2018

In [7]:
est_pop_df.tail()

Unnamed: 0.1,Unnamed: 0,Census,Estimates Base,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
3138,".Sweetwater County, Wyoming",43806,43806,43574,43986,45002,45157,44948,44719,44222,43464,42858,42343
3139,".Teton County, Wyoming",21294,21298,21296,21414,21624,22315,22773,23047,23234,23384,23269,23464
3140,".Uinta County, Wyoming",21118,21121,21089,20896,20996,20951,20822,20763,20682,20431,20292,20226
3141,".Washakie County, Wyoming",8533,8528,8530,8449,8409,8413,8273,8278,8165,8010,7877,7805
3142,".Weston County, Wyoming",7208,7208,7198,7142,7077,7136,7138,7208,7220,6968,6924,6927


In [8]:
est_pop_df.columns

Index([    'Unnamed: 0',         'Census', 'Estimates Base',             2010,
                   2011,             2012,             2013,             2014,
                   2015,             2016,             2017,             2018,
                   2019],
      dtype='object')

In [9]:
est_pop_df = est_pop_df.rename(columns = {"Unnamed: 0": "Area", est_pop_df[est_pop_df.columns[11]].name: "2018"})
est_pop_df.head()

Unnamed: 0,Area,Census,Estimates Base,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
0,United States,308745538,308758105,309321666,311556874,313830990,315993715,318301008,320635163,322941311,324985539,326687501,328239523
1,".Autauga County, Alabama",54571,54597,54773,55227,54954,54727,54893,54864,55243,55390,55533,55869
2,".Baldwin County, Alabama",182265,182265,183112,186558,190145,194885,199183,202939,207601,212521,217855,223234
3,".Barbour County, Alabama",27457,27455,27327,27341,27169,26937,26755,26283,25806,25157,24872,24686
4,".Bibb County, Alabama",22915,22915,22870,22745,22667,22521,22553,22566,22586,22550,22367,22394


In [10]:
key_cols = ['Area', '2018']
est_pop_df = est_pop_df[key_cols]
est_pop_df.head()

Unnamed: 0,Area,2018
0,United States,326687501
1,".Autauga County, Alabama",55533
2,".Baldwin County, Alabama",217855
3,".Barbour County, Alabama",24872
4,".Bibb County, Alabama",22367


In [11]:
#Copy for later work
est_pop_df2 = est_pop_df.copy()

### Store the US population estimate as a whole

In [12]:
US_pop = est_pop_df['2018'].iloc[0]
US_pop

326687501

### Create a dataset for the states

In [13]:
#Now remove US because we don't need it for the states
est_pop_df = est_pop_df.drop(0)
est_pop_df.head()

Unnamed: 0,Area,2018
1,".Autauga County, Alabama",55533
2,".Baldwin County, Alabama",217855
3,".Barbour County, Alabama",24872
4,".Bibb County, Alabama",22367
5,".Blount County, Alabama",57771


In [14]:
#Need to process Area strings and groupby county
est_pop_df['Area'] = est_pop_df['Area'].str.split(',').str[1]
est_pop_df['Area'] = est_pop_df['Area'].str.upper()
est_pop_df['Area'] = est_pop_df['Area'].str.strip()
est_pop_df

Unnamed: 0,Area,2018
1,ALABAMA,55533
2,ALABAMA,217855
3,ALABAMA,24872
4,ALABAMA,22367
5,ALABAMA,57771
...,...,...
3138,WYOMING,42858
3139,WYOMING,23269
3140,WYOMING,20292
3141,WYOMING,7877


In [15]:
est_pop_df = est_pop_df.rename(columns = {"Area": "State", "2018": "Population"})
est_pop_df

Unnamed: 0,State,Population
1,ALABAMA,55533
2,ALABAMA,217855
3,ALABAMA,24872
4,ALABAMA,22367
5,ALABAMA,57771
...,...,...
3138,WYOMING,42858
3139,WYOMING,23269
3140,WYOMING,20292
3141,WYOMING,7877


In [16]:
est_pop_df = est_pop_df.groupby(['State']).sum()
est_pop_df

Unnamed: 0_level_0,Population
State,Unnamed: 1_level_1
ALABAMA,4887681
ALASKA,735139
ARIZONA,7158024
ARKANSAS,3009733
CALIFORNIA,39461588
COLORADO,5691287
CONNECTICUT,3571520
DELAWARE,965479
DISTRICT OF COLUMBIA,701547
FLORIDA,21244317


### Now preparing land area

In [17]:
land_area_df

Unnamed: 0,Areaname,STCOU,LND010190F,LND010190D,LND010190N1,LND010190N2,LND010200F,LND010200D,LND010200N1,LND010200N2,...,LND110210N1,LND110210N2,LND210190F,LND210190D,LND210190N1,LND210190N2,LND210200F,LND210200D,LND210200N1,LND210200N2
0,UNITED STATES,0,0,3787425.08,0,0,0,3794083.06,0,0,...,0,0,0,251083.35,0,0,0,256644.62,0,0
1,ALABAMA,1000,0,52422.94,0,0,0,52419.02,0,0,...,0,0,0,1672.71,0,0,0,1675.01,0,0
2,"Autauga, AL",1001,0,604.49,0,0,0,604.45,0,0,...,0,0,0,8.48,0,0,0,8.48,0,0
3,"Baldwin, AL",1003,0,2027.08,0,0,0,2026.93,0,0,...,0,0,0,430.55,0,0,0,430.58,0,0
4,"Barbour, AL",1005,0,904.59,0,0,0,904.52,0,0,...,0,0,0,19.59,0,0,0,19.61,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3193,"Sweetwater, WY",56037,0,10491.73,0,0,0,10491.17,0,0,...,0,0,0,65.86,0,0,0,65.87,0,0
3194,"Teton, WY",56039,0,4221.96,0,0,0,4221.80,0,0,...,0,0,0,214.02,0,0,0,214.04,0,0
3195,"Uinta, WY",56041,0,2087.66,0,0,0,2087.56,0,0,...,0,0,0,5.91,0,0,0,5.90,0,0
3196,"Washakie, WY",56043,0,2242.85,0,0,0,2242.75,0,0,...,0,0,0,2.69,0,0,0,2.69,0,0


In [18]:
key_cols = ['Areaname','LND110210D'] # 'LND110210D' is the column name for "land area in square miles 2010"
land_area_df = land_area_df[key_cols]
land_area_df2 = land_area_df.copy()
land_area_df = land_area_df.rename(columns = {"Areaname": "State", "LND110210D": "Land Area"})
land_area_df.head()

Unnamed: 0,State,Land Area
0,UNITED STATES,3531905.43
1,ALABAMA,50645.33
2,"Autauga, AL",594.44
3,"Baldwin, AL",1589.78
4,"Barbour, AL",884.88


In [19]:
US_land_area = land_area_df['Land Area'].iloc[0]
US_land_area

3531905.43

In [20]:
land_area_df = land_area_df.drop(0)
land_area_df

Unnamed: 0,State,Land Area
1,ALABAMA,50645.33
2,"Autauga, AL",594.44
3,"Baldwin, AL",1589.78
4,"Barbour, AL",884.88
5,"Bibb, AL",622.58
...,...,...
3193,"Sweetwater, WY",10426.65
3194,"Teton, WY",3995.38
3195,"Uinta, WY",2081.26
3196,"Washakie, WY",2238.55


In [21]:
land_area_df = land_area_df[~land_area_df['State'].str.contains(',')]
land_area_df

Unnamed: 0,State,Land Area
1,ALABAMA,50645.33
69,ALASKA,570640.95
99,ARIZONA,113594.08
115,ARKANSAS,52035.48
191,CALIFORNIA,155779.22
250,COLORADO,103641.89
315,CONNECTICUT,4842.36
324,DELAWARE,1948.54
328,DISTRICT OF COLUMBIA,61.05
329,District of Columbia,61.05


In [22]:
#District of columbia has been included twice, so we need to remove one
land_area_df = land_area_df.drop(329)
land_area_df.shape

(51, 2)

In [23]:
land_area_df = land_area_df.set_index('State')
land_area_df

Unnamed: 0_level_0,Land Area
State,Unnamed: 1_level_1
ALABAMA,50645.33
ALASKA,570640.95
ARIZONA,113594.08
ARKANSAS,52035.48
CALIFORNIA,155779.22
COLORADO,103641.89
CONNECTICUT,4842.36
DELAWARE,1948.54
DISTRICT OF COLUMBIA,61.05
FLORIDA,53624.76


### Create Population density data

In [24]:
df = pd.DataFrame(index=est_pop_df.index)

df['Population'] = est_pop_df['Population']
df['Land Area'] = land_area_df['Land Area']
df

Unnamed: 0_level_0,Population,Land Area
State,Unnamed: 1_level_1,Unnamed: 2_level_1
ALABAMA,4887681,50645.33
ALASKA,735139,570640.95
ARIZONA,7158024,113594.08
ARKANSAS,3009733,52035.48
CALIFORNIA,39461588,155779.22
COLORADO,5691287,103641.89
CONNECTICUT,3571520,4842.36
DELAWARE,965479,1948.54
DISTRICT OF COLUMBIA,701547,61.05
FLORIDA,21244317,53624.76


In [25]:
state_pop_den_df = land_area_df.merge(est_pop_df, on='State', how='left')
state_pop_den_df

Unnamed: 0_level_0,Land Area,Population
State,Unnamed: 1_level_1,Unnamed: 2_level_1
ALABAMA,50645.33,4887681
ALASKA,570640.95,735139
ARIZONA,113594.08,7158024
ARKANSAS,52035.48,3009733
CALIFORNIA,155779.22,39461588
COLORADO,103641.89,5691287
CONNECTICUT,4842.36,3571520
DELAWARE,1948.54,965479
DISTRICT OF COLUMBIA,61.05,701547
FLORIDA,53624.76,21244317


In [26]:
state_pop_den_df['Population Density'] = state_pop_den_df['Population'] / state_pop_den_df['Land Area']
state_pop_den_df = state_pop_den_df.drop(['Land Area', 'Population'], axis = 1)
state_pop_den_df

Unnamed: 0_level_0,Population Density
State,Unnamed: 1_level_1
ALABAMA,96.508029
ALASKA,1.288269
ARIZONA,63.014058
ARKANSAS,57.840016
CALIFORNIA,253.317407
COLORADO,54.912999
CONNECTICUT,737.55772
DELAWARE,495.488417
DISTRICT OF COLUMBIA,11491.351351
FLORIDA,396.166193


### Create a dataset for the counties

In [27]:
est_pop_df2 = est_pop_df2.drop(0)
est_pop_df2.head()

Unnamed: 0,Area,2018
1,".Autauga County, Alabama",55533
2,".Baldwin County, Alabama",217855
3,".Barbour County, Alabama",24872
4,".Bibb County, Alabama",22367
5,".Blount County, Alabama",57771


In [28]:
#Need to process Area strings and groupby county
new_df = est_pop_df2['Area'].str.split(',', n = 1, expand = True)
est_pop_df2["County/City"] = new_df[0] 
est_pop_df2["State"] = new_df[1] 

In [29]:
est_pop_df2 = est_pop_df2.drop(['Area'], axis=1)
est_pop_df2 = est_pop_df2.rename(columns = {"2018": "Population"})
est_pop_df2 = est_pop_df2[['County/City', 'State', 'Population']]
est_pop_df2['State'] = est_pop_df2['State'].str.strip()
est_pop_df2

Unnamed: 0,County/City,State,Population
1,.Autauga County,Alabama,55533
2,.Baldwin County,Alabama,217855
3,.Barbour County,Alabama,24872
4,.Bibb County,Alabama,22367
5,.Blount County,Alabama,57771
...,...,...,...
3138,.Sweetwater County,Wyoming,42858
3139,.Teton County,Wyoming,23269
3140,.Uinta County,Wyoming,20292
3141,.Washakie County,Wyoming,7877


In [30]:
est_pop_df2['County/City'] = est_pop_df2['County/City'].str.replace(".", "", regex = True)
est_pop_df2

Unnamed: 0,County/City,State,Population
1,Autauga County,Alabama,55533
2,Baldwin County,Alabama,217855
3,Barbour County,Alabama,24872
4,Bibb County,Alabama,22367
5,Blount County,Alabama,57771
...,...,...,...
3138,Sweetwater County,Wyoming,42858
3139,Teton County,Wyoming,23269
3140,Uinta County,Wyoming,20292
3141,Washakie County,Wyoming,7877


### Get land area

In [32]:
land_area_df2

Unnamed: 0,Areaname,LND110210D
0,UNITED STATES,3531905.43
1,ALABAMA,50645.33
2,"Autauga, AL",594.44
3,"Baldwin, AL",1589.78
4,"Barbour, AL",884.88
...,...,...
3193,"Sweetwater, WY",10426.65
3194,"Teton, WY",3995.38
3195,"Uinta, WY",2081.26
3196,"Washakie, WY",2238.55


In [33]:
land_area_df2[land_area_df2['Areaname'].str.contains(',')].sum()

Areaname      Autauga, ALBaldwin, ALBarbour, ALBibb, ALBloun...
LND110210D                                           3531845.96
dtype: object

In [34]:
land_area_df2 = land_area_df2.rename(columns = {"Areaname": "County/City", "LND110210D": "Land Area"})
land_area_df2 = land_area_df2[land_area_df2['County/City'].str.contains(',')]
land_area_df2.head()

Unnamed: 0,County/City,Land Area
2,"Autauga, AL",594.44
3,"Baldwin, AL",1589.78
4,"Barbour, AL",884.88
5,"Bibb, AL",622.58
6,"Blount, AL",644.78


We can see that that 52 rows have been removed (51 states and America itself)

In [36]:
#Now we have to separate the county name and state name
new_df = land_area_df2['County/City'].str.split(',', n = 1, expand = True)
land_area_df2["County/City"] = new_df[0] 
land_area_df2["State Code"] = new_df[1] 
land_area_df2

Unnamed: 0,County/City,Land Area,State Code
2,Autauga,594.44,AL
3,Baldwin,1589.78,AL
4,Barbour,884.88,AL
5,Bibb,622.58,AL
6,Blount,644.78,AL
...,...,...,...
3193,Sweetwater,10426.65,WY
3194,Teton,3995.38,WY
3195,Uinta,2081.26,WY
3196,Washakie,2238.55,WY


In [37]:
land_area_df2 = land_area_df2.reset_index(drop=True)
land_area_df2['State Code'] = land_area_df2['State Code'].str.strip()
land_area_df2

Unnamed: 0,County/City,Land Area,State Code
0,Autauga,594.44,AL
1,Baldwin,1589.78,AL
2,Barbour,884.88,AL
3,Bibb,622.58,AL
4,Blount,644.78,AL
...,...,...,...
3140,Sweetwater,10426.65,WY
3141,Teton,3995.38,WY
3142,Uinta,2081.26,WY
3143,Washakie,2238.55,WY


In [38]:
us_state_abbrev = {
    'Alabama': 'AL',
    'Alaska': 'AK',
    'American Samoa': 'AS',
    'Arizona': 'AZ',
    'Arkansas': 'AR',
    'California': 'CA',
    'Colorado': 'CO',
    'Connecticut': 'CT',
    'Delaware': 'DE',
    'District of Columbia': 'DC',
    'Florida': 'FL',
    'Georgia': 'GA',
    'Guam': 'GU',
    'Hawaii': 'HI',
    'Idaho': 'ID',
    'Illinois': 'IL',
    'Indiana': 'IN',
    'Iowa': 'IA',
    'Kansas': 'KS',
    'Kentucky': 'KY',
    'Louisiana': 'LA',
    'Maine': 'ME',
    'Maryland': 'MD',
    'Massachusetts': 'MA',
    'Michigan': 'MI',
    'Minnesota': 'MN',
    'Mississippi': 'MS',
    'Missouri': 'MO',
    'Montana': 'MT',
    'Nebraska': 'NE',
    'Nevada': 'NV',
    'New Hampshire': 'NH',
    'New Jersey': 'NJ',
    'New Mexico': 'NM',
    'New York': 'NY',
    'North Carolina': 'NC',
    'North Dakota': 'ND',
    'Northern Mariana Islands':'MP',
    'Ohio': 'OH',
    'Oklahoma': 'OK',
    'Oregon': 'OR',
    'Pennsylvania': 'PA',
    'Puerto Rico': 'PR',
    'Rhode Island': 'RI',
    'South Carolina': 'SC',
    'South Dakota': 'SD',
    'Tennessee': 'TN',
    'Texas': 'TX',
    'Utah': 'UT',
    'Vermont': 'VT',
    'Virgin Islands': 'VI',
    'Virginia': 'VA',
    'Washington': 'WA',
    'West Virginia': 'WV',
    'Wisconsin': 'WI',
    'Wyoming': 'WY'
}
"""
Code Reference
Roger Allen (Mar 31 2021).Github Gist. 
Available at: https://gist.github.com/rogerallen/1583593 (Accessed 8/4/21).
"""



In [39]:
#We need to invert this dictionary
state_dict = dict(map(reversed, us_state_abbrev.items()))
state_dict

{'AL': 'Alabama',
 'AK': 'Alaska',
 'AS': 'American Samoa',
 'AZ': 'Arizona',
 'AR': 'Arkansas',
 'CA': 'California',
 'CO': 'Colorado',
 'CT': 'Connecticut',
 'DE': 'Delaware',
 'DC': 'District of Columbia',
 'FL': 'Florida',
 'GA': 'Georgia',
 'GU': 'Guam',
 'HI': 'Hawaii',
 'ID': 'Idaho',
 'IL': 'Illinois',
 'IN': 'Indiana',
 'IA': 'Iowa',
 'KS': 'Kansas',
 'KY': 'Kentucky',
 'LA': 'Louisiana',
 'ME': 'Maine',
 'MD': 'Maryland',
 'MA': 'Massachusetts',
 'MI': 'Michigan',
 'MN': 'Minnesota',
 'MS': 'Mississippi',
 'MO': 'Missouri',
 'MT': 'Montana',
 'NE': 'Nebraska',
 'NV': 'Nevada',
 'NH': 'New Hampshire',
 'NJ': 'New Jersey',
 'NM': 'New Mexico',
 'NY': 'New York',
 'NC': 'North Carolina',
 'ND': 'North Dakota',
 'MP': 'Northern Mariana Islands',
 'OH': 'Ohio',
 'OK': 'Oklahoma',
 'OR': 'Oregon',
 'PA': 'Pennsylvania',
 'PR': 'Puerto Rico',
 'RI': 'Rhode Island',
 'SC': 'South Carolina',
 'SD': 'South Dakota',
 'TN': 'Tennessee',
 'TX': 'Texas',
 'UT': 'Utah',
 'VT': 'Vermont',
 '

In [40]:
land_area_df2['State'] = land_area_df2['State Code'].map(state_dict)
land_area_df2

Unnamed: 0,County/City,Land Area,State Code,State
0,Autauga,594.44,AL,Alabama
1,Baldwin,1589.78,AL,Alabama
2,Barbour,884.88,AL,Alabama
3,Bibb,622.58,AL,Alabama
4,Blount,644.78,AL,Alabama
...,...,...,...,...
3140,Sweetwater,10426.65,WY,Wyoming
3141,Teton,3995.38,WY,Wyoming
3142,Uinta,2081.26,WY,Wyoming
3143,Washakie,2238.55,WY,Wyoming


In [41]:
land_area_df2 = land_area_df2.drop(['State Code'], axis = 1)
land_area_df2 = land_area_df2[['County/City', 'State', 'Land Area']]
land_area_df2

Unnamed: 0,County/City,State,Land Area
0,Autauga,Alabama,594.44
1,Baldwin,Alabama,1589.78
2,Barbour,Alabama,884.88
3,Bibb,Alabama,622.58
4,Blount,Alabama,644.78
...,...,...,...
3140,Sweetwater,Wyoming,10426.65
3141,Teton,Wyoming,3995.38
3142,Uinta,Wyoming,2081.26
3143,Washakie,Wyoming,2238.55


### Calculating Pop Density for county city

In [42]:
est_pop_df2

Unnamed: 0,County/City,State,Population
1,Autauga County,Alabama,55533
2,Baldwin County,Alabama,217855
3,Barbour County,Alabama,24872
4,Bibb County,Alabama,22367
5,Blount County,Alabama,57771
...,...,...,...
3138,Sweetwater County,Wyoming,42858
3139,Teton County,Wyoming,23269
3140,Uinta County,Wyoming,20292
3141,Washakie County,Wyoming,7877


In [43]:
est_pop_df2

Unnamed: 0,County/City,State,Population
1,Autauga County,Alabama,55533
2,Baldwin County,Alabama,217855
3,Barbour County,Alabama,24872
4,Bibb County,Alabama,22367
5,Blount County,Alabama,57771
...,...,...,...
3138,Sweetwater County,Wyoming,42858
3139,Teton County,Wyoming,23269
3140,Uinta County,Wyoming,20292
3141,Washakie County,Wyoming,7877


In [44]:
land_area_df2

Unnamed: 0,County/City,State,Land Area
0,Autauga,Alabama,594.44
1,Baldwin,Alabama,1589.78
2,Barbour,Alabama,884.88
3,Bibb,Alabama,622.58
4,Blount,Alabama,644.78
...,...,...,...
3140,Sweetwater,Wyoming,10426.65
3141,Teton,Wyoming,3995.38
3142,Uinta,Wyoming,2081.26
3143,Washakie,Wyoming,2238.55


In [45]:
est_pop_df2
est_pop_df2 = est_pop_df2.rename(columns = {"County/City": "temp_name"})

In [46]:
new_df = est_pop_df2['temp_name'].str.split(' ', n = 0, expand = True)
est_pop_df2["County/City"] = new_df[0] 
est_pop_df2 

Unnamed: 0,temp_name,State,Population,County/City
1,Autauga County,Alabama,55533,Autauga
2,Baldwin County,Alabama,217855,Baldwin
3,Barbour County,Alabama,24872,Barbour
4,Bibb County,Alabama,22367,Bibb
5,Blount County,Alabama,57771,Blount
...,...,...,...,...
3138,Sweetwater County,Wyoming,42858,Sweetwater
3139,Teton County,Wyoming,23269,Teton
3140,Uinta County,Wyoming,20292,Uinta
3141,Washakie County,Wyoming,7877,Washakie


In [47]:
msa_pop_den_df = land_area_df2.merge(est_pop_df2, on=['County/City', 'State'], how='left')
msa_pop_den_df = msa_pop_den_df.drop('County/City', axis=1)
msa_pop_den_df = msa_pop_den_df.rename(columns = {"temp_name": "County/City"})
msa_pop_den_df

Unnamed: 0,State,Land Area,County/City,Population
0,Alabama,594.44,Autauga County,55533.0
1,Alabama,1589.78,Baldwin County,217855.0
2,Alabama,884.88,Barbour County,24872.0
3,Alabama,622.58,Bibb County,22367.0
4,Alabama,644.78,Blount County,57771.0
...,...,...,...,...
3154,Wyoming,10426.65,Sweetwater County,42858.0
3155,Wyoming,3995.38,Teton County,23269.0
3156,Wyoming,2081.26,Uinta County,20292.0
3157,Wyoming,2238.55,Washakie County,7877.0


In [48]:
msa_pop_den_df = msa_pop_den_df[['County/City', 'State', 'Land Area', 'Population']]
msa_pop_den_df

Unnamed: 0,County/City,State,Land Area,Population
0,Autauga County,Alabama,594.44,55533.0
1,Baldwin County,Alabama,1589.78,217855.0
2,Barbour County,Alabama,884.88,24872.0
3,Bibb County,Alabama,622.58,22367.0
4,Blount County,Alabama,644.78,57771.0
...,...,...,...,...
3154,Sweetwater County,Wyoming,10426.65,42858.0
3155,Teton County,Wyoming,3995.38,23269.0
3156,Uinta County,Wyoming,2081.26,20292.0
3157,Washakie County,Wyoming,2238.55,7877.0


In [49]:
msa_pop_den_df['Population Density'] = msa_pop_den_df['Population'] / msa_pop_den_df['Land Area']
msa_pop_den_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  msa_pop_den_df['Population Density'] = msa_pop_den_df['Population'] / msa_pop_den_df['Land Area']


Unnamed: 0,County/City,State,Land Area,Population,Population Density
0,Autauga County,Alabama,594.44,55533.0,93.420698
1,Baldwin County,Alabama,1589.78,217855.0,137.034684
2,Barbour County,Alabama,884.88,24872.0,28.107766
3,Bibb County,Alabama,622.58,22367.0,35.926307
4,Blount County,Alabama,644.78,57771.0,89.598002
...,...,...,...,...,...
3154,Sweetwater County,Wyoming,10426.65,42858.0,4.110429
3155,Teton County,Wyoming,3995.38,23269.0,5.823977
3156,Uinta County,Wyoming,2081.26,20292.0,9.749863
3157,Washakie County,Wyoming,2238.55,7877.0,3.518796


In [50]:
msa_pop_den_df.isnull().sum()

County/City           205
State                   0
Land Area               0
Population            205
Population Density    205
dtype: int64

In [51]:
msa_pop_den_df = msa_pop_den_df.dropna()
msa_pop_den_df

Unnamed: 0,County/City,State,Land Area,Population,Population Density
0,Autauga County,Alabama,594.44,55533.0,93.420698
1,Baldwin County,Alabama,1589.78,217855.0,137.034684
2,Barbour County,Alabama,884.88,24872.0,28.107766
3,Bibb County,Alabama,622.58,22367.0,35.926307
4,Blount County,Alabama,644.78,57771.0,89.598002
...,...,...,...,...,...
3154,Sweetwater County,Wyoming,10426.65,42858.0,4.110429
3155,Teton County,Wyoming,3995.38,23269.0,5.823977
3156,Uinta County,Wyoming,2081.26,20292.0,9.749863
3157,Washakie County,Wyoming,2238.55,7877.0,3.518796


In [52]:
msa_pop_den_df.describe()

Unnamed: 0,Land Area,Population,Population Density
count,2954.0,2954.0,2954.0
mean,1034.963561,96410.63,301.302455
std,3171.129583,271197.1,3600.221482
min,2.5,86.0,0.03683
25%,430.41,11000.25,17.041044
50%,609.195,25534.0,44.809639
75%,911.375,66734.0,114.923778
max,145504.79,5171960.0,184048.557692


In [53]:
msa_pop_den_df.sort_values('Population Density', ascending=False).head(20)

Unnamed: 0,County/City,State,Land Area,Population,Population Density
2934,Fairfax County,Virginia,6.24,1148463.0,184048.557692
1855,Kings County,New York,70.82,2578074.0,36403.191189
1834,Bronx County,New York,42.1,1432087.0,34016.31829
1872,Queens County,New York,108.53,2274605.0,20958.306459
1786,Hudson County,New Jersey,46.19,671931.0,14547.109764
1230,Suffolk County,Massachusetts,58.15,803147.0,13811.642304
2298,Philadelphia County,Pennsylvania,134.1,1583592.0,11809.038031
2924,Bedford County,Virginia,6.88,78882.0,11465.406977
2923,Alexandria city,Virginia,15.03,159069.0,10583.433134
2830,Arlington County,Virginia,25.97,236025.0,9088.371198


# Saving the Dataframe

In [54]:
state_pop_den_df.to_csv(PROCESSED_STATE_POP_DEN_DATASET)

In [55]:
msa_pop_den_df.to_csv(PROCESSED_MSA_POP_DEN_DATASET, index=False)