In [47]:
import pandas as pd
from pathlib import Path

In [48]:
# Load the census CSV into a DataFrame; the file is decoded as latin-1.
census_data_csv = Path("Data/censusdata.csv")
census_data_df = pd.read_csv(
    filepath_or_buffer=census_data_csv,
    encoding="latin-1",
)

In [49]:
# List the column labels of the raw census frame.
census_data_df.columns

Index(['SUMLEV', 'REGION', 'DIVISION', 'STATE', 'COUNTY', 'STNAME', 'CTYNAME',
       'CENSUS2010POP', 'ESTIMATESBASE2010', 'POPESTIMATE2010',
       ...
       'RNETMIG2011', 'RNETMIG2012', 'RNETMIG2013', 'RNETMIG2014',
       'RNETMIG2015', 'RNETMIG2016', 'RNETMIG2017', 'RNETMIG2018',
       'RNETMIG2019', 'RNETMIG2020'],
      dtype='object', length=180)

In [50]:
# Build a narrower frame holding the county code, the state and county names,
# and the yearly population estimates for 2010 through 2020 (spreadsheet
# columns E, F, G and J-T of the CSV).
# Per the original note, each estimate is: population base + births - deaths + migration.
population_columns = [
    'COUNTY',
    'STNAME',
    'CTYNAME',
    'POPESTIMATE2010',
    'POPESTIMATE2011',
    'POPESTIMATE2012',
    'POPESTIMATE2013',
    'POPESTIMATE2014',
    'POPESTIMATE2015',
    'POPESTIMATE2016',
    'POPESTIMATE2017',
    'POPESTIMATE2018',
    'POPESTIMATE2019',
    'POPESTIMATE2020',
]
census_df = census_data_df.loc[:, population_columns]
census_df.head()

Unnamed: 0,COUNTY,STNAME,CTYNAME,POPESTIMATE2010,POPESTIMATE2011,POPESTIMATE2012,POPESTIMATE2013,POPESTIMATE2014,POPESTIMATE2015,POPESTIMATE2016,POPESTIMATE2017,POPESTIMATE2018,POPESTIMATE2019,POPESTIMATE2020
0,0,Alabama,Alabama,4785514,4799642,4816632,4831586,4843737,4854803,4866824,4877989,4891628,4907965,4921532
1,1,Alabama,Autauga County,54761,55229,54970,54747,54922,54903,55302,55448,55533,55769,56145
2,3,Alabama,Baldwin County,183121,186579,190203,194978,199306,203101,207787,212737,218071,223565,229287
3,5,Alabama,Barbour County,27325,27344,27172,26946,26768,26300,25828,25169,24887,24657,24589
4,7,Alabama,Bibb County,22858,22736,22657,22510,22541,22553,22590,22532,22300,22313,22136


# State analysis

In [51]:
# Keep only the state-level rows, i.e. the rows whose COUNTY code is 0
# (no individual counties).
# .copy() makes state_df an independent frame: later cells assign columns into
# it, and doing that on a boolean-filtered slice of census_df triggers pandas'
# SettingWithCopyWarning.
state_df = census_df[census_df['COUNTY'] == 0].copy()
state_df.head()

Unnamed: 0,COUNTY,STNAME,CTYNAME,POPESTIMATE2010,POPESTIMATE2011,POPESTIMATE2012,POPESTIMATE2013,POPESTIMATE2014,POPESTIMATE2015,POPESTIMATE2016,POPESTIMATE2017,POPESTIMATE2018,POPESTIMATE2019,POPESTIMATE2020
0,0,Alabama,Alabama,4785514,4799642,4816632,4831586,4843737,4854803,4866824,4877989,4891628,4907965,4921532
68,0,Alaska,Alaska,713982,722349,730810,737626,737075,738430,742575,740983,736624,733603,731158
99,0,Arizona,Arizona,6407342,6473416,6556344,6634690,6732873,6832810,6944767,7048088,7164228,7291843,7421401
115,0,Arkansas,Arkansas,2921998,2941038,2952876,2960459,2968759,2979732,2991815,3003855,3012161,3020985,3030522
191,0,California,California,37319550,37636311,37944551,38253768,38586706,38904296,39149186,39337785,39437463,39437610,39368078


In [52]:
# Make sure the yearly population estimates are numeric before the percentage
# calculations.
# The columns are replaced with plain `[]` assignment on an explicit copy:
# assigning through `.loc[:, cols]` writes into the existing columns in place
# (so their dtype may stay unchanged on pandas >= 2.0) and, on a filtered
# slice, raises SettingWithCopyWarning.
estimate_columns = [f'POPESTIMATE{year}' for year in range(2010, 2021)]
state_df = state_df.copy()
state_df[estimate_columns] = state_df[estimate_columns].apply(pd.to_numeric)

state_df.dtypes

COUNTY              int64
STNAME             object
CTYNAME            object
POPESTIMATE2010     int64
POPESTIMATE2011     int64
POPESTIMATE2012     int64
POPESTIMATE2013     int64
POPESTIMATE2014     int64
POPESTIMATE2015     int64
POPESTIMATE2016     int64
POPESTIMATE2017     int64
POPESTIMATE2018     int64
POPESTIMATE2019     int64
POPESTIMATE2020     int64
dtype: object

In [53]:
# Preview the first rows of the state-level frame.
state_df.head()

Unnamed: 0,COUNTY,STNAME,CTYNAME,POPESTIMATE2010,POPESTIMATE2011,POPESTIMATE2012,POPESTIMATE2013,POPESTIMATE2014,POPESTIMATE2015,POPESTIMATE2016,POPESTIMATE2017,POPESTIMATE2018,POPESTIMATE2019,POPESTIMATE2020
0,0,Alabama,Alabama,4785514,4799642,4816632,4831586,4843737,4854803,4866824,4877989,4891628,4907965,4921532
68,0,Alaska,Alaska,713982,722349,730810,737626,737075,738430,742575,740983,736624,733603,731158
99,0,Arizona,Arizona,6407342,6473416,6556344,6634690,6732873,6832810,6944767,7048088,7164228,7291843,7421401
115,0,Arkansas,Arkansas,2921998,2941038,2952876,2960459,2968759,2979732,2991815,3003855,3012161,3020985,3030522
191,0,California,California,37319550,37636311,37944551,38253768,38586706,38904296,39149186,39337785,39437463,39437610,39368078


In [54]:
# Yearly population totals for the five states of interest.
# The COUNTY code is dropped because it is always 0 in the state-level frame.
states_raw_df = state_df.drop(columns='COUNTY')
selected_states = ['District of Columbia', 'Utah', 'Texas', 'Illinois', 'West Virginia']

# Index the totals by state name. NOTE(review): sum() also "adds" the CTYNAME
# strings; this is harmless only while each state has a single row here, which
# appears to be the case for the COUNTY == 0 rows.
select_states_population_df = states_raw_df.groupby("STNAME").sum()

# Keep only the selected states (rows stay in the alphabetical groupby order).
select_states_total_population_df = select_states_population_df[
    select_states_population_df.index.isin(selected_states)
]
select_states_total_population_df


Unnamed: 0_level_0,CTYNAME,POPESTIMATE2010,POPESTIMATE2011,POPESTIMATE2012,POPESTIMATE2013,POPESTIMATE2014,POPESTIMATE2015,POPESTIMATE2016,POPESTIMATE2017,POPESTIMATE2018,POPESTIMATE2019,POPESTIMATE2020
STNAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
District of Columbia,District of Columbia,605282,620290,635737,651559,663603,677014,687576,697079,704147,708253,712816
Illinois,Illinois,12840545,12867783,12883029,12895778,12885092,12859585,12821709,12779893,12724685,12667017,12587530
Texas,Texas,25241897,25645504,26084120,26479646,26963092,27468531,27914064,28291024,28624564,28986794,29360759
Utah,Utah,2775413,2814797,2854146,2898773,2938327,2983626,3044241,3103540,3155153,3203383,3249879
West Virginia,West Virginia,1854265,1856606,1857446,1854768,1850569,1843332,1832435,1818683,1805953,1795263,1784787


In [55]:
# Store the DataFrame so that another Jupyter notebook can load it
# (for example with `%store -r select_states_total_population_df`).
%store select_states_total_population_df 

Stored 'select_states_total_population_df' (DataFrame)


In [56]:
# Add the year-over-year percentage change for 2011-2020 and the cumulative
# 2010 -> 2020 change, each computed as (later / earlier - 1) * 100.
# All columns are built in one pass and attached with assign(), which returns
# a new frame: re-running the cell gives the same result and nothing is
# written into a slice of census_df (no SettingWithCopyWarning).
state_pct_changes = {
    f"Popestimate Yearly Change {year}(%)":
        (state_df[f"POPESTIMATE{year}"] / state_df[f"POPESTIMATE{year - 1}"] - 1) * 100
    for year in range(2011, 2021)
}

# Cumulative change over the whole 2010-2020 window.
state_pct_changes["Popestimate 10-Year Change 2020(%)"] = (
    state_df["POPESTIMATE2020"] / state_df["POPESTIMATE2010"] - 1
) * 100

state_df = state_df.assign(**state_pct_changes)

In [57]:
# Narrow the state frame down to the name columns plus the percentage-change
# columns (ten yearly changes and the 10-year change).
state_change_columns = [
    'STNAME',
    'CTYNAME',
    'Popestimate Yearly Change 2011(%)',
    'Popestimate Yearly Change 2012(%)',
    'Popestimate Yearly Change 2013(%)',
    'Popestimate Yearly Change 2014(%)',
    'Popestimate Yearly Change 2015(%)',
    'Popestimate Yearly Change 2016(%)',
    'Popestimate Yearly Change 2017(%)',
    'Popestimate Yearly Change 2018(%)',
    'Popestimate Yearly Change 2019(%)',
    'Popestimate Yearly Change 2020(%)',
    'Popestimate 10-Year Change 2020(%)',
]
state_pct_change_df = state_df.loc[:, state_change_columns]
state_pct_change_df.head()
                           

Unnamed: 0,STNAME,CTYNAME,Popestimate Yearly Change 2011(%),Popestimate Yearly Change 2012(%),Popestimate Yearly Change 2013(%),Popestimate Yearly Change 2014(%),Popestimate Yearly Change 2015(%),Popestimate Yearly Change 2016(%),Popestimate Yearly Change 2017(%),Popestimate Yearly Change 2018(%),Popestimate Yearly Change 2019(%),Popestimate Yearly Change 2020(%),Popestimate 10-Year Change 2020(%)
0,Alabama,Alabama,0.295224,0.353985,0.310466,0.251491,0.22846,0.24761,0.22941,0.279603,0.333979,0.276428,2.842286
68,Alaska,Alaska,1.171878,1.171317,0.932664,-0.074699,0.183835,0.561326,-0.214389,-0.588273,-0.410114,-0.333287,2.405663
99,Arizona,Arizona,1.031223,1.281055,1.194965,1.479843,1.484314,1.638521,1.487753,1.647823,1.781281,1.776752,15.826516
115,Arkansas,Arkansas,0.651609,0.402511,0.2568,0.280362,0.369616,0.405506,0.402431,0.276511,0.292946,0.315692,3.714034
191,California,California,0.84878,0.818996,0.814918,0.87034,0.823055,0.629468,0.481744,0.25339,0.000373,-0.176309,5.489155


In [58]:
# Rank the states by their 10-year change, highest first, and renumber the
# rows 0..n-1 so that the index label equals the rank position.
state_sorted_df = (
    state_pct_change_df
    .sort_values(by=["Popestimate 10-Year Change 2020(%)"], ascending=False)
    .reset_index(drop=True)
)
state_sorted_df.head()

Unnamed: 0,STNAME,CTYNAME,Popestimate Yearly Change 2011(%),Popestimate Yearly Change 2012(%),Popestimate Yearly Change 2013(%),Popestimate Yearly Change 2014(%),Popestimate Yearly Change 2015(%),Popestimate Yearly Change 2016(%),Popestimate Yearly Change 2017(%),Popestimate Yearly Change 2018(%),Popestimate Yearly Change 2019(%),Popestimate Yearly Change 2020(%),Popestimate 10-Year Change 2020(%)
0,District of Columbia,District of Columbia,2.479505,2.490287,2.488765,1.84849,2.020937,1.560086,1.382102,1.013945,0.583117,0.644261,17.765934
1,Utah,Utah,1.419032,1.397934,1.563585,1.364508,1.54166,2.031588,1.947908,1.663036,1.52861,1.451466,17.09533
2,Texas,Texas,1.598957,1.710304,1.516348,1.825727,1.874559,1.621976,1.35043,1.178961,1.265452,1.290122,16.317561
3,Idaho,Idaho,0.856432,0.734596,1.011523,1.25275,1.240437,1.90869,2.120442,1.879872,2.110984,2.115804,16.303215
4,Nevada,Nevada,0.393379,1.163092,1.176316,1.511691,1.759388,1.77875,1.799658,1.972614,1.981242,1.536445,16.125023


In [59]:
# Display the ranking of states by 10-year change (highest first).
state_sorted_df

Unnamed: 0,STNAME,CTYNAME,Popestimate Yearly Change 2011(%),Popestimate Yearly Change 2012(%),Popestimate Yearly Change 2013(%),Popestimate Yearly Change 2014(%),Popestimate Yearly Change 2015(%),Popestimate Yearly Change 2016(%),Popestimate Yearly Change 2017(%),Popestimate Yearly Change 2018(%),Popestimate Yearly Change 2019(%),Popestimate Yearly Change 2020(%),Popestimate 10-Year Change 2020(%)
0,District of Columbia,District of Columbia,2.479505,2.490287,2.488765,1.84849,2.020937,1.560086,1.382102,1.013945,0.583117,0.644261,17.765934
1,Utah,Utah,1.419032,1.397934,1.563585,1.364508,1.54166,2.031588,1.947908,1.663036,1.52861,1.451466,17.09533
2,Texas,Texas,1.598957,1.710304,1.516348,1.825727,1.874559,1.621976,1.35043,1.178961,1.265452,1.290122,16.317561
3,Idaho,Idaho,0.856432,0.734596,1.011523,1.25275,1.240437,1.90869,2.120442,1.879872,2.110984,2.115804,16.303215
4,Nevada,Nevada,0.393379,1.163092,1.176316,1.511691,1.759388,1.77875,1.799658,1.972614,1.981242,1.536445,16.125023
5,Arizona,Arizona,1.031223,1.281055,1.194965,1.479843,1.484314,1.638521,1.487753,1.647823,1.781281,1.776752,15.826516
6,Florida,Florida,1.111442,1.293105,1.29345,1.545658,1.839595,2.018516,1.696068,1.324478,1.115647,1.122536,15.319681
7,Colorado,Colorado,1.473213,1.401043,1.484772,1.553149,1.89983,1.641192,1.327184,1.419406,1.07652,0.854964,15.060409
8,Washington,Washington,1.252705,1.041673,0.980677,1.310303,1.555161,1.851105,1.753297,1.330677,1.15894,1.045282,14.097608
9,North Dakota,North Dakota,1.596735,2.436231,2.979378,2.155434,2.27429,0.07637,0.084776,0.436997,0.481803,0.207536,13.420783


In [60]:
# Show the state or territory with the highest 10-year change:
# the first row of the descending ranking.
highest_10y_change = state_sorted_df.iloc[0]
highest_10y_change

STNAME                                District of Columbia
CTYNAME                               District of Columbia
Popestimate Yearly Change 2011(%)                 2.479505
Popestimate Yearly Change 2012(%)                 2.490287
Popestimate Yearly Change 2013(%)                 2.488765
Popestimate Yearly Change 2014(%)                  1.84849
Popestimate Yearly Change 2015(%)                 2.020937
Popestimate Yearly Change 2016(%)                 1.560086
Popestimate Yearly Change 2017(%)                 1.382102
Popestimate Yearly Change 2018(%)                 1.013945
Popestimate Yearly Change 2019(%)                 0.583117
Popestimate Yearly Change 2020(%)                 0.644261
Popestimate 10-Year Change 2020(%)               17.765934
Name: 0, dtype: object

In [61]:
# Show the state or territory with the second highest 10-year change:
# the second row of the descending ranking.
# It gets its own name so it does not overwrite the top-ranked row held in
# highest_10y_change.
second_highest_10y_change = state_sorted_df.iloc[1]
second_highest_10y_change

STNAME                                    Utah
CTYNAME                                   Utah
Popestimate Yearly Change 2011(%)     1.419032
Popestimate Yearly Change 2012(%)     1.397934
Popestimate Yearly Change 2013(%)     1.563585
Popestimate Yearly Change 2014(%)     1.364508
Popestimate Yearly Change 2015(%)      1.54166
Popestimate Yearly Change 2016(%)     2.031588
Popestimate Yearly Change 2017(%)     1.947908
Popestimate Yearly Change 2018(%)     1.663036
Popestimate Yearly Change 2019(%)      1.52861
Popestimate Yearly Change 2020(%)     1.451466
Popestimate 10-Year Change 2020(%)    17.09533
Name: 1, dtype: object

In [62]:
# Show the state or territory with the lowest 10-year change:
# the last row of the descending ranking.
lowest_10y_change = state_sorted_df.iloc[-1]
lowest_10y_change

STNAME                                West Virginia
CTYNAME                               West Virginia
Popestimate Yearly Change 2011(%)          0.126249
Popestimate Yearly Change 2012(%)          0.045244
Popestimate Yearly Change 2013(%)         -0.144176
Popestimate Yearly Change 2014(%)         -0.226389
Popestimate Yearly Change 2015(%)         -0.391069
Popestimate Yearly Change 2016(%)         -0.591158
Popestimate Yearly Change 2017(%)         -0.750477
Popestimate Yearly Change 2018(%)         -0.699957
Popestimate Yearly Change 2019(%)         -0.591931
Popestimate Yearly Change 2020(%)         -0.583536
Popestimate 10-Year Change 2020(%)        -3.746929
Name: 50, dtype: object

In [63]:
# Show the state or territory with the second lowest 10-year change:
# the next-to-last row of the descending ranking.
# It gets its own name so it does not overwrite the bottom-ranked row held in
# lowest_10y_change.
second_lowest_10y_change = state_sorted_df.iloc[-2]
second_lowest_10y_change

STNAME                                Illinois
CTYNAME                               Illinois
Popestimate Yearly Change 2011(%)     0.212125
Popestimate Yearly Change 2012(%)     0.118482
Popestimate Yearly Change 2013(%)      0.09896
Popestimate Yearly Change 2014(%)    -0.082864
Popestimate Yearly Change 2015(%)    -0.197957
Popestimate Yearly Change 2016(%)    -0.294535
Popestimate Yearly Change 2017(%)    -0.326134
Popestimate Yearly Change 2018(%)    -0.431991
Popestimate Yearly Change 2019(%)    -0.453198
Popestimate Yearly Change 2020(%)    -0.627512
Popestimate 10-Year Change 2020(%)   -1.970438
Name: 49, dtype: object

In [64]:
# Build a frame with only the states at both ends of the ranking: the three
# with the highest 10-year change and the two with the lowest. In the current
# data these are District of Columbia, Utah, Texas, Illinois and West Virginia.
first_3_rows = state_sorted_df.head(3)
last_2_rows = state_sorted_df.tail(2)

# Stack both selections; the rank-based index labels are kept as they are.
select_states_population_change_df = pd.concat(objs=[first_3_rows, last_2_rows])
select_states_population_change_df

Unnamed: 0,STNAME,CTYNAME,Popestimate Yearly Change 2011(%),Popestimate Yearly Change 2012(%),Popestimate Yearly Change 2013(%),Popestimate Yearly Change 2014(%),Popestimate Yearly Change 2015(%),Popestimate Yearly Change 2016(%),Popestimate Yearly Change 2017(%),Popestimate Yearly Change 2018(%),Popestimate Yearly Change 2019(%),Popestimate Yearly Change 2020(%),Popestimate 10-Year Change 2020(%)
0,District of Columbia,District of Columbia,2.479505,2.490287,2.488765,1.84849,2.020937,1.560086,1.382102,1.013945,0.583117,0.644261,17.765934
1,Utah,Utah,1.419032,1.397934,1.563585,1.364508,1.54166,2.031588,1.947908,1.663036,1.52861,1.451466,17.09533
2,Texas,Texas,1.598957,1.710304,1.516348,1.825727,1.874559,1.621976,1.35043,1.178961,1.265452,1.290122,16.317561
49,Illinois,Illinois,0.212125,0.118482,0.09896,-0.082864,-0.197957,-0.294535,-0.326134,-0.431991,-0.453198,-0.627512,-1.970438
50,West Virginia,West Virginia,0.126249,0.045244,-0.144176,-0.226389,-0.391069,-0.591158,-0.750477,-0.699957,-0.591931,-0.583536,-3.746929


In [65]:
# Store the DataFrame so that another Jupyter notebook can load it
# (for example with `%store -r select_states_population_change_df`).
%store select_states_population_change_df

Stored 'select_states_population_change_df' (DataFrame)


# County analysis

In [66]:
# Keep only the county-level rows, i.e. every row whose COUNTY code is not 0.
# .copy() makes county_df an independent frame: later cells assign columns
# into it, and doing that on a boolean-filtered slice of census_df triggers
# pandas' SettingWithCopyWarning.
county_df = census_df[census_df['COUNTY'] != 0].copy()
county_df

Unnamed: 0,COUNTY,STNAME,CTYNAME,POPESTIMATE2010,POPESTIMATE2011,POPESTIMATE2012,POPESTIMATE2013,POPESTIMATE2014,POPESTIMATE2015,POPESTIMATE2016,POPESTIMATE2017,POPESTIMATE2018,POPESTIMATE2019,POPESTIMATE2020
1,1,Alabama,Autauga County,54761,55229,54970,54747,54922,54903,55302,55448,55533,55769,56145
2,3,Alabama,Baldwin County,183121,186579,190203,194978,199306,203101,207787,212737,218071,223565,229287
3,5,Alabama,Barbour County,27325,27344,27172,26946,26768,26300,25828,25169,24887,24657,24589
4,7,Alabama,Bibb County,22858,22736,22657,22510,22541,22553,22590,22532,22300,22313,22136
5,9,Alabama,Blount County,57372,57561,57585,57630,57536,57535,57487,57801,57770,57840,57879
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3189,37,Wyoming,Sweetwater County,43580,44000,45032,45189,44996,44780,44319,43663,43188,42917,42673
3190,39,Wyoming,Teton County,21298,21422,21643,22335,22801,23083,23255,23383,23261,23385,23497
3191,41,Wyoming,Uinta County,21090,20901,21008,20969,20835,20777,20711,20449,20299,20196,20215
3192,43,Wyoming,Washakie County,8531,8451,8410,8417,8277,8282,8180,8013,7886,7824,7760


In [67]:
# Make sure the yearly population estimates are numeric before the percentage
# calculations.
# The columns are replaced with plain `[]` assignment on an explicit copy:
# assigning through `.loc[:, cols]` writes into the existing columns in place
# (so their dtype may stay unchanged on pandas >= 2.0) and, on a filtered
# slice, raises SettingWithCopyWarning.
county_estimate_columns = [f'POPESTIMATE{year}' for year in range(2010, 2021)]
county_df = county_df.copy()
county_df[county_estimate_columns] = county_df[county_estimate_columns].apply(pd.to_numeric)

county_df.dtypes

COUNTY              int64
STNAME             object
CTYNAME            object
POPESTIMATE2010     int64
POPESTIMATE2011     int64
POPESTIMATE2012     int64
POPESTIMATE2013     int64
POPESTIMATE2014     int64
POPESTIMATE2015     int64
POPESTIMATE2016     int64
POPESTIMATE2017     int64
POPESTIMATE2018     int64
POPESTIMATE2019     int64
POPESTIMATE2020     int64
dtype: object

In [69]:
# Add the year-over-year percentage change for 2011-2020 and the cumulative
# 2010 -> 2020 change, each computed as (later / earlier - 1) * 100.
# All columns are built in one pass and attached with assign(), which returns
# a new frame: re-running the cell gives the same result and nothing is
# written into a slice of census_df (no SettingWithCopyWarning).
county_pct_changes = {
    f"Popestimate Yearly Change {year}(%)":
        (county_df[f"POPESTIMATE{year}"] / county_df[f"POPESTIMATE{year - 1}"] - 1) * 100
    for year in range(2011, 2021)
}

# Cumulative change over the whole 2010-2020 window.
county_pct_changes["Popestimate 10-Year Change 2020(%)"] = (
    county_df["POPESTIMATE2020"] / county_df["POPESTIMATE2010"] - 1
) * 100

county_df = county_df.assign(**county_pct_changes)

In [70]:
# Narrow the county frame down to the name columns plus the percentage-change
# columns (ten yearly changes and the 10-year change).
county_change_columns = [
    'STNAME',
    'CTYNAME',
    'Popestimate Yearly Change 2011(%)',
    'Popestimate Yearly Change 2012(%)',
    'Popestimate Yearly Change 2013(%)',
    'Popestimate Yearly Change 2014(%)',
    'Popestimate Yearly Change 2015(%)',
    'Popestimate Yearly Change 2016(%)',
    'Popestimate Yearly Change 2017(%)',
    'Popestimate Yearly Change 2018(%)',
    'Popestimate Yearly Change 2019(%)',
    'Popestimate Yearly Change 2020(%)',
    'Popestimate 10-Year Change 2020(%)',
]
county_pct_change_df = county_df.loc[:, county_change_columns]
county_pct_change_df.head()
    

Unnamed: 0,STNAME,CTYNAME,Popestimate Yearly Change 2011(%),Popestimate Yearly Change 2012(%),Popestimate Yearly Change 2013(%),Popestimate Yearly Change 2014(%),Popestimate Yearly Change 2015(%),Popestimate Yearly Change 2016(%),Popestimate Yearly Change 2017(%),Popestimate Yearly Change 2018(%),Popestimate Yearly Change 2019(%),Popestimate Yearly Change 2020(%),Popestimate 10-Year Change 2020(%)
1,Alabama,Autauga County,0.854623,-0.468957,-0.405676,0.319652,-0.034595,0.726736,0.264005,0.153297,0.424973,0.67421,2.527346
2,Alabama,Baldwin County,1.888369,1.942341,2.510476,2.219738,1.904107,2.307226,2.382247,2.507321,2.519363,2.559435,25.210653
3,Alabama,Barbour County,0.069533,-0.629023,-0.831739,-0.66058,-1.748356,-1.794677,-2.551495,-1.120426,-0.924177,-0.275784,-10.012809
4,Alabama,Bibb County,-0.53373,-0.347467,-0.648806,0.137717,0.053236,0.164058,-0.256751,-1.029647,0.058296,-0.79326,-3.158632
5,Alabama,Blount County,0.329429,0.041695,0.078145,-0.163109,-0.001738,-0.083427,0.54621,-0.053632,0.12117,0.067427,0.883706


In [71]:
# Rank the counties by their 10-year change, highest first, and renumber the
# rows 0..n-1 so that the index label equals the rank position.
county_sorted_df = (
    county_pct_change_df
    .sort_values(by=["Popestimate 10-Year Change 2020(%)"], ascending=False)
    .reset_index(drop=True)
)
county_sorted_df.head()

Unnamed: 0,STNAME,CTYNAME,Popestimate Yearly Change 2011(%),Popestimate Yearly Change 2012(%),Popestimate Yearly Change 2013(%),Popestimate Yearly Change 2014(%),Popestimate Yearly Change 2015(%),Popestimate Yearly Change 2016(%),Popestimate Yearly Change 2017(%),Popestimate Yearly Change 2018(%),Popestimate Yearly Change 2019(%),Popestimate Yearly Change 2020(%),Popestimate 10-Year Change 2020(%)
0,North Dakota,McKenzie County,9.451029,13.835851,16.347478,18.31092,16.549968,-1.661855,0.880673,6.810853,10.728223,1.356563,137.710543
1,Texas,Loving County,13.095238,-9.473684,23.255814,-16.037736,33.707865,-1.680672,13.675214,12.030075,10.738255,9.69697,115.47619
2,North Dakota,Williams County,8.082865,9.575296,10.719492,8.584546,9.988808,-3.089403,-1.887068,6.120872,6.160009,2.118901,71.307158
3,Texas,Hays County,3.210278,3.214616,4.469556,5.019524,5.317053,5.135466,5.025882,3.753555,3.351755,4.759115,52.679554
4,Texas,Comal County,2.577051,2.469499,3.277718,3.880738,4.54457,4.297354,4.696893,5.336091,5.535317,5.35494,50.773481


In [72]:
# Show the county with the highest 10-year change:
# the first row of the descending county ranking.
highest_10y_change = county_sorted_df.iloc[0]
highest_10y_change

STNAME                                   North Dakota
CTYNAME                               McKenzie County
Popestimate Yearly Change 2011(%)            9.451029
Popestimate Yearly Change 2012(%)           13.835851
Popestimate Yearly Change 2013(%)           16.347478
Popestimate Yearly Change 2014(%)            18.31092
Popestimate Yearly Change 2015(%)           16.549968
Popestimate Yearly Change 2016(%)           -1.661855
Popestimate Yearly Change 2017(%)            0.880673
Popestimate Yearly Change 2018(%)            6.810853
Popestimate Yearly Change 2019(%)           10.728223
Popestimate Yearly Change 2020(%)            1.356563
Popestimate 10-Year Change 2020(%)         137.710543
Name: 0, dtype: object

In [73]:
# Show the county with the second highest 10-year change:
# the second row of the descending county ranking.
# Note: this rebinds highest_10y_change, so the name now holds the runner-up.
highest_10y_change = county_sorted_df.iloc[1]
highest_10y_change

STNAME                                        Texas
CTYNAME                               Loving County
Popestimate Yearly Change 2011(%)         13.095238
Popestimate Yearly Change 2012(%)         -9.473684
Popestimate Yearly Change 2013(%)         23.255814
Popestimate Yearly Change 2014(%)        -16.037736
Popestimate Yearly Change 2015(%)         33.707865
Popestimate Yearly Change 2016(%)         -1.680672
Popestimate Yearly Change 2017(%)         13.675214
Popestimate Yearly Change 2018(%)         12.030075
Popestimate Yearly Change 2019(%)         10.738255
Popestimate Yearly Change 2020(%)           9.69697
Popestimate 10-Year Change 2020(%)        115.47619
Name: 1, dtype: object

In [74]:
# Show the county with the lowest 10-year change:
# the last row of the descending county ranking.
lowest_10y_change = county_sorted_df.iloc[-1]
lowest_10y_change

STNAME                                        Illinois
CTYNAME                               Alexander County
Popestimate Yearly Change 2011(%)             -2.58379
Popestimate Yearly Change 2012(%)            -3.402978
Popestimate Yearly Change 2013(%)             -6.20386
Popestimate Yearly Change 2014(%)            -2.181718
Popestimate Yearly Change 2015(%)            -4.531338
Popestimate Yearly Change 2016(%)            -4.657696
Popestimate Yearly Change 2017(%)            -2.667494
Popestimate Yearly Change 2018(%)            -3.712556
Popestimate Yearly Change 2019(%)            -4.170114
Popestimate Yearly Change 2020(%)            -5.076843
Popestimate 10-Year Change 2020(%)          -33.004266
Name: 3142, dtype: object

In [75]:
# Show the county with the second lowest 10-year change:
# the next-to-last row of the descending county ranking.
# Note: this rebinds lowest_10y_change, so the name now holds the second-lowest row.
lowest_10y_change = county_sorted_df.iloc[-2]
lowest_10y_change

STNAME                                        Texas
CTYNAME                               Concho County
Popestimate Yearly Change 2011(%)          0.487686
Popestimate Yearly Change 2012(%)         -0.946372
Popestimate Yearly Change 2013(%)          0.881921
Popestimate Yearly Change 2014(%)         -0.922778
Popestimate Yearly Change 2015(%)         -0.367647
Popestimate Yearly Change 2016(%)          1.697417
Popestimate Yearly Change 2017(%)        -34.663764
Popestimate Yearly Change 2018(%)         -0.814513
Popestimate Yearly Change 2019(%)          2.612915
Popestimate Yearly Change 2020(%)          2.837395
Popestimate 10-Year Change 2020(%)       -31.065594
Name: 3141, dtype: object