In [76]:
# import dependencies
import pandas as pd
import mitosheet

## Cleaning data on total population

In [77]:
# load total county population data into dataframe
county_pop_df = pd.read_csv('Resources/County_population_totals_2010_2019_co-est2019-alldata.csv')
county_pop_df.head()

Unnamed: 0,SUMLEV,REGION,DIVISION,STATE,COUNTY,STNAME,CTYNAME,State and County name,CENSUS2010POP,ESTIMATESBASE2010,...,RDOMESTICMIG2019,RNETMIG2011,RNETMIG2012,RNETMIG2013,RNETMIG2014,RNETMIG2015,RNETMIG2016,RNETMIG2017,RNETMIG2018,RNETMIG2019
0,40,3,6,1,0,Alabama,Alabama,"Alabama, Alabama",4779736,4780125,...,1.917501,0.578434,1.186314,1.522549,0.563489,0.626357,0.745172,1.090366,1.773786,2.483744
1,50,3,6,1,1,Alabama,Autauga County,"Autauga County, Alabama",54571,54597,...,4.84731,6.018182,-6.226119,-3.902226,1.970443,-1.712875,4.777171,0.849656,0.540916,4.560062
2,50,3,6,1,3,Alabama,Baldwin County,"Baldwin County, Alabama",182265,182265,...,24.017829,16.64187,17.488579,22.751474,20.184334,17.725964,21.279291,22.398256,24.727215,24.380567
3,50,3,6,1,5,Alabama,Barbour County,"Barbour County, Alabama",27457,27455,...,-5.690302,0.292676,-6.897817,-8.132185,-5.140431,-15.724575,-18.238016,-24.998528,-8.754922,-5.165664
4,50,3,6,1,7,Alabama,Bibb County,"Bibb County, Alabama",22915,22915,...,1.385134,-4.998356,-3.787545,-5.797999,1.331144,1.329817,-0.708717,-3.234669,-6.857092,1.831952


In [78]:
# create dataframe with state, county, 2015 population, and 2019 population
county_pop_15_19_df = county_pop_df[['STNAME', 'CTYNAME',
       'State and County name','POPESTIMATE2015','POPESTIMATE2019']]

county_pop_15_19_df = county_pop_15_19_df[county_pop_15_19_df.STNAME != county_pop_15_19_df.CTYNAME]
county_pop_15_19_df.head()

Unnamed: 0,STNAME,CTYNAME,State and County name,POPESTIMATE2015,POPESTIMATE2019
1,Alabama,Autauga County,"Autauga County, Alabama",54864,55869
2,Alabama,Baldwin County,"Baldwin County, Alabama",202939,223234
3,Alabama,Barbour County,"Barbour County, Alabama",26283,24686
4,Alabama,Bibb County,"Bibb County, Alabama",22566,22394
5,Alabama,Blount County,"Blount County, Alabama",57526,57826


In [79]:
from mitosheet import *; register_analysis('UUID-179d7662-be95-47c5-885f-749412f02246')

# Reordered State and County name in county_pop_15_19_df
county_pop_15_19_df_columns = [col for col in county_pop_15_19_df.columns if col != 'State and County name']
county_pop_15_19_df_columns.insert(4, 'State and County name')
county_pop_15_19_df = county_pop_15_19_df[county_pop_15_19_df_columns]

# Reordered State and County name in county_pop_15_19_df
county_pop_15_19_df_columns = [col for col in county_pop_15_19_df.columns if col != 'State and County name']
county_pop_15_19_df_columns.insert(2, 'State and County name')
county_pop_15_19_df = county_pop_15_19_df[county_pop_15_19_df_columns]

county_pop_15_19_df.head()

Unnamed: 0,STNAME,CTYNAME,State and County name,POPESTIMATE2015,POPESTIMATE2019
1,Alabama,Autauga County,"Autauga County, Alabama",54864,55869
2,Alabama,Baldwin County,"Baldwin County, Alabama",202939,223234
3,Alabama,Barbour County,"Barbour County, Alabama",26283,24686
4,Alabama,Bibb County,"Bibb County, Alabama",22566,22394
5,Alabama,Blount County,"Blount County, Alabama",57526,57826


## Cleaning data on migration

### Reading in data from text file that doesn't have column names

In [80]:
# create column names for datafame
col_names = ['State Name of Geography A','County Name of Geography A',
            'State/U.S. Island Area/Foreign Region of Geography B', 'County Name of Geography B', 
            'Flow from Geography B to Geography A - Est', 'Flow from Geography B t',
            'Counterflow from Geography A to Geography B - Est', 'Counterflow from Geography A to Geography B - MOE',
            'Net Migration from Geography B to Geography A - Est', 'Net Migration from Geography B to Geography A - MOE',
            'Gross Migration between Geography A and Geography B - Est', 'Gross Migration between Geography A and Geography B - MOE']

In [81]:
# load population migration data
migration_data = pd.read_csv('Resources/Net_Gross_US.txt',sep=' ', names = col_names, delimiter=r'\s{3,}')
migration_df = pd.DataFrame(migration_data,index=None)
migration_df.head()

  migration_data = pd.read_csv('Resources/Net_Gross_US.txt',sep=' ', names = col_names, delimiter=r'\s{3,}')


Unnamed: 0,State Name of Geography A,County Name of Geography A,State/U.S. Island Area/Foreign Region of Geography B,County Name of Geography B,Flow from Geography B to Geography A - Est,Flow from Geography B t,Counterflow from Geography A to Geography B - Est,Counterflow from Geography A to Geography B - MOE,Net Migration from Geography B to Geography A - Est,Net Migration from Geography B to Geography A - MOE,Gross Migration between Geography A and Geography B - Est,Gross Migration between Geography A and Geography B - MOE
0,001001001003 Alabama,Autauga County,Alabama,Baldwin County,25,26,448,461.0,-423.0,461.0,473.0,463.0
1,001001001005 Alabama,Autauga County,Alabama,Barbour County,4,8,0,22.0,4.0,8.0,4.0,8.0
2,001001001007 Alabama,Autauga County,Alabama,Bibb County,10,16,34,52.0,-24.0,54.0,44.0,54.0
3,001001001009 Alabama,Autauga County,Alabama,Blount County,3,6,0,28.0,3.0,6.0,3.0,6.0
4,001001001015 Alabama,Autauga County,Alabama,Calhoun County,2,4,42,46.0,-40.0,47.0,44.0,46.0


In [82]:
# create cleaned dataframe for with just state and county name and migration
cleaned_migration_df = migration_df[['State Name of Geography A','County Name of Geography A', 'Net Migration from Geography B to Geography A - Est']]
cleaned_migration_df.head()

Unnamed: 0,State Name of Geography A,County Name of Geography A,Net Migration from Geography B to Geography A - Est
0,001001001003 Alabama,Autauga County,-423.0
1,001001001005 Alabama,Autauga County,4.0
2,001001001007 Alabama,Autauga County,-24.0
3,001001001009 Alabama,Autauga County,3.0
4,001001001015 Alabama,Autauga County,-40.0


In [83]:
cleaned_migration_df['State Name of Geography A']=cleaned_migration_df['State Name of Geography A'].str.replace('\d+', '')
cleaned_migration_df.head()

  cleaned_migration_df['State Name of Geography A']=cleaned_migration_df['State Name of Geography A'].str.replace('\d+', '')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cleaned_migration_df['State Name of Geography A']=cleaned_migration_df['State Name of Geography A'].str.replace('\d+', '')


Unnamed: 0,State Name of Geography A,County Name of Geography A,Net Migration from Geography B to Geography A - Est
0,Alabama,Autauga County,-423.0
1,Alabama,Autauga County,4.0
2,Alabama,Autauga County,-24.0
3,Alabama,Autauga County,3.0
4,Alabama,Autauga County,-40.0


In [84]:
# rename columns
cleaned_migration_df.rename(columns = 
                        {'State Name of Geography A':'State',
                        'County Name of Geography A':'County',
                        'Net Migration from Geography B to Geography A - Est':'Net_Migration_2015-19'},
                       inplace = True)
cleaned_migration_df.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


Unnamed: 0,State,County,Net_Migration_2015-19
0,Alabama,Autauga County,-423.0
1,Alabama,Autauga County,4.0
2,Alabama,Autauga County,-24.0
3,Alabama,Autauga County,3.0
4,Alabama,Autauga County,-40.0


In [85]:
# Create column that combines County and State
cleaned_migration_df['State and County name'] = cleaned_migration_df[['County', 'State']].agg(','.join, axis=1)
cleaned_migration_df.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cleaned_migration_df['State and County name'] = cleaned_migration_df[['County', 'State']].agg(','.join, axis=1)


Unnamed: 0,State,County,Net_Migration_2015-19,State and County name
0,Alabama,Autauga County,-423.0,"Autauga County, Alabama"
1,Alabama,Autauga County,4.0,"Autauga County, Alabama"
2,Alabama,Autauga County,-24.0,"Autauga County, Alabama"
3,Alabama,Autauga County,3.0,"Autauga County, Alabama"
4,Alabama,Autauga County,-40.0,"Autauga County, Alabama"


In [86]:
# drop null values
cleaned_migration_df=cleaned_migration_df.dropna()
cleaned_migration_df.head()

Unnamed: 0,State,County,Net_Migration_2015-19,State and County name
0,Alabama,Autauga County,-423.0,"Autauga County, Alabama"
1,Alabama,Autauga County,4.0,"Autauga County, Alabama"
2,Alabama,Autauga County,-24.0,"Autauga County, Alabama"
3,Alabama,Autauga County,3.0,"Autauga County, Alabama"
4,Alabama,Autauga County,-40.0,"Autauga County, Alabama"


In [87]:
cleaned_migration_df.shape

(426676, 4)

In [88]:
# remove state and county columns
cleaned_migration_df = cleaned_migration_df[['State and County name','Net_Migration_2015-19']]
cleaned_migration_df.head()

Unnamed: 0,State and County name,Net_Migration_2015-19
0,"Autauga County, Alabama",-423.0
1,"Autauga County, Alabama",4.0
2,"Autauga County, Alabama",-24.0
3,"Autauga County, Alabama",3.0
4,"Autauga County, Alabama",-40.0


In [89]:
# group by county to find net migration for each county
net_migration_df = cleaned_migration_df.groupby(['State and County name'], as_index=False).sum()
net_migration_df.head()

Unnamed: 0,State and County name,Net_Migration_2015-19
0,"Abbeville County, South Carolina",110.0
1,"Acadia Parish, Louisiana",-1701.0
2,"Accomack County, Virginia",-953.0
3,"Ada County, Idaho",4908.0
4,"Adair County, Iowa",-408.0


## Merge DataFrames

In [90]:
# merge dataframes
pop_migration_df = county_pop_15_19_df.merge(net_migration_df, on='State and County name')
pop_migration_df.head()

Unnamed: 0,STNAME,CTYNAME,State and County name,POPESTIMATE2015,POPESTIMATE2019,Net_Migration_2015-19
0,Alabama,Autauga County,"Autauga County, Alabama",54864,55869,-2535.0
1,Alabama,Baldwin County,"Baldwin County, Alabama",202939,223234,2295.0
2,Alabama,Barbour County,"Barbour County, Alabama",26283,24686,-287.0
3,Alabama,Bibb County,"Bibb County, Alabama",22566,22394,156.0
4,Alabama,Blount County,"Blount County, Alabama",57526,57826,-753.0


In [91]:
from mitosheet import *; register_analysis('UUID-b7812c44-6066-4ec1-9b2b-962204249c11')

# Reordered State and County name in pop_migration_df
pop_migration_df_columns = [col for col in pop_migration_df.columns if col != 'State and County name']
pop_migration_df_columns.insert(2, 'State and County name')
pop_migration_df = pop_migration_df[pop_migration_df_columns]

pop_migration_df.head()

Unnamed: 0,STNAME,CTYNAME,State and County name,POPESTIMATE2015,POPESTIMATE2019,Net_Migration_2015-19
0,Alabama,Autauga County,"Autauga County, Alabama",54864,55869,-2535.0
1,Alabama,Baldwin County,"Baldwin County, Alabama",202939,223234,2295.0
2,Alabama,Barbour County,"Barbour County, Alabama",26283,24686,-287.0
3,Alabama,Bibb County,"Bibb County, Alabama",22566,22394,156.0
4,Alabama,Blount County,"Blount County, Alabama",57526,57826,-753.0


## Create pivot table showing population change due to population for each county

In [92]:
# add column showing net migration as percentage of 2015 population
pop_migration_df['%_Change_Due_to_Migration'] = pop_migration_df['Net_Migration_2015-19']/pop_migration_df['POPESTIMATE2015']
pop_migration_df.head()

Unnamed: 0,STNAME,CTYNAME,State and County name,POPESTIMATE2015,POPESTIMATE2019,Net_Migration_2015-19,%_Change_Due_to_Migration
0,Alabama,Autauga County,"Autauga County, Alabama",54864,55869,-2535.0,-0.046205
1,Alabama,Baldwin County,"Baldwin County, Alabama",202939,223234,2295.0,0.011309
2,Alabama,Barbour County,"Barbour County, Alabama",26283,24686,-287.0,-0.01092
3,Alabama,Bibb County,"Bibb County, Alabama",22566,22394,156.0,0.006913
4,Alabama,Blount County,"Blount County, Alabama",57526,57826,-753.0,-0.01309


In [93]:
# rename the columns
pop_migration_df.rename(columns = 
                        {'STNAME':'State',
                        'CTYNAME':'County',
                        'State and County name':'County_and_State',
                        'POPESTIMATE2015':'2015_Population',
                        'POPESTIMATE2019':'2019_Population'},
                       inplace = True)

In [94]:
# create pivot table
pop_migration_piv = pd.pivot_table(pop_migration_df, 
               index = ['State','County'], 
               values = ['2015_Population','Net_Migration_2015-19','%_Change_Due_to_Migration'])

In [95]:
pop_migration_piv

Unnamed: 0_level_0,Unnamed: 1_level_0,%_Change_Due_to_Migration,2015_Population,Net_Migration_2015-19
State,County,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Alabama,Autauga County,-0.046205,54864,-2535.0
Alabama,Baldwin County,0.011309,202939,2295.0
Alabama,Barbour County,-0.010920,26283,-287.0
Alabama,Bibb County,0.006913,22566,156.0
Alabama,Blount County,-0.013090,57526,-753.0
...,...,...,...,...
Wyoming,Sweetwater County,0.006373,44719,285.0
Wyoming,Teton County,0.012800,23047,295.0
Wyoming,Uinta County,-0.002890,20763,-60.0
Wyoming,Washakie County,-0.001450,8278,-12.0


In [None]:
# save final cleaned dataframe to csv
pop_migration_df.to_csv('Resources/county_level_migration_15-19.csv', index = False)

## Creating a new dataframe for database upload

### Cleaning up migration_df

In [39]:
# Selecting columns from migration_df
New_Migration_df = migration_df[['State Name of Geography A','County Name of Geography A',
            'State/U.S. Island Area/Foreign Region of Geography B', 'County Name of Geography B', 
            'Flow from Geography B to Geography A - Est',
            'Counterflow from Geography A to Geography B - Est',
            'Net Migration from Geography B to Geography A - Est', 
            'Gross Migration between Geography A and Geography B - Est']]
New_Migration_df.head()

Unnamed: 0,State Name of Geography A,County Name of Geography A,State/U.S. Island Area/Foreign Region of Geography B,County Name of Geography B,Flow from Geography B to Geography A - Est,Counterflow from Geography A to Geography B - Est,Net Migration from Geography B to Geography A - Est,Gross Migration between Geography A and Geography B - Est
0,001001001003 Alabama,Autauga County,Alabama,Baldwin County,25,448,-423.0,473.0
1,001001001005 Alabama,Autauga County,Alabama,Barbour County,4,0,4.0,4.0
2,001001001007 Alabama,Autauga County,Alabama,Bibb County,10,34,-24.0,44.0
3,001001001009 Alabama,Autauga County,Alabama,Blount County,3,0,3.0,3.0
4,001001001015 Alabama,Autauga County,Alabama,Calhoun County,2,42,-40.0,44.0


In [40]:
# Renaming columns 
New_Migration_df.rename(columns = {'State Name of Geography A':'State_A', 
                                'County Name of Geography A':'County_A', 
                                'State/U.S. Island Area/Foreign Region of Geography B':'State_B',
                                'County Name of Geography B':'County_B', 
                                'Flow from Geography B to Geography A - Est':'Flow_From_Geo_A_to_Geo_B',
                                'Counterflow from Geography A to Geography B - Est':'Counterflow_From_Geo_A_to_Geo_B', 
                                'Net Migration from Geography B to Geography A - Est':'Net_Migration_from_Geo_B_to_Geo_A',
                                'Gross Migration between Geography A and Geography B - Est':'Gross_Migration_between_Geo_A_and_Geo_B'},
                                inplace =True)
New_Migration_df.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


Unnamed: 0,State_A,County_A,State_B,County_B,Flow_From_Geo_A_to_Geo_B,Counterflow_From_Geo_A_to_Geo_B,Net_Migration_from_Geo_B_to_Geo_A,Gross_Migration_between_Geo_A_and_Geo_B
0,001001001003 Alabama,Autauga County,Alabama,Baldwin County,25,448,-423.0,473.0
1,001001001005 Alabama,Autauga County,Alabama,Barbour County,4,0,4.0,4.0
2,001001001007 Alabama,Autauga County,Alabama,Bibb County,10,34,-24.0,44.0
3,001001001009 Alabama,Autauga County,Alabama,Blount County,3,0,3.0,3.0
4,001001001015 Alabama,Autauga County,Alabama,Calhoun County,2,42,-40.0,44.0


In [41]:
# Removing unnecessary strings from State_A 
New_Migration_df['State_A']=New_Migration_df['State_A'].str.replace('\d+', '')
New_Migration_df.head()

  New_Migration_df['State_A']=New_Migration_df['State_A'].str.replace('\d+', '')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  New_Migration_df['State_A']=New_Migration_df['State_A'].str.replace('\d+', '')


Unnamed: 0,State_A,County_A,State_B,County_B,Flow_From_Geo_A_to_Geo_B,Counterflow_From_Geo_A_to_Geo_B,Net_Migration_from_Geo_B_to_Geo_A,Gross_Migration_between_Geo_A_and_Geo_B
0,Alabama,Autauga County,Alabama,Baldwin County,25,448,-423.0,473.0
1,Alabama,Autauga County,Alabama,Barbour County,4,0,4.0,4.0
2,Alabama,Autauga County,Alabama,Bibb County,10,34,-24.0,44.0
3,Alabama,Autauga County,Alabama,Blount County,3,0,3.0,3.0
4,Alabama,Autauga County,Alabama,Calhoun County,2,42,-40.0,44.0


In [42]:
# Creating new columns combining state and county
New_Migration_df['State_and_County_A'] = New_Migration_df[['County_A', 'State_A']].agg(','.join, axis=1)
New_Migration_df['State_and_County_B'] = New_Migration_df[['County_B', 'State_B']].agg(','.join, axis=1)
New_Migration_df.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  New_Migration_df['State_and_County_A'] = New_Migration_df[['County_A', 'State_A']].agg(','.join, axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  New_Migration_df['State_and_County_B'] = New_Migration_df[['County_B', 'State_B']].agg(','.join, axis=1)


Unnamed: 0,State_A,County_A,State_B,County_B,Flow_From_Geo_A_to_Geo_B,Counterflow_From_Geo_A_to_Geo_B,Net_Migration_from_Geo_B_to_Geo_A,Gross_Migration_between_Geo_A_and_Geo_B,State_and_County_A,State_and_County_B
0,Alabama,Autauga County,Alabama,Baldwin County,25,448,-423.0,473.0,"Autauga County, Alabama","Baldwin County,Alabama"
1,Alabama,Autauga County,Alabama,Barbour County,4,0,4.0,4.0,"Autauga County, Alabama","Barbour County,Alabama"
2,Alabama,Autauga County,Alabama,Bibb County,10,34,-24.0,44.0,"Autauga County, Alabama","Bibb County,Alabama"
3,Alabama,Autauga County,Alabama,Blount County,3,0,3.0,3.0,"Autauga County, Alabama","Blount County,Alabama"
4,Alabama,Autauga County,Alabama,Calhoun County,2,42,-40.0,44.0,"Autauga County, Alabama","Calhoun County,Alabama"


In [44]:
# drop null values
New_Migration_df=New_Migration_df.dropna()
New_Migration_df.head()

Unnamed: 0,State_A,County_A,State_B,County_B,Flow_From_Geo_A_to_Geo_B,Counterflow_From_Geo_A_to_Geo_B,Net_Migration_from_Geo_B_to_Geo_A,Gross_Migration_between_Geo_A_and_Geo_B,State_and_County_A,State_and_County_B
0,Alabama,Autauga County,Alabama,Baldwin County,25,448,-423.0,473.0,"Autauga County, Alabama","Baldwin County,Alabama"
1,Alabama,Autauga County,Alabama,Barbour County,4,0,4.0,4.0,"Autauga County, Alabama","Barbour County,Alabama"
2,Alabama,Autauga County,Alabama,Bibb County,10,34,-24.0,44.0,"Autauga County, Alabama","Bibb County,Alabama"
3,Alabama,Autauga County,Alabama,Blount County,3,0,3.0,3.0,"Autauga County, Alabama","Blount County,Alabama"
4,Alabama,Autauga County,Alabama,Calhoun County,2,42,-40.0,44.0,"Autauga County, Alabama","Calhoun County,Alabama"


### Cleaning up population dataframe

In [46]:
# Creating new copy of county_pop dataframe for processing 
new_county_pop_15_19_df = county_pop_15_19_df
# Renaming state and county column
new_county_pop_15_19_df.rename(columns={'State and County name':'State_and_County_A'}, inplace=True)

### Merging dataframes and cleaning

In [47]:
# Merge dataframes
new_pop_migration_df = county_pop_15_19_df.merge(New_Migration_df, on='State_and_County_A')
new_pop_migration_df.head()

Unnamed: 0,STNAME,CTYNAME,State_and_County_A,POPESTIMATE2015,POPESTIMATE2019,State_A,County_A,State_B,County_B,Flow_From_Geo_A_to_Geo_B,Counterflow_From_Geo_A_to_Geo_B,Net_Migration_from_Geo_B_to_Geo_A,Gross_Migration_between_Geo_A_and_Geo_B,State_and_County_B
0,Alabama,Autauga County,"Autauga County, Alabama",54864,55869,Alabama,Autauga County,Alabama,Baldwin County,25,448,-423.0,473.0,"Baldwin County,Alabama"
1,Alabama,Autauga County,"Autauga County, Alabama",54864,55869,Alabama,Autauga County,Alabama,Barbour County,4,0,4.0,4.0,"Barbour County,Alabama"
2,Alabama,Autauga County,"Autauga County, Alabama",54864,55869,Alabama,Autauga County,Alabama,Bibb County,10,34,-24.0,44.0,"Bibb County,Alabama"
3,Alabama,Autauga County,"Autauga County, Alabama",54864,55869,Alabama,Autauga County,Alabama,Blount County,3,0,3.0,3.0,"Blount County,Alabama"
4,Alabama,Autauga County,"Autauga County, Alabama",54864,55869,Alabama,Autauga County,Alabama,Calhoun County,2,42,-40.0,44.0,"Calhoun County,Alabama"


In [48]:
# Dropping duplicate columns 
new_pop_migration_df.drop(columns=['STNAME','CTYNAME'], axis=1, inplace=True)
new_pop_migration_df.head()

Unnamed: 0,State_and_County_A,POPESTIMATE2015,POPESTIMATE2019,State_A,County_A,State_B,County_B,Flow_From_Geo_A_to_Geo_B,Counterflow_From_Geo_A_to_Geo_B,Net_Migration_from_Geo_B_to_Geo_A,Gross_Migration_between_Geo_A_and_Geo_B,State_and_County_B
0,"Autauga County, Alabama",54864,55869,Alabama,Autauga County,Alabama,Baldwin County,25,448,-423.0,473.0,"Baldwin County,Alabama"
1,"Autauga County, Alabama",54864,55869,Alabama,Autauga County,Alabama,Barbour County,4,0,4.0,4.0,"Barbour County,Alabama"
2,"Autauga County, Alabama",54864,55869,Alabama,Autauga County,Alabama,Bibb County,10,34,-24.0,44.0,"Bibb County,Alabama"
3,"Autauga County, Alabama",54864,55869,Alabama,Autauga County,Alabama,Blount County,3,0,3.0,3.0,"Blount County,Alabama"
4,"Autauga County, Alabama",54864,55869,Alabama,Autauga County,Alabama,Calhoun County,2,42,-40.0,44.0,"Calhoun County,Alabama"


In [49]:
# Reordering columns
new_pop_migration_df = new_pop_migration_df[['State_A','County_A','State_and_County_A',
                                             'State_B','County_B','State_and_County_B','POPESTIMATE2015','POPESTIMATE2019',
                                             'Flow_From_Geo_A_to_Geo_B','Counterflow_From_Geo_A_to_Geo_B',
                                             'Net_Migration_from_Geo_B_to_Geo_A','Gross_Migration_between_Geo_A_and_Geo_B']]
new_pop_migration_df.head()

Unnamed: 0,State_A,County_A,State_and_County_A,State_B,County_B,State_and_County_B,POPESTIMATE2015,POPESTIMATE2019,Flow_From_Geo_A_to_Geo_B,Counterflow_From_Geo_A_to_Geo_B,Net_Migration_from_Geo_B_to_Geo_A,Gross_Migration_between_Geo_A_and_Geo_B
0,Alabama,Autauga County,"Autauga County, Alabama",Alabama,Baldwin County,"Baldwin County,Alabama",54864,55869,25,448,-423.0,473.0
1,Alabama,Autauga County,"Autauga County, Alabama",Alabama,Barbour County,"Barbour County,Alabama",54864,55869,4,0,4.0,4.0
2,Alabama,Autauga County,"Autauga County, Alabama",Alabama,Bibb County,"Bibb County,Alabama",54864,55869,10,34,-24.0,44.0
3,Alabama,Autauga County,"Autauga County, Alabama",Alabama,Blount County,"Blount County,Alabama",54864,55869,3,0,3.0,3.0
4,Alabama,Autauga County,"Autauga County, Alabama",Alabama,Calhoun County,"Calhoun County,Alabama",54864,55869,2,42,-40.0,44.0


In [50]:
# Renaming population columns
new_pop_migration_df.rename(columns={'POPESTIMATE2015':'Geo_A_Pop_2015','POPESTIMATE2019':'Geo_A_Pop_2019'}, inplace=True)
new_pop_migration_df.head()

Unnamed: 0,State_A,County_A,State_and_County_A,State_B,County_B,State_and_County_B,Geo_A_Pop_2015,Geo_A_Pop_2019,Flow_From_Geo_A_to_Geo_B,Counterflow_From_Geo_A_to_Geo_B,Net_Migration_from_Geo_B_to_Geo_A,Gross_Migration_between_Geo_A_and_Geo_B
0,Alabama,Autauga County,"Autauga County, Alabama",Alabama,Baldwin County,"Baldwin County,Alabama",54864,55869,25,448,-423.0,473.0
1,Alabama,Autauga County,"Autauga County, Alabama",Alabama,Barbour County,"Barbour County,Alabama",54864,55869,4,0,4.0,4.0
2,Alabama,Autauga County,"Autauga County, Alabama",Alabama,Bibb County,"Bibb County,Alabama",54864,55869,10,34,-24.0,44.0
3,Alabama,Autauga County,"Autauga County, Alabama",Alabama,Blount County,"Blount County,Alabama",54864,55869,3,0,3.0,3.0
4,Alabama,Autauga County,"Autauga County, Alabama",Alabama,Calhoun County,"Calhoun County,Alabama",54864,55869,2,42,-40.0,44.0


In [51]:
# Creating new columns showing net migration to Geo A as percentage of 2015 population
new_pop_migration_df['%_Change_Due_to_Migration_Geo_A'] = new_pop_migration_df['Net_Migration_from_Geo_B_to_Geo_A']/new_pop_migration_df['Geo_A_Pop_2015']
new_pop_migration_df.head()

Unnamed: 0,State_A,County_A,State_and_County_A,State_B,County_B,State_and_County_B,Geo_A_Pop_2015,Geo_A_Pop_2019,Flow_From_Geo_A_to_Geo_B,Counterflow_From_Geo_A_to_Geo_B,Net_Migration_from_Geo_B_to_Geo_A,Gross_Migration_between_Geo_A_and_Geo_B,%_Change_Due_to_Migration_Geo_A
0,Alabama,Autauga County,"Autauga County, Alabama",Alabama,Baldwin County,"Baldwin County,Alabama",54864,55869,25,448,-423.0,473.0,-0.00771
1,Alabama,Autauga County,"Autauga County, Alabama",Alabama,Barbour County,"Barbour County,Alabama",54864,55869,4,0,4.0,4.0,7.3e-05
2,Alabama,Autauga County,"Autauga County, Alabama",Alabama,Bibb County,"Bibb County,Alabama",54864,55869,10,34,-24.0,44.0,-0.000437
3,Alabama,Autauga County,"Autauga County, Alabama",Alabama,Blount County,"Blount County,Alabama",54864,55869,3,0,3.0,3.0,5.5e-05
4,Alabama,Autauga County,"Autauga County, Alabama",Alabama,Calhoun County,"Calhoun County,Alabama",54864,55869,2,42,-40.0,44.0,-0.000729


In [53]:
# create pivot table
new_pop_migration_piv = pd.pivot_table(new_pop_migration_df, 
               index = ['State_A','County_A'], 
               values = ['Geo_A_Pop_2015','Net_Migration_from_Geo_B_to_Geo_A','%_Change_Due_to_Migration_Geo_A'])

In [54]:
new_pop_migration_piv

Unnamed: 0_level_0,Unnamed: 1_level_0,%_Change_Due_to_Migration_Geo_A,Geo_A_Pop_2015,Net_Migration_from_Geo_B_to_Geo_A
State_A,County_A,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Alabama,Autauga County,-0.000364,54864,-19.960630
Alabama,Baldwin County,0.000035,202939,7.083333
Alabama,Barbour County,-0.000138,26283,-3.632911
Alabama,Bibb County,0.000123,22566,2.785714
Alabama,Blount County,-0.000160,57526,-9.182927
...,...,...,...,...
Wyoming,Sweetwater County,0.000050,44719,2.226562
Wyoming,Teton County,0.000078,23047,1.787879
Wyoming,Uinta County,-0.000040,20763,-0.833333
Wyoming,Washakie County,-0.000034,8278,-0.279070
