In [None]:
# Source: https://github.com/BorisMaillard/CJ_Mobility_Visualization
# Please feel free to contact me if you have any questions
# Boris Maillard (boris.maillard@gmail.com)

In [1]:
# -----------------------------------------------------------------------------------------
# Import of required packages -------------------------------------------------------------
# -----------------------------------------------------------------------------------------

import numpy as np                  # to support multi-dimensional arrays
import matplotlib.pyplot as pp      # to visualize data
import pandas as pd                 # for data analysis
import seaborn                      # to make our matplotlib statistical graphics more aesthetic

# import zipfile                      # to uncompress the zip archive into the current directory
# matplotlib inline                   # to keep our graphs inline:

In [2]:
# -----------------------------------------------------------------------------------------
# Import of data files ---------------------------------------------------------------------
# -----------------------------------------------------------------------------------------

## Naming of raws for each CSV file with IN or OUT data
dt_IN_rows = ['Census','StateOrigin','CountyOrigin','MunicipalityOrigin',
              'EnteringArea','DirectionalCategories','InTown',
              'CountyDestination','MunicipalityDestination',
              'TravelMode',
              'TotalEstCommutersIN','InCityCommutersIN','OutOfTownCommutersIN',
              'MarginOfError']
dt_OUT_rows = ['Census','CountyOrigin','MunicipalityOrigin',
               'DestinationState','DestinationCounty','DestinationMunicipality',
               'LeavingArea','DirectionalCategories',
               'InTown','ModeToWork',
               'TotalEstCommutersOUT','InCityCommutersOUT','OutOfTownCommutersOUT','MarginOfError']

## Import with exclusion of first line (skip rows and no missing data (na_values)
dt_IN = pd.read_csv('DataCJflowsIN.csv', names = dt_IN_rows, skiprows = 1, na_values = ['.'])
dt_OUT = pd.read_csv('DataCJflowsOUT.csv', names = dt_OUT_rows, skiprows = 1, na_values = ['.'])


In [3]:
## Printing of 2 first results
print()
print("Data OUT:",dt_OUT.size,"lines\n")
print(dt_OUT.head(2))
print("\n")
print("Data IN:", dt_IN.size,"lines\n")
print(dt_IN.head(2))

## Check of variable types
# print(OutData['Census'].dtypes)
# print(dt_IN['TotalEstCommutersIN'].dtypes)
# print(dt_IN['InCityCommutersIN'].dtypes)
# print(dt_IN['OutOfTownCommutersIN'].dtypes)
# print(OutData['Census'].dtypes)
# print(dt_OUT['TotalEstCommutersOUT'].dtypes)
# print(dt_OUT['InCityCommutersOUT'].dtypes)
# print(dt_OUT['OutOfTownCommutersOUT'].dtypes)


Data OUT: 63588 lines

      Census   CountyOrigin     MunicipalityOrigin DestinationState  \
0  2011-2015  Mercer County  East Windsor township          Arizona   
1  2011-2015  Mercer County      Lawrence township          Arizona   

  DestinationCounty DestinationMunicipality   LeavingArea  \
0   Maricopa County                 Phoenix  out of state   
1   Maricopa County                 Phoenix  out of state   

  DirectionalCategories       InTown          ModeToWork  \
0   Other area location  out-of-town  Data-not-available   
1   Other area location  out-of-town  Data-not-available   

   TotalEstCommutersOUT  InCityCommutersOUT  OutOfTownCommutersOUT  \
0                     8                   0                      8   
1                    16                   0                     16   

   MarginOfError  
0             15  
1             25  


Data IN: 104678 lines

      Census StateOrigin    CountyOrigin MunicipalityOrigin  EnteringArea  \
0  2011-2015     Alabama  B

In [4]:
# -----------------------------------------------------------------------------------------
# Pivot calculations ----------------------------------------------------------------------
# -----------------------------------------------------------------------------------------

# dt_IN.groupby(['Census','StateOrigin','CountyOrigin','CountyDestination']).sum()

# pd.pivot_table(dt_IN,'TotalEstCommutersIN',['StateOrigin','CountyOrigin','CountyDestination'],'Census')

# pd.pivot_table(dt_IN,['TotalEstCommutersIN','InCityCommutersIN','OutOfTownCommutersIN'],
#                      ['DirectionalCategories','CountyDestination','MunicipalityDestination'],
#                      'Census')

In [5]:
# IN_data:
#
# 'Census','StateOrigin','CountyOrigin','MunicipalityOrigin',
# 'EnteringArea','DirectionalCategories','InTown',
# 'CountyDestination','MunicipalityDestination',
# 'TravelMode','TotalEstCommutersIN','InCityCommutersIN',
# 'OutOfTownCommutersIN','MarginOfError'
 
# JOEL-IN table:
JOEL_IN_pivot_table = pd.pivot_table(dt_IN,['TotalEstCommutersIN','InCityCommutersIN','OutOfTownCommutersIN'],
                     ['EnteringArea','DirectionalCategories','StateOrigin','CountyOrigin','MunicipalityOrigin'],
                     'Census')
JOEL_IN_pivot_table

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,InCityCommutersIN,InCityCommutersIN,OutOfTownCommutersIN,OutOfTownCommutersIN,TotalEstCommutersIN,TotalEstCommutersIN
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Census,2009-2013,2011-2015,2009-2013,2011-2015,2009-2013,2011-2015
EnteringArea,DirectionalCategories,StateOrigin,CountyOrigin,MunicipalityOrigin,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
in area,US1-Northeast,New Jersey,Mercer County,Ewing township,0.000000,0.0,48.000000,472.666667,48.000000,472.666667
in area,US1-Northeast,New Jersey,Middlesex County,New Brunswick city,0.000000,0.0,117.487805,248.529412,117.487805,248.529412
in area,US1-Northeast,New Jersey,Middlesex County,North Brunswick township,0.000000,0.0,185.777778,348.850000,185.777778,348.850000
in area,US1-Northeast,New Jersey,Somerset County,Franklin township,0.000000,0.0,229.323529,428.052632,229.323529,428.052632
in area,US1-Northeast,New Jersey,Somerset County,Hillsborough township,0.000000,0.0,147.821429,236.750000,147.821429,236.750000
in area,US1-Northeast,New Jersey,Somerset County,Montgomery township,0.000000,0.0,150.076923,216.550000,150.076923,216.550000
in area,US1-Northeast,New Jersey,Somerset County,Rocky Hill borough,0.000000,0.0,12.250000,12.625000,12.250000,12.625000
in area,US1-Northwest,New Jersey,Mercer County,Ewing township,0.000000,,224.121951,,224.121951,
in area,US1-Northwest,New Jersey,Mercer County,Hopewell borough,0.000000,0.0,28.695652,37.315789,28.695652,37.315789
in area,US1-Northwest,New Jersey,Mercer County,Hopewell township,0.000000,0.0,195.500000,285.736842,195.500000,285.736842


In [6]:
# OUT_data
#
# 'Census','CountyOrigin','MunicipalityOrigin',
# 'DestinationState','DestinationCounty','DestinationMunicipality',
# 'LeavingArea','DirectionalCategories',
# 'InTown','ModeToWork',
# 'TotalEstCommutersOUT','InCityCommutersOUT','OutOfTownCommutersOUT','MarginOfError']

# JOEL_OUT_pivot_table:
JOEL_OUT_pivot_table = pd.pivot_table(dt_OUT,['TotalEstCommutersOUT','InCityCommutersOUT','OutOfTownCommutersOUT'],
                     ['DirectionalCategories','DestinationState','DestinationCounty','DestinationMunicipality'],
                     'Census')
JOEL_OUT_pivot_table

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,InCityCommutersOUT,InCityCommutersOUT,OutOfTownCommutersOUT,OutOfTownCommutersOUT,TotalEstCommutersOUT,TotalEstCommutersOUT
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Census,2009-2013,2011-2015,2009-2013,2011-2015,2009-2013,2011-2015
DirectionalCategories,DestinationState,DestinationCounty,DestinationMunicipality,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
NJ-NorthEast,New Jersey,Bergen County,Allendale borough,,0.0,,8.000000,,8.000000
NJ-NorthEast,New Jersey,Bergen County,Carlstadt borough,,0.0,,12.500000,,12.500000
NJ-NorthEast,New Jersey,Bergen County,Closter borough,,0.0,,15.000000,,15.000000
NJ-NorthEast,New Jersey,Bergen County,East Rutherford borough,,0.0,,12.000000,,12.000000
NJ-NorthEast,New Jersey,Bergen County,Elmwood Park borough,,0.0,,18.500000,,18.500000
NJ-NorthEast,New Jersey,Bergen County,Englewood Cliffs borough,,0.0,,14.500000,,14.500000
NJ-NorthEast,New Jersey,Bergen County,Englewood city,,0.0,,42.000000,,42.000000
NJ-NorthEast,New Jersey,Bergen County,Fair Lawn borough,,0.0,,4.000000,,4.000000
NJ-NorthEast,New Jersey,Bergen County,Fairview borough,,0.0,,30.000000,,30.000000
NJ-NorthEast,New Jersey,Bergen County,Fort Lee borough,0.000000,0.0,43.000000,16.333333,43.000000,16.333333
