In [1]:
# ----------------------------------------------------------------------
# Source: https://github.com/BorisMaillard/CJ_Mobility_Visualization
# Please feel free to contact me if you have any questions
# Boris Maillard (boris.maillard@gmail.com)
# ----------------------------------------------------------------------
# Resources:
#   https://pbpython.com/pandas-pivot-table-explained.html
#   https://www.dataquest.io/blog/pandas-pivot-table/
#   https://www.digitalocean.com/community/tutorials/data-analysis-and-
#             visualization-with-pandas-and-jupyter-notebook-in-python-3
# ----------------------------------------------------------------------


In [2]:
# ----------------------------------------------------------------------
# Import of required packages ------------------------------------------
# ----------------------------------------------------------------------

import numpy as np      # multi-dimensional arrays
import pandas as pd     # data analysis

#import matplotlib.pyplot as pp    # visualize data
#import seaborn         # more aesthetic matplotlib statistical graphics 
#import zipfile         # uncompress zip archive into  current directory
#matplotlib inline      # keep our graphs inline


In [3]:
# ----------------------------------------------------------------------
# Import of data files -------------------------------------------------
# ----------------------------------------------------------------------

# Naming of raws for each CSV file with IN or OUT data:

dt_IN_rows = ['Census','StateOrigin','CountyOrigin','MunicipalityOrigin',
              'EnteringArea','DirectionalCategories','InTown',
              'CountyDestination','MunicipalityDestination',
              'TravelMode',
              'TotalEstCommutersIN','InCityCommutersIN',
              'OutOfTownCommutersIN','MarginOfError']
dt_OUT_rows = ['Census','CountyOrigin','MunicipalityOrigin',
               'DestinationState','DestinationCounty',
               'DestinationMunicipality','LeavingArea',
               'DirectionalCategories','InTown','ModeToWork',
               'TotalEstCommutersOUT','InCityCommutersOUT',
               'OutOfTownCommutersOUT','MarginOfError']

# Import of data from csv files with:
#   - skip rows: exclusion of first line
#   - na_values: no missing data

dt_IN = pd.read_csv('DataCJflowsIN.csv', names = dt_IN_rows, 
                    skiprows = 1, na_values = ['.'])
dt_OUT = pd.read_csv('DataCJflowsOUT.csv', names = dt_OUT_rows, 
                     skiprows = 1, na_values = ['.'])


In [4]:
# Printing of number of lines and 2 first results:

print("Data OUT:",dt_OUT.size,"lines")
#print(dt_OUT.head(2),"\n")

print("Data IN:", dt_IN.size,"lines")
#print(dt_IN.head(2),"\n")


Data OUT: 63588 lines
Data IN: 104678 lines


In [5]:
# Check of variable types:

#print(dt_IN['Census'].dtypes)
#print(dt_IN['TotalEstCommutersIN'].dtypes)


In [6]:
# ----------------------------------------------------------------------
# Pivot calculations ---------------------------------------------------
# ----------------------------------------------------------------------
   
# Comments:
#   use fill_value to set NaN values to 0.
#   use aggfunc=np.sum to make the sum of values (not the average value)
#   use margines=True to get the totals


In [7]:
# IN_data:

pivot_table_IN = pd.pivot_table(dt_IN,
                                ['TotalEstCommutersIN',
                                 'InCityCommutersIN',
                                 'OutOfTownCommutersIN'],
                                ['EnteringArea',
                                 'DirectionalCategories',
                                 'StateOrigin',
                                 'CountyOrigin',
                                 'MunicipalityOrigin'],
                                'Census',
                                fill_value=0, 
                                aggfunc=[np.sum], 
                                margins=True)
pivot_table_IN

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,sum,sum,sum,sum,sum,sum,sum,sum,sum
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,InCityCommutersIN,InCityCommutersIN,InCityCommutersIN,OutOfTownCommutersIN,OutOfTownCommutersIN,OutOfTownCommutersIN,TotalEstCommutersIN,TotalEstCommutersIN,TotalEstCommutersIN
Unnamed: 0_level_2,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Census,2009-2013,2011-2015,All,2009-2013,2011-2015,All,2009-2013,2011-2015,All
EnteringArea,DirectionalCategories,StateOrigin,CountyOrigin,MunicipalityOrigin,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3
in area,US1-Northeast,New Jersey,Mercer County,Ewing township,0,0,0,96,9926,10022,96,9926,10022
in area,US1-Northeast,New Jersey,Middlesex County,New Brunswick city,0,0,0,4817,4225,9042,4817,4225,9042
in area,US1-Northeast,New Jersey,Middlesex County,North Brunswick township,0,0,0,6688,6977,13665,6688,6977,13665
in area,US1-Northeast,New Jersey,Somerset County,Franklin township,0,0,0,7797,8133,15930,7797,8133,15930
in area,US1-Northeast,New Jersey,Somerset County,Hillsborough township,0,0,0,4139,4735,8874,4139,4735,8874
in area,US1-Northeast,New Jersey,Somerset County,Montgomery township,0,0,0,3902,4331,8233,3902,4331,8233
in area,US1-Northeast,New Jersey,Somerset County,Rocky Hill borough,0,0,0,147,202,349,147,202,349
in area,US1-Northwest,New Jersey,Mercer County,Ewing township,0,0,0,9189,0,9189,9189,0,9189
in area,US1-Northwest,New Jersey,Mercer County,Hopewell borough,0,0,0,660,709,1369,660,709,1369
in area,US1-Northwest,New Jersey,Mercer County,Hopewell township,0,0,0,5474,5429,10903,5474,5429,10903


In [8]:
# OUT_data

pivot_table_OUT = pd.pivot_table(dt_OUT,
                                 ['TotalEstCommutersOUT',
                                  'InCityCommutersOUT',
                                  'OutOfTownCommutersOUT'],
                                 ['DirectionalCategories',
                                  'DestinationState',
                                  'DestinationCounty',
                                  'DestinationMunicipality'],
                                 'Census',
                                 fill_value=0, 
                                 aggfunc=[np.sum], 
                                 margins=True)

pivot_table_OUT

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,sum,sum,sum,sum,sum,sum,sum,sum,sum
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,InCityCommutersOUT,InCityCommutersOUT,InCityCommutersOUT,OutOfTownCommutersOUT,OutOfTownCommutersOUT,OutOfTownCommutersOUT,TotalEstCommutersOUT,TotalEstCommutersOUT,TotalEstCommutersOUT
Unnamed: 0_level_2,Unnamed: 1_level_2,Unnamed: 2_level_2,Census,2009-2013,2011-2015,All,2009-2013,2011-2015,All,2009-2013,2011-2015,All
DirectionalCategories,DestinationState,DestinationCounty,DestinationMunicipality,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3
NJ-NorthEast,New Jersey,Bergen County,Allendale borough,0,0,0.0,0,16,16.0,0,16,16.0
NJ-NorthEast,New Jersey,Bergen County,Carlstadt borough,0,0,0.0,0,50,50.0,0,50,50.0
NJ-NorthEast,New Jersey,Bergen County,Closter borough,0,0,0.0,0,15,15.0,0,15,15.0
NJ-NorthEast,New Jersey,Bergen County,East Rutherford borough,0,0,0.0,0,24,24.0,0,24,24.0
NJ-NorthEast,New Jersey,Bergen County,Elmwood Park borough,0,0,0.0,0,74,74.0,0,74,74.0
NJ-NorthEast,New Jersey,Bergen County,Englewood Cliffs borough,0,0,0.0,0,29,29.0,0,29,29.0
NJ-NorthEast,New Jersey,Bergen County,Englewood city,0,0,0.0,0,126,126.0,0,126,126.0
NJ-NorthEast,New Jersey,Bergen County,Fair Lawn borough,0,0,0.0,0,4,4.0,0,4,4.0
NJ-NorthEast,New Jersey,Bergen County,Fairview borough,0,0,0.0,0,30,30.0,0,30,30.0
NJ-NorthEast,New Jersey,Bergen County,Fort Lee borough,0,0,0.0,43,49,92.0,43,49,92.0
