## Mobility Patterns in US Cities

### Exploratory Data Analysis: Temporal Source-Sink Mapping

In [1]:
import pandas as pd
import geopandas as gpd
import os
import matplotlib.pyplot as plt

%pylab inline

Populating the interactive namespace from numpy and matplotlib


In [2]:
# Switch to the shared project data folder so the cells below can open files by bare name.
data_dir = '/projects/mpi/shared/Data'
os.chdir(data_dir)

In [3]:
# Load the shapefile of tract polygons (TRACTCE / GEOID / geometry columns) used for mapping.
tract_shapefile = 'all_cts.shp'
geo = gpd.read_file(tract_shapefile)

In [4]:
# Load the processed New York origin-destination counts, aggregated at census-tract level.
ny_ct_path = 'ny_ct.csv'
ny = pd.read_csv(ny_ct_path)

In [5]:
# Preview the origin-destination table; a short preview keeps the rendered notebook compact.
ny.head(10)

Unnamed: 0,origin_ct,dest_ct,time_period,origin_st,dest_st,od_counts,num_dates
0,100,100,10:00 - 10:30,34,34,17,10
1,100,100,6:00 - 6:30,34,34,3,3
2,100,100,6:30 - 7:00,34,34,19,13
3,100,100,6:30 - 7:00,34,36,2,1
4,100,100,7:00 - 7:30,34,34,38,23
5,100,100,7:30 - 8:00,34,34,5,5
6,100,100,8:00 - 8:30,34,34,17,10
7,100,100,8:30 - 9:00,34,34,35,18
8,100,100,9:00 - 9:30,34,34,37,17
9,100,100,9:30 - 10:00,34,34,31,17


In [6]:
# Preview the first five tract geometries and their identifier columns.
geo.head(n=5)

Unnamed: 0,STATEFP,COUNTYFP,TRACTCE,AFFGEOID,GEOID,NAME,LSAD,ALAND,AWATER,geometry
0,6,1,400600,1400000US06001400600,6001400600,4006.0,CT,297856,0,"POLYGON ((-122.26807 37.844136, -122.26514 37...."
1,6,1,400900,1400000US06001400900,6001400900,4009.0,CT,420877,0,"POLYGON ((-122.285576 37.839778, -122.283186 3..."
2,6,1,401400,1400000US06001401400,6001401400,4014.0,CT,758204,0,"POLYGON ((-122.278611 37.826878, -122.268563 3..."
3,6,1,403000,1400000US06001403000,6001403000,4030.0,CT,352394,0,"POLYGON ((-122.274757 37.79883299999999, -122...."
4,6,1,405902,1400000US06001405902,6001405902,4059.02,CT,487280,0,"POLYGON ((-122.247175 37.789913, -122.243512 3..."


In [9]:
ct_files = ['ny_ct.csv','chic_ct.csv','sf_ct.csv','la_ct.csv']


def _sum_od_counts(flows, tract_col, flow_name):
    """Sum `od_counts` per (tract, time_period) and label the tract column `ct_id`.

    Parameters
    ----------
    flows : DataFrame
        Rows of an origin-destination table; must contain `tract_col`,
        'time_period' and 'od_counts'.
    tract_col : str
        Column whose tract the total is attributed to ('origin_ct' or 'dest_ct').
    flow_name : str
        Name given to the summed 'od_counts' column in the result.

    Returns
    -------
    DataFrame with columns ['ct_id', 'time_period', flow_name].
    """
    totals = (
        flows[[tract_col, 'time_period', 'od_counts']]
        .groupby([tract_col, 'time_period'])
        .sum()
        .reset_index()
    )
    return totals.rename(columns={'od_counts': flow_name, tract_col: 'ct_id'})


# For each city file, compute per tract and time period:
#   gain - counts arriving from a different tract,
#   loss - counts leaving for a different tract,
#   stay - counts whose origin and destination tract ids are equal,
# and write the result next to the input as '<input stem>_agg.csv'.
#
# NOTE(review): rows are compared on tract id only. The ny_ct.csv preview contains a row
# with origin_ct == dest_ct but origin_st != dest_st, which is counted as 'stay' here --
# confirm whether tract ids are meant to be unique across states.
for ct_file in ct_files:
    df_ct = pd.read_csv(ct_file)

    # True where the trip leaves its origin tract.
    is_move = df_ct['origin_ct'] != df_ct['dest_ct']

    df_ct_loss = _sum_od_counts(df_ct.loc[is_move], 'origin_ct', 'loss')
    df_ct_gain = _sum_od_counts(df_ct.loc[is_move], 'dest_ct', 'gain')
    df_ct_stay = _sum_od_counts(df_ct.loc[~is_move], 'dest_ct', 'stay')

    # Outer joins keep tract/time-period pairs that are missing one of the three measures
    # (e.g. a tract with arrivals but no in-tract trips). The default inner join silently
    # dropped those rows, which also made the fillna(0) below a no-op.
    df_ct_agg = pd.merge(df_ct_gain, df_ct_loss, on=['ct_id','time_period'], how='outer')
    df_ct_agg = pd.merge(df_ct_agg, df_ct_stay, on=['ct_id','time_period'], how='outer')
    df_ct_agg = df_ct_agg.loc[:, ['ct_id','time_period','gain','loss','stay']]
    df_ct_agg = df_ct_agg.fillna(0)

    # Missing values introduced by the outer joins upcast the counts to float; restore the
    # dtype of the source counts so the written CSV keeps the same number format.
    count_dtype = df_ct['od_counts'].dtype
    df_ct_agg = df_ct_agg.astype({'gain': count_dtype, 'loss': count_dtype, 'stay': count_dtype})

    # 'ny_ct.csv' -> 'ny_ct_agg.csv'. The row index is written as an unnamed first column,
    # as before, so existing readers of these files see the same schema.
    city = ct_file.split('.')[0] + '_agg.csv'
    df_ct_agg.to_csv(city)

In [17]:
# Spot-check the aggregated New York file: read it back from disk and show the first five rows.
(
    pd.read_csv('ny_ct_agg.csv')
    .head(5)
)

Unnamed: 0.1,Unnamed: 0,ct_id,time_period,gain,loss,stay
0,0,100,10:00 - 10:30,10156,7738,17
1,1,100,6:00 - 6:30,7440,5966,3
2,2,100,6:30 - 7:00,9893,6987,21
3,3,100,7:00 - 7:30,11074,8252,38
4,4,100,7:30 - 8:00,11382,8814,5
