## Create the Earnings JSON data for Tiles

In [1]:
import pandas as pd
import geopandas as gp
import fiona
import shapely.geometry as geom

from pathlib import Path

#### Locate file needed for processing

In [2]:
ODpath = Path("../data/OD/")
OD_file = ODpath.joinpath("od_distance_1k-15k_15-60_miles_continental.csv")
if OD_file.exists ():
    print ("OD file exist")
else:
    print ("OD file does not exist")
    

OD file exist


#### Read file

In [3]:
df = pd.read_csv(OD_file, dtype={'w_geocode': str,'h_geocode':str, 'w_group_count':str})

print ('\nThe number of records:', "{:,}".format(len(df)),'\n\n')

df.head(3)


The number of records: 8,770,266 




Unnamed: 0,w_geocode,h_geocode,distance,S000,SA01,SA02,SA03,SE01,SE02,SE03,SI01,SI02,SI03,w_lat,w_lon,h_lat,h_lon,state,w_group_count
0,10010205001001,10010209001001,30634,1,1,0,0,1,0,0,0,0,1,32.457,-86.415,32.705,-86.559,1,1119
1,10010205001001,10010209001002,29897,2,2,0,0,1,1,0,0,0,2,32.457,-86.415,32.694,-86.567,1,1119
2,10010205001001,10010209001015,31828,1,1,0,0,0,1,0,0,0,1,32.457,-86.415,32.699,-86.597,1,1119


#### Create a LineString geometry

In [4]:
df['geometry'] = df.apply(lambda x: geom.LineString([(x['w_lon'], x['w_lat'] ), (x['h_lon'],x['h_lat'])]), axis = 1)
print ('\nLineString geometries created.')


LineString geometries created.


#### Drop unecessary columns

In [5]:
df.drop(['h_geocode', 'h_lat', 'h_lon', 'w_group_count', 'w_lat', 'w_lon', 'state'],axis=1, inplace=True)

df.head(3)

Unnamed: 0,w_geocode,distance,S000,SA01,SA02,SA03,SE01,SE02,SE03,SI01,SI02,SI03,geometry
0,10010205001001,30634,1,1,0,0,1,0,0,0,0,1,"LINESTRING (-86.41500000000001 32.457, -86.559..."
1,10010205001001,29897,2,2,0,0,1,1,0,0,0,2,"LINESTRING (-86.41500000000001 32.457, -86.567..."
2,10010205001001,31828,1,1,0,0,0,1,0,0,0,1,"LINESTRING (-86.41500000000001 32.457, -86.597..."


#### Build DataFrames & Process based on distance of commute

In [6]:
df_earn1 = df[df['SE01'] >= 1].copy()
earn1_short = df_earn1[(df_earn1['distance'] < 32000)].copy()
earn1_medium = df_earn1[((df_earn1['distance'] >= 32000) & (df_earn1['distance'] < 46000))].copy()
earn1_long = df_earn1[(df_earn1['distance'] >= 46000)].copy()
print ('\nThe number of records earn1_short:', "{:,}".format(len(earn1_short)))
print ('The number of records earn1_medium:', "{:,}".format(len(earn1_medium)))
print ('The number of records earn1_long: ', "{:,}".format(len(earn1_long)))

df_earn2 = df[df['SE02'] >= 1].copy()
earn2_short = df_earn2[(df_earn2['distance'] < 32000)].copy()
earn2_medium = df_earn2[((df_earn2['distance'] >= 32000) & (df_earn2['distance'] < 46000))].copy()
earn2_long = df_earn2[(df_earn2['distance'] >= 46000)].copy()
print ('\nThe number of records earn2_short:', "{:,}".format(len(earn2_short)))
print ('The number of records earn2_medium:', "{:,}".format(len(earn2_medium)))
print ('The number of recordsearn2_long: ', "{:,}".format(len(earn2_long)))

df_earn3 = df[df['SE03'] >= 1].copy()
earn3_short = df_earn3[(df_earn3['distance'] < 32000)].copy()
earn3_medium = df_earn3[((df_earn3['distance'] >= 32000) & (df_earn3['distance'] < 46000))].copy()
earn3_long = df_earn3[(df_earn3['distance'] >= 46000)].copy()
print ('\nThe number of records earn3_short:', "{:,}".format(len(earn3_short)))
print ('The number of records earn3_medium:', "{:,}".format(len(earn3_medium)))
print ('The number of records earn3_long: ', "{:,}".format(len(earn3_long)))


The number of records earn1_short: 309,021
The number of records earn1_medium: 298,508
The number of records earn1_long:  419,147

The number of records earn2_short: 736,704
The number of records earn2_medium: 685,151
The number of recordsearn2_long:  828,990

The number of records earn3_short: 1,919,316
The number of records earn3_medium: 1,879,961
The number of records earn3_long:  1,943,777


#### Remove the unneeded columns

In [7]:
earn1_short.drop(['distance', 'S000','SA01','SA02','SA03','SE01','SE02','SE03','SI01','SI02','SI03'],axis=1, inplace=True)
earn1_medium.drop(['distance', 'S000','SA01','SA02','SA03','SE01','SE02','SE03','SI01','SI02','SI03'],axis=1, inplace=True)
earn1_long.drop(['distance', 'S000','SA01','SA02','SA03','SE01','SE02','SE03','SI01','SI02','SI03'],axis=1, inplace=True)
earn2_short.drop(['distance', 'S000','SA01','SA02','SA03','SE01','SE02','SE03','SI01','SI02','SI03'],axis=1, inplace=True)
earn2_medium.drop(['distance', 'S000','SA01','SA02','SA03','SE01','SE02','SE03','SI01','SI02','SI03'],axis=1, inplace=True)
earn2_long.drop(['distance', 'S000','SA01','SA02','SA03','SE01','SE02','SE03','SI01','SI02','SI03'],axis=1, inplace=True)
earn3_short.drop(['distance', 'S000','SA01','SA02','SA03','SE01','SE02','SE03','SI01','SI02','SI03'],axis=1, inplace=True)
earn3_medium.drop(['distance', 'S000','SA01','SA02','SA03','SE01','SE02','SE03','SI01','SI02','SI03'],axis=1, inplace=True)
earn3_long.drop(['distance', 'S000','SA01','SA02','SA03','SE01','SE02','SE03','SI01','SI02','SI03'],axis=1, inplace=True)

#### Write the GeoDataFrames to JSON

In [8]:
gdf_earn1_short = gp.GeoDataFrame(earn1_short, geometry='geometry')
gdf_earn1_medium = gp.GeoDataFrame(earn1_medium, geometry='geometry')
gdf_earn1_long = gp.GeoDataFrame(earn1_long, geometry='geometry')
output = 'earn1_short.json'
out_path_file = ODpath.joinpath(output)
gdf_earn1_short.to_file(out_path_file, index=None, driver='GeoJSON')

print ('\nThe number of records written to earn1_short geo dataframe:', "{:,}".format(len(gdf_earn1_short)))
output = 'earn1_medium.json'
out_path_file = ODpath.joinpath(output)
gdf_earn1_medium.to_file(out_path_file, index=None, driver='GeoJSON')
print ('\nThe number of records written to earn1_medium geo dataframe:', "{:,}".format(len(gdf_earn1_medium)))
output = 'earn1_long.json'
out_path_file = ODpath.joinpath(output)
gdf_earn1_long.to_file(out_path_file, index=None, driver='GeoJSON')
print ('\nThe number of records written to earn1_long geo dataframe:', "{:,}".format(len(gdf_earn1_long)))


The number of records written to earn1_short geo dataframe: 309,021

The number of records written to earn1_medium geo dataframe: 298,508

The number of records written to earn1_long geo dataframe: 419,147


In [9]:
gdf_earn2_short = gp.GeoDataFrame(earn2_short, geometry='geometry')
gdf_earn2_medium = gp.GeoDataFrame(earn2_medium, geometry='geometry')
gdf_earn2_long = gp.GeoDataFrame(earn2_long, geometry='geometry')
output = 'earn2_short.json'
out_path_file = ODpath.joinpath(output)
gdf_earn2_short.to_file(out_path_file, index=None, driver='GeoJSON')
print ('\nThe number of records written to earn2_short geo dataframe:', "{:,}".format(len(gdf_earn2_short)))
output = 'earn2_medium.json'
out_path_file = ODpath.joinpath(output)
gdf_earn2_medium.to_file(out_path_file, index=None, driver='GeoJSON')
print ('\nThe number of records written to earn2_medium geo dataframe:', "{:,}".format(len(gdf_earn2_medium)))
output = 'earn2_long.json'
out_path_file = ODpath.joinpath(output)
gdf_earn2_long.to_file(out_path_file, index=None, driver='GeoJSON')
print ('\nThe number of records written to earn2_long geo dataframe:', "{:,}".format(len(gdf_earn2_long)))


The number of records written to earn2_short geo dataframe: 736,704

The number of records written to earn2_medium geo dataframe: 685,151

The number of records written to earn2_long geo dataframe: 828,990


In [10]:
gdf_earn3_short = gp.GeoDataFrame(earn3_short, geometry='geometry')
gdf_earn3_medium = gp.GeoDataFrame(earn3_medium, geometry='geometry')
gdf_earn3_long = gp.GeoDataFrame(earn3_long, geometry='geometry')
output = 'earn3_short.json'
out_path_file = ODpath.joinpath(output)
gdf_earn3_short.to_file(out_path_file, index=None, driver='GeoJSON')
print ('\nThe number of record written to earn3_short geo dataframe:', "{:,}".format(len(gdf_earn3_short)))
output = 'earn3_medium.json'
out_path_file = ODpath.joinpath(output)
gdf_earn3_medium.to_file(out_path_file, index=None, driver='GeoJSON')
print ('\nThe number of records written to earn3_medium geo dataframe:', "{:,}".format(len(gdf_earn3_medium)))
output = 'earn3_long.json'
out_path_file = ODpath.joinpath(output)
gdf_earn3_long.to_file(out_path_file, index=None, driver='GeoJSON')
print ('\nThe number of records written to earn3_long geo dataframe:', "{:,}".format(len(gdf_earn3_long)))


The number of record written to earn3_short geo dataframe: 1,919,316

The number of records written to earn3_medium geo dataframe: 1,879,961

The number of records written to earn3_long geo dataframe: 1,943,777
