## Create the Age JSON data for Tiles

In [1]:
import pandas as pd
import geopandas as gp
import fiona
import shapely.geometry as geom

from pathlib import Path

#### Locate file needed for processing

In [2]:
# Folder that holds the origin-destination (OD) CSV used by this notebook.
ODpath = Path("../data/OD/")
OD_file = ODpath / "od_distance_1k-15k_15-60_miles.csv"

# Report whether the input file is present before trying to read it.
status_message = "OD file exist" if OD_file.exists() else "OD file does not exist"
print(status_message)
    

OD file exist


#### Read file

In [3]:
# Columns that must be read as text rather than parsed as numbers
# (presumably so the long numeric geocode IDs are kept verbatim; confirm).
text_columns = {'w_geocode': str, 'h_geocode': str, 'w_group_count': str}
df = pd.read_csv(OD_file, dtype=text_columns)

# Show the row count with thousands separators, then preview the first rows.
record_count = f"{len(df):,}"
print('\nThe number of records:', record_count, '\n\n')

df.head(3)


The number of records: 8,383,570 




Unnamed: 0,w_geocode,h_geocode,distance,S000,SA01,SA02,SA03,SE01,SE02,SE03,SI01,SI02,SI03,w_lat,w_lon,h_lat,h_lon,state,w_group_count
0,20200007023012,20200001011055,31749,2,1,0,1,0,2,0,0,1,1,61.212,-149.742,61.436,-149.375,2,1548
1,20200007023012,20200001012050,28342,1,0,0,1,1,0,0,0,1,0,61.212,-149.742,61.411,-149.414,2,1548
2,20200007023012,20200001012064,27888,1,0,1,0,0,1,0,0,0,1,61.212,-149.742,61.413,-149.431,2,1548


#### Create a LineString geometry

In [4]:
# Build one two-point LineString per row, running from (w_lon, w_lat) to
# (h_lon, h_lat). The four coordinate columns are iterated in parallel instead
# of using df.apply(axis=1), which constructs a full row Series for every
# record and is very slow on a frame with millions of rows. The resulting
# geometries are the same.
df['geometry'] = [
    geom.LineString([(w_lon, w_lat), (h_lon, h_lat)])
    for w_lon, w_lat, h_lon, h_lat in zip(
        df['w_lon'], df['w_lat'], df['h_lon'], df['h_lat']
    )
]
print ('\nLineString geometries created.')


LineString geometries created.


#### Drop unnecessary columns

In [5]:
# The coordinate columns are now encoded in 'geometry'; they and the other
# listed attributes are removed from the frame in place.
columns_to_discard = [
    'h_geocode', 'h_lat', 'h_lon', 'w_group_count', 'w_lat', 'w_lon', 'state',
]
df.drop(columns=columns_to_discard, inplace=True)

# Preview the slimmed-down frame.
df.head(3)

Unnamed: 0,w_geocode,distance,S000,SA01,SA02,SA03,SE01,SE02,SE03,SI01,SI02,SI03,geometry
0,20200007023012,31749,2,1,0,1,0,2,0,0,1,1,"LINESTRING (-149.742 61.212, -149.375 61.43600..."
1,20200007023012,28342,1,0,0,1,1,0,0,0,1,0,"LINESTRING (-149.742 61.212, -149.414 61.411)"
2,20200007023012,27888,1,0,1,0,0,1,0,0,0,1,"LINESTRING (-149.742 61.212, -149.431 61.413)"


#### Build DataFrames & Process based on distance of commute

In [6]:
# Distance cut-offs separating short / medium / long commutes.
# NOTE(review): units of 'distance' are not stated in this notebook — confirm.
SHORT_DISTANCE_LIMIT = 32000
MEDIUM_DISTANCE_LIMIT = 46000


def _split_by_distance(frame):
    """Return the (short, medium, long) row subsets of ``frame``.

    Rows are bucketed on the 'distance' column:
    short  : distance < SHORT_DISTANCE_LIMIT
    medium : SHORT_DISTANCE_LIMIT <= distance < MEDIUM_DISTANCE_LIMIT
    long   : distance >= MEDIUM_DISTANCE_LIMIT
    """
    distance = frame['distance']
    short = frame[distance < SHORT_DISTANCE_LIMIT]
    medium = frame[(distance >= SHORT_DISTANCE_LIMIT) & (distance < MEDIUM_DISTANCE_LIMIT)]
    long_ = frame[distance >= MEDIUM_DISTANCE_LIMIT]
    return short, medium, long_


def _report_counts(label, short, medium, long_):
    """Print the record count of each distance bucket for one age group."""
    print(f'\nThe number of records {label}_short:', "{:,}".format(len(short)))
    print(f'The number of records {label}_medium:', "{:,}".format(len(medium)))
    # The original age2 message was missing the space before the label
    # ("recordsage2_long"); all three groups now share one format.
    print(f'The number of records {label}_long: ', "{:,}".format(len(long_)))


# Rows with at least one job counted in SA01, split by commute distance.
df_age1 = df[df['SA01'] >= 1].copy()
age1_short, age1_med, age1_long = _split_by_distance(df_age1)
_report_counts('age1', age1_short, age1_med, age1_long)

# Rows with at least one job counted in SA02, split by commute distance.
df_age2 = df[df['SA02'] >= 1].copy()
age2_short, age2_med, age2_long = _split_by_distance(df_age2)
_report_counts('age2', age2_short, age2_med, age2_long)

# Rows with at least one job counted in SA03, split by commute distance.
df_age3 = df[df['SA03'] >= 1].copy()
age3_short, age3_med, age3_long = _split_by_distance(df_age3)
_report_counts('age3', age3_short, age3_med, age3_long)


The number of records age1_short: 519,869
The number of records age1_medium: 492,882
The number of records age1_long:  606,181

The number of records age2_short: 1,738,074
The number of records age2_medium: 1,666,285
The number of recordsage2_long:  1,749,646

The number of records age3_short: 657,255
The number of records age3_medium: 613,549
The number of records age3_long:  665,189


#### Remove the unneeded columns

In [7]:
# Attribute columns that are not wanted in the exported data.
UNNEEDED_COLUMNS = [
    'distance', 'S000', 'SA01', 'SA02', 'SA03',
    'SE01', 'SE02', 'SE03', 'SI01', 'SI02', 'SI03',
]

# Clean up the per-age-group parent frames in place, as before.
# errors='ignore' lets the cell be re-run after the columns are already gone.
for _parent in (df_age1, df_age2, df_age3):
    _parent.drop(columns=UNNEEDED_COLUMNS, inplace=True, errors='ignore')

# The short/medium/long subsets were produced by boolean indexing before this
# cell ran, so they are separate objects that still carry every column;
# dropping from the parents alone never reached the frames that get written
# out. Drop the same columns from each subset so the export is actually slimmed.
# NOTE(review): this removes these properties from the generated GeoJSON —
# confirm no downstream consumer relies on them.
age1_short, age1_med, age1_long = (
    subset.drop(columns=UNNEEDED_COLUMNS, errors='ignore')
    for subset in (age1_short, age1_med, age1_long)
)
age2_short, age2_med, age2_long = (
    subset.drop(columns=UNNEEDED_COLUMNS, errors='ignore')
    for subset in (age2_short, age2_med, age2_long)
)
age3_short, age3_med, age3_long = (
    subset.drop(columns=UNNEEDED_COLUMNS, errors='ignore')
    for subset in (age3_short, age3_med, age3_long)
)

#### Write the GeoDataFrames to JSON

In [8]:
# Wrap each age1 distance subset in a GeoDataFrame keyed on its LineString column.
gdf_age1_short = gp.GeoDataFrame(age1_short, geometry='geometry')
gdf_age1_medium = gp.GeoDataFrame(age1_med, geometry='geometry')
gdf_age1_long = gp.GeoDataFrame(age1_long, geometry='geometry')

# Write each GeoDataFrame to <label>.json next to the input data and report
# how many records it holds.
for _label, _gdf in (
    ('age1_short', gdf_age1_short),
    ('age1_medium', gdf_age1_medium),
    ('age1_long', gdf_age1_long),
):
    output = f'{_label}.json'
    out_path_file = ODpath.joinpath(output)
    # `index` is the documented to_file keyword; the previous `ageex=None` was
    # not a to_file parameter and was passed through to the I/O engine as an
    # unknown option.
    _gdf.to_file(out_path_file, index=None, driver='GeoJSON')
    print (f'\nThe number of records written to {_label} geo dataframe:', "{:,}".format(len(_gdf)))


The number of records written to age1_short geo dataframe: 519,869

The number of records written to age1_medium geo dataframe: 492,882

The number of records written to age1_long geo dataframe: 606,181


In [9]:
# Wrap each age2 distance subset in a GeoDataFrame keyed on its LineString column.
gdf_age2_short = gp.GeoDataFrame(age2_short, geometry='geometry')
gdf_age2_medium = gp.GeoDataFrame(age2_med, geometry='geometry')
gdf_age2_long = gp.GeoDataFrame(age2_long, geometry='geometry')

# Write each GeoDataFrame to <label>.json next to the input data and report
# how many records it holds.
for _label, _gdf in (
    ('age2_short', gdf_age2_short),
    ('age2_medium', gdf_age2_medium),
    ('age2_long', gdf_age2_long),
):
    output = f'{_label}.json'
    out_path_file = ODpath.joinpath(output)
    # `index` is the documented to_file keyword; the previous `ageex=None` was
    # not a to_file parameter and was passed through to the I/O engine as an
    # unknown option.
    _gdf.to_file(out_path_file, index=None, driver='GeoJSON')
    print (f'\nThe number of records written to {_label} geo dataframe:', "{:,}".format(len(_gdf)))


The number of records written to age2_short geo dataframe: 1,738,074

The number of records written to age2_medium geo dataframe: 1,666,285

The number of records written to age2_long geo dataframe: 1,749,646


In [10]:
# Wrap each age3 distance subset in a GeoDataFrame keyed on its LineString column.
gdf_age3_short = gp.GeoDataFrame(age3_short, geometry='geometry')
gdf_age3_medium = gp.GeoDataFrame(age3_med, geometry='geometry')
gdf_age3_long = gp.GeoDataFrame(age3_long, geometry='geometry')

# Write each GeoDataFrame to <label>.json next to the input data and report
# how many records it holds. All three messages now read "records" (the
# age3_short message previously said "record").
for _label, _gdf in (
    ('age3_short', gdf_age3_short),
    ('age3_medium', gdf_age3_medium),
    ('age3_long', gdf_age3_long),
):
    output = f'{_label}.json'
    out_path_file = ODpath.joinpath(output)
    # `index` is the documented to_file keyword; the previous `ageex=None` was
    # not a to_file parameter and was passed through to the I/O engine as an
    # unknown option.
    _gdf.to_file(out_path_file, index=None, driver='GeoJSON')
    print (f'\nThe number of records written to {_label} geo dataframe:', "{:,}".format(len(_gdf)))


The number of record written to age3_short geo dataframe: 657,255

The number of records written to age3_medium geo dataframe: 613,549

The number of records written to age3_long geo dataframe: 665,189
