## Create the Industry JSON data for Tiles

In [1]:
import pandas as pd
import geopandas as gp
import fiona
import shapely.geometry as geom

from pathlib import Path

#### Locate file needed for processing

In [2]:
# Directory holding the origin-destination (OD) data, and the CSV this notebook reads from it.
ODpath = Path("../data/OD/")
OD_file = ODpath / "od_distance_1k-15k_15-60_miles.csv"

# Report whether the input is present before any processing is attempted.
print("OD file exist" if OD_file.exists() else "OD file does not exist")
    

OD file exist


#### Read file

In [3]:
# Read the geocode identifiers and the group count as text instead of numbers
# (presumably so the codes are not altered by numeric parsing — TODO confirm).
text_columns = {'w_geocode': str, 'h_geocode': str, 'w_group_count': str}
df = pd.read_csv(OD_file, dtype=text_columns)

# Row count with thousands separators.
record_total = "{:,}".format(len(df))
print('\nThe number of records:', record_total, '\n\n')

# Preview the first three rows.
df.head(3)


The number of records: 8,383,570 




Unnamed: 0,w_geocode,h_geocode,distance,S000,SA01,SA02,SA03,SE01,SE02,SE03,SI01,SI02,SI03,w_lat,w_lon,h_lat,h_lon,state,w_group_count
0,20200007023012,20200001011055,31749,2,1,0,1,0,2,0,0,1,1,61.212,-149.742,61.436,-149.375,2,1548
1,20200007023012,20200001012050,28342,1,0,0,1,1,0,0,0,1,0,61.212,-149.742,61.411,-149.414,2,1548
2,20200007023012,20200001012064,27888,1,0,1,0,0,1,0,0,0,1,61.212,-149.742,61.413,-149.431,2,1548


#### Create a LineString geometry

In [4]:
def _commute_line(row):
    """Build a two-point LineString for one row.

    The first vertex is (w_lon, w_lat) and the second is (h_lon, h_lat),
    i.e. coordinates are given in (longitude, latitude) order.
    """
    start = (row['w_lon'], row['w_lat'])
    end = (row['h_lon'], row['h_lat'])
    return geom.LineString([start, end])


# Row-wise apply: one geometry per record, stored in a new 'geometry' column.
df['geometry'] = df.apply(_commute_line, axis=1)
print('\nLineString geometries created.')


LineString geometries created.


#### Drop unnecessary columns

In [5]:
# The coordinate columns have already been folded into 'geometry'; the other
# columns listed here are not referenced by any later visible cell.
# NOTE: the drop is in place, so re-running this cell raises KeyError.
columns_to_remove = [
    'h_geocode',
    'h_lat',
    'h_lon',
    'w_group_count',
    'w_lat',
    'w_lon',
    'state',
]
df.drop(columns=columns_to_remove, inplace=True)

# Preview the slimmed-down frame.
df.head(3)

Unnamed: 0,w_geocode,distance,S000,SA01,SA02,SA03,SE01,SE02,SE03,SI01,SI02,SI03,geometry
0,20200007023012,31749,2,1,0,1,0,2,0,0,1,1,"LINESTRING (-149.742 61.212, -149.375 61.43600..."
1,20200007023012,28342,1,0,0,1,1,0,0,0,1,0,"LINESTRING (-149.742 61.212, -149.414 61.411)"
2,20200007023012,27888,1,0,1,0,0,1,0,0,0,1,"LINESTRING (-149.742 61.212, -149.431 61.413)"


#### Build DataFrames & Process based on distance of commute

In [7]:
# Band edges for the 'distance' column: short < 32000 <= medium < 46000 <= long.
# (Units are presumably metres — TODO confirm against the OD source.)
SHORT_MAX_DISTANCE = 32000
MEDIUM_MAX_DISTANCE = 46000


def _split_by_distance(frame):
    """Split `frame` into (short, medium, long) subsets by its 'distance' column.

    Each subset is produced by boolean indexing, so it is a new DataFrame;
    later changes to `frame` do not propagate to the subsets.
    """
    dist = frame['distance']
    short = frame[dist < SHORT_MAX_DISTANCE]
    medium = frame[(dist >= SHORT_MAX_DISTANCE) & (dist < MEDIUM_MAX_DISTANCE)]
    long_ = frame[dist >= MEDIUM_MAX_DISTANCE]
    return short, medium, long_


# Records with at least one job counted in SI01.
df_ind1 = df[df['SI01'] >= 1].copy()
ind1_short, ind1_med, ind1_long = _split_by_distance(df_ind1)
print ('\nThe number of records ind1_short:', "{:,}".format(len(ind1_short)))
print ('The number of records ind1_medium:', "{:,}".format(len(ind1_med)))
print ('The number of records ind1_long: ', "{:,}".format(len(ind1_long)))

# Records with at least one job counted in SI02.
df_ind2 = df[df['SI02'] >= 1].copy()
ind2_short, ind2_med, ind2_long = _split_by_distance(df_ind2)
print ('\nThe number of records ind2_short:', "{:,}".format(len(ind2_short)))
print ('The number of records ind2_medium:', "{:,}".format(len(ind2_med)))
# Label fixed: the original was missing the space between "records" and "ind2_long".
print ('The number of records ind2_long: ', "{:,}".format(len(ind2_long)))

# Records with at least one job counted in SI03.
df_ind3 = df[df['SI03'] >= 1].copy()
ind3_short, ind3_med, ind3_long = _split_by_distance(df_ind3)
print ('\nThe number of records ind3_short:', "{:,}".format(len(ind3_short)))
print ('The number of records ind3_medium:', "{:,}".format(len(ind3_med)))
print ('The number of records ind3_long: ', "{:,}".format(len(ind3_long)))


The number of records ind1_short: 375,921
The number of records ind1_medium: 386,974
The number of records ind1_long:  423,000

The number of records ind2_short: 416,543
The number of records ind2_medium: 402,350
The number of recordsind2_long:  527,018

The number of records ind3_short: 1,996,990
The number of records ind3_medium: 1,902,288
The number of records ind3_long:  2,027,573


#### Remove the unneeded columns

In [8]:
# Columns that were only needed for the industry / distance filtering above.
UNNEEDED_COLUMNS = ['distance', 'S000', 'SA01', 'SA02', 'SA03',
                    'SE01', 'SE02', 'SE03', 'SI01', 'SI02', 'SI03']


def _strip_unneeded(frame):
    """Return `frame` without UNNEEDED_COLUMNS.

    errors='ignore' skips columns that are already gone, so this cell can be
    re-run without raising KeyError.
    """
    return frame.drop(columns=UNNEEDED_COLUMNS, errors='ignore')


# The per-industry parent frames (what the original cell trimmed).
df_ind1, df_ind2, df_ind3 = (
    _strip_unneeded(frame) for frame in (df_ind1, df_ind2, df_ind3)
)

# The short/medium/long subsets are the frames actually written to GeoJSON.
# They were created by boolean indexing, which yields independent copies, so
# dropping columns on the parents alone never reached the output files.
ind1_short, ind1_med, ind1_long = (
    _strip_unneeded(frame) for frame in (ind1_short, ind1_med, ind1_long)
)
ind2_short, ind2_med, ind2_long = (
    _strip_unneeded(frame) for frame in (ind2_short, ind2_med, ind2_long)
)
ind3_short, ind3_med, ind3_long = (
    _strip_unneeded(frame) for frame in (ind3_short, ind3_med, ind3_long)
)

#### Write the GeoDataFrames to JSON

In [9]:
# Wrap each SI01 distance band in a GeoDataFrame, using the 'geometry' column
# as the active geometry.
gdf_ind1_short = gp.GeoDataFrame(ind1_short, geometry='geometry')
gdf_ind1_medium = gp.GeoDataFrame(ind1_med, geometry='geometry')
gdf_ind1_long = gp.GeoDataFrame(ind1_long, geometry='geometry')

# (frame, output file name, message printed after the write), in write order.
ind1_jobs = (
    (gdf_ind1_short, 'ind1_short.json',
     '\nThe number of records written to ind1_short geo dataframe:'),
    (gdf_ind1_medium, 'ind1_medium.json',
     '\nThe number of records written to ind1_medium geo dataframe:'),
    (gdf_ind1_long, 'ind1_long.json',
     '\nThe number of records written to ind1_long geo dataframe:'),
)

# Write each band as GeoJSON into the OD data directory, then report its size.
for gdf, output, message in ind1_jobs:
    out_path_file = ODpath / output
    gdf.to_file(out_path_file, index=None, driver='GeoJSON')
    print(message, "{:,}".format(len(gdf)))

  with fiona.drivers():



The number of records written to ind1_short geo dataframe: 375,921

The number of records written to ind1_medium geo dataframe: 386,974

The number of records written to ind1_long geo dataframe: 423,000


In [10]:
# Wrap each SI02 distance band in a GeoDataFrame, using the 'geometry' column
# as the active geometry.
gdf_ind2_short = gp.GeoDataFrame(ind2_short, geometry='geometry')
gdf_ind2_medium = gp.GeoDataFrame(ind2_med, geometry='geometry')
gdf_ind2_long = gp.GeoDataFrame(ind2_long, geometry='geometry')

# (frame, output file name, message printed after the write), in write order.
ind2_jobs = (
    (gdf_ind2_short, 'ind2_short.json',
     '\nThe number of records written to ind2_short geo dataframe:'),
    (gdf_ind2_medium, 'ind2_medium.json',
     '\nThe number of records written to ind2_medium geo dataframe:'),
    (gdf_ind2_long, 'ind2_long.json',
     '\nThe number of records written to ind2_long geo dataframe:'),
)

# Write each band as GeoJSON into the OD data directory, then report its size.
for gdf, output, message in ind2_jobs:
    out_path_file = ODpath / output
    gdf.to_file(out_path_file, index=None, driver='GeoJSON')
    print(message, "{:,}".format(len(gdf)))


The number of records written to ind2_short geo dataframe: 416,543

The number of records written to ind2_medium geo dataframe: 402,350

The number of records written to ind2_long geo dataframe: 527,018


In [11]:
# Wrap each SI03 distance band in a GeoDataFrame, using the 'geometry' column
# as the active geometry.
gdf_ind3_short = gp.GeoDataFrame(ind3_short, geometry='geometry')
gdf_ind3_medium = gp.GeoDataFrame(ind3_med, geometry='geometry')
gdf_ind3_long = gp.GeoDataFrame(ind3_long, geometry='geometry')

# (frame, output file name, message printed after the write), in write order.
# The ind3_short message originally read "record"; it now says "records" so
# all nine write messages in the notebook share the same wording.
ind3_jobs = (
    (gdf_ind3_short, 'ind3_short.json',
     '\nThe number of records written to ind3_short geo dataframe:'),
    (gdf_ind3_medium, 'ind3_medium.json',
     '\nThe number of records written to ind3_medium geo dataframe:'),
    (gdf_ind3_long, 'ind3_long.json',
     '\nThe number of records written to ind3_long geo dataframe:'),
)

# Write each band as GeoJSON into the OD data directory, then report its size.
for gdf, output, message in ind3_jobs:
    out_path_file = ODpath.joinpath(output)
    gdf.to_file(out_path_file, index=None, driver='GeoJSON')
    print(message, "{:,}".format(len(gdf)))


The number of record written to ind3_short geo dataframe: 1,996,990

The number of records written to ind3_medium geo dataframe: 1,902,288

The number of records written to ind3_long geo dataframe: 2,027,573
