## Create the Age JSON data for Tiles

In [1]:
import pandas as pd
import geopandas as gp
import fiona
import shapely.geometry as geom

from pathlib import Path

#### Locate file needed for processing

In [2]:
# Directory holding the origin-destination data, and the CSV this notebook processes.
ODpath = Path("../data/OD/")
OD_file = ODpath / "od_distance_1k-15k_15-60_miles_continental.csv"

# Report up front whether the input is present (this check does not stop the notebook).
print ("OD file exist" if OD_file.exists() else "OD file does not exist")
    

OD file exist


#### Read file

In [3]:
# These columns are forced to str so pandas does not parse them as numbers.
string_columns = ['w_geocode', 'h_geocode', 'w_group_count']
df = pd.read_csv(OD_file, dtype=dict.fromkeys(string_columns, str))

record_count = "{:,}".format(len(df))
print ('\nThe number of records:', record_count, '\n\n')

# Preview the first rows to confirm the expected columns were loaded.
df.head(3)


The number of records: 8,770,266 




Unnamed: 0,w_geocode,h_geocode,distance,S000,SA01,SA02,SA03,SE01,SE02,SE03,SI01,SI02,SI03,w_lat,w_lon,h_lat,h_lon,state,w_group_count
0,10010205001001,10010209001001,30634,1,1,0,0,1,0,0,0,0,1,32.457,-86.415,32.705,-86.559,1,1119
1,10010205001001,10010209001002,29897,2,2,0,0,1,1,0,0,0,2,32.457,-86.415,32.694,-86.567,1,1119
2,10010205001001,10010209001015,31828,1,1,0,0,0,1,0,0,0,1,32.457,-86.415,32.699,-86.597,1,1119


#### Create a LineString geometry

In [4]:
# Build one two-point LineString per row, from (w_lon, w_lat) to (h_lon, h_lat)
# (presumably workplace -> home; confirm against the data dictionary).
# Coordinates are passed longitude first, i.e. in (x, y) order.
#
# The four coordinate columns are iterated directly instead of using
# df.apply(..., axis=1): apply builds a full row Series for every record, which
# is very slow on millions of rows, while zip only touches the values needed.
# The resulting geometries are the same, and an empty frame simply yields an
# empty column.
df['geometry'] = [
    geom.LineString([(w_lon, w_lat), (h_lon, h_lat)])
    for w_lon, w_lat, h_lon, h_lat in zip(df['w_lon'], df['w_lat'], df['h_lon'], df['h_lat'])
]
print ('\nLineString geometries created.')


LineString geometries created.


#### Drop unnecessary columns

In [5]:
# The end-point coordinates now live in the geometry column; these columns are
# not used by any later cell, so remove them from df in place.
obsolete_columns = ['h_geocode', 'h_lat', 'h_lon', 'w_group_count', 'w_lat', 'w_lon', 'state']
df.drop(columns=obsolete_columns, inplace=True)

# Preview the slimmed-down frame.
df.head(3)

Unnamed: 0,w_geocode,distance,S000,SA01,SA02,SA03,SE01,SE02,SE03,SI01,SI02,SI03,geometry
0,10010205001001,30634,1,1,0,0,1,0,0,0,0,1,"LINESTRING (-86.41500000000001 32.457, -86.559..."
1,10010205001001,29897,2,2,0,0,1,1,0,0,0,2,"LINESTRING (-86.41500000000001 32.457, -86.567..."
2,10010205001001,31828,1,1,0,0,0,1,0,0,0,1,"LINESTRING (-86.41500000000001 32.457, -86.597..."


#### Build DataFrames & Process based on distance of commute

In [6]:
# Thresholds on the `distance` column that separate the short / medium / long
# commute bands.
# NOTE(review): units are those of the `distance` column (presumably metres) - confirm.
SHORT_DISTANCE_LIMIT = 32000
MEDIUM_DISTANCE_LIMIT = 46000


def _split_by_distance(flows):
    """Return independent (short, medium, long) copies of ``flows``.

    Rows are assigned by their ``distance`` value:
      short  : distance < SHORT_DISTANCE_LIMIT
      medium : SHORT_DISTANCE_LIMIT <= distance < MEDIUM_DISTANCE_LIMIT
      long   : distance >= MEDIUM_DISTANCE_LIMIT
    """
    distance = flows['distance']
    short = flows[distance < SHORT_DISTANCE_LIMIT].copy()
    medium = flows[(distance >= SHORT_DISTANCE_LIMIT) & (distance < MEDIUM_DISTANCE_LIMIT)].copy()
    long_ = flows[distance >= MEDIUM_DISTANCE_LIMIT].copy()
    return short, medium, long_


def _print_band_counts(group, short, medium, long_):
    """Print the number of rows in each distance band of the group ``group``."""
    print ('\nThe number of records {}_short:'.format(group), "{:,}".format(len(short)))
    print ('The number of records {}_medium:'.format(group), "{:,}".format(len(medium)))
    # The trailing space after the colon is kept from the original report format.
    print ('The number of records {}_long: '.format(group), "{:,}".format(len(long_)))


# Keep only the rows with at least one count in the relevant SA0x column
# (presumably the three age groups), then split each subset by distance.
df_age1 = df[df['SA01'] >= 1].copy()
age1_short, age1_medium, age1_long = _split_by_distance(df_age1)
_print_band_counts('age1', age1_short, age1_medium, age1_long)

df_age2 = df[df['SA02'] >= 1].copy()
age2_short, age2_medium, age2_long = _split_by_distance(df_age2)
# The original label for age2_long read "recordsage2_long" (missing space);
# the shared helper prints it consistently with the other groups.
_print_band_counts('age2', age2_short, age2_medium, age2_long)

df_age3 = df[df['SA03'] >= 1].copy()
age3_short, age3_medium, age3_long = _split_by_distance(df_age3)
_print_band_counts('age3', age3_short, age3_medium, age3_long)


The number of records age1_short: 568,547
The number of records age1_medium: 550,931
The number of records age1_long:  682,867

The number of records age2_short: 1,763,154
The number of records age2_medium: 1,705,855
The number of recordsage2_long:  1,818,050

The number of records age3_short: 680,758
The number of records age3_medium: 640,971
The number of records age3_long:  709,274


#### Remove the unneeded columns

In [7]:
# Attribute columns that were only needed to build the age / distance subsets.
surplus_columns = ['distance', 'S000', 'SA01', 'SA02', 'SA03',
                   'SE01', 'SE02', 'SE03', 'SI01', 'SI02', 'SI03']

# Every band frame is trimmed in place, in the same order as before.
band_frames = (
    age1_short, age1_medium, age1_long,
    age2_short, age2_medium, age2_long,
    age3_short, age3_medium, age3_long,
)
for band_frame in band_frames:
    band_frame.drop(surplus_columns, axis=1, inplace=True)

#### Write the GeoDataFrames to JSON

In [8]:
# Wrap each age1 band in a GeoDataFrame that uses the LineString column as geometry.
gdf_age1_short = gp.GeoDataFrame(age1_short, geometry='geometry')
gdf_age1_medium = gp.GeoDataFrame(age1_medium, geometry='geometry')
gdf_age1_long = gp.GeoDataFrame(age1_long, geometry='geometry')

# Write each band to <band>.json inside ODpath with the GeoJSON driver and
# report how many rows were written.
age1_exports = (
    ('age1_short', gdf_age1_short),
    ('age1_medium', gdf_age1_medium),
    ('age1_long', gdf_age1_long),
)
for band_name, band_gdf in age1_exports:
    output = band_name + '.json'
    out_path_file = ODpath.joinpath(output)
    band_gdf.to_file(out_path_file, index=None, driver='GeoJSON')
    print ('\nThe number of records written to ' + band_name + ' geo dataframe:', "{:,}".format(len(band_gdf)))


The number of records written to age1_short geo dataframe: 568,547

The number of records written to age1_medium geo dataframe: 550,931

The number of records written to age1_long geo dataframe: 682,867


In [9]:
# Wrap each age2 band in a GeoDataFrame that uses the LineString column as geometry.
gdf_age2_short = gp.GeoDataFrame(age2_short, geometry='geometry')
gdf_age2_medium = gp.GeoDataFrame(age2_medium, geometry='geometry')
gdf_age2_long = gp.GeoDataFrame(age2_long, geometry='geometry')

# Write each band to <band>.json inside ODpath with the GeoJSON driver and
# report how many rows were written.
age2_exports = (
    ('age2_short', gdf_age2_short),
    ('age2_medium', gdf_age2_medium),
    ('age2_long', gdf_age2_long),
)
for band_name, band_gdf in age2_exports:
    output = band_name + '.json'
    out_path_file = ODpath.joinpath(output)
    band_gdf.to_file(out_path_file, index=None, driver='GeoJSON')
    print ('\nThe number of records written to ' + band_name + ' geo dataframe:', "{:,}".format(len(band_gdf)))


The number of records written to age2_short geo dataframe: 1,763,154

The number of records written to age2_medium geo dataframe: 1,705,855

The number of records written to age2_long geo dataframe: 1,818,050


In [10]:
# Wrap each age3 band in a GeoDataFrame that uses the LineString column as geometry.
gdf_age3_short = gp.GeoDataFrame(age3_short, geometry='geometry')
gdf_age3_medium = gp.GeoDataFrame(age3_medium, geometry='geometry')
gdf_age3_long = gp.GeoDataFrame(age3_long, geometry='geometry')

# Write each band to <band>.json inside ODpath with the GeoJSON driver and
# report how many rows were written. One message template is used for all
# three bands; the age3_short message previously read "record" (singular),
# unlike every other band's message.
age3_exports = (
    ('age3_short', gdf_age3_short),
    ('age3_medium', gdf_age3_medium),
    ('age3_long', gdf_age3_long),
)
for band_name, band_gdf in age3_exports:
    output = band_name + '.json'
    out_path_file = ODpath.joinpath(output)
    band_gdf.to_file(out_path_file, index=None, driver='GeoJSON')
    print ('\nThe number of records written to ' + band_name + ' geo dataframe:', "{:,}".format(len(band_gdf)))


The number of record written to age3_short geo dataframe: 680,758

The number of records written to age3_medium geo dataframe: 640,971

The number of records written to age3_long geo dataframe: 709,274
