# Create a GeoJSON file from a list of PUDO (pick-up/drop-off) coordinates
See /home/test/Documents/PROJECTS/VFH/iPython_Notebooks/dv_pudo_profiles.ipynb for more info.  

ref: https://geoffboeing.com/2015/10/exporting-python-data-geojson/  
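Note: in the referenced example, `with open(output_filename, 'wb') as output_file:` should use mode `'w'`, not `'wb'` — in Python 3 `json.dump` writes `str`, which a binary-mode file rejects.  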

In [1]:
# Display the path of the Python interpreter this kernel is running on.
import sys
sys.executable

'/home/test/anaconda3/envs/vfh/bin/python'

In [2]:
%matplotlib inline
import numpy as np
import pandas as pd
import pandas.io.sql as pandasql
import matplotlib.pyplot as plt
import matplotlib as mpl
import matplotlib.ticker as ticker

from IPython.display import HTML, display
import datetime
import matplotlib.dates as mdates
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
import os

from datetime import datetime

import json

# Functions

In [60]:
def df_to_geojson(df, properties, lat='latitude', lon='longitude'):
    """Convert a DataFrame of points into a GeoJSON ``FeatureCollection`` dict.

    Every row of ``df`` becomes one ``Feature`` with a ``Point`` geometry whose
    coordinates are ordered ``[longitude, latitude]``.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per point. Must contain the ``lat`` and ``lon`` columns and
        every column named in ``properties``.
    properties : iterable of str
        Column names copied into each feature's ``properties`` object.
    lat, lon : str, optional
        Names of the latitude / longitude columns.

    Returns
    -------
    dict
        ``{'type': 'FeatureCollection', 'features': [...]}``, containing only
        native Python values for numeric columns so that it can be passed
        straight to ``json.dump``.

    Raises
    ------
    KeyError
        If ``lat``, ``lon`` or one of ``properties`` is not a column of ``df``.

    Notes
    -----
    Values are read column by column rather than with ``iterrows()``, which
    coerces each row to a single dtype (turning integer columns into floats in
    numeric frames and leaving numpy scalars that ``json`` cannot serialise).
    Missing (NaN) property values are emitted as ``None`` (JSON ``null``).
    Coordinates are passed through unchanged, so rows with missing coordinates
    should be filtered out by the caller.
    """
    def _to_native(value):
        # numpy scalars (np.int64, np.float64, ...) -> plain Python scalars,
        # because the json module rejects e.g. np.int64.
        if isinstance(value, np.generic):
            return value.item()
        return value

    def _to_property(value):
        value = _to_native(value)
        # NaN != NaN; a bare NaN token is not valid JSON, so write null instead.
        if isinstance(value, float) and value != value:
            return None
        return value

    properties = list(properties)
    # Column lookup happens once, up front; a missing column raises KeyError here.
    columns = [df[name] for name in [lon, lat] + properties]

    features = []
    for x, y, *values in zip(*columns):
        features.append({
            'type': 'Feature',
            'properties': {name: _to_property(value)
                           for name, value in zip(properties, values)},
            'geometry': {'type': 'Point',
                         'coordinates': [_to_native(x), _to_native(y)]},
        })
    return {'type': 'FeatureCollection', 'features': features}

# GeoJSON properties

In [61]:
# Columns copied into the "properties" object of every PUDO GeoJSON feature.
cols = [
    'dow',         # day of week
    'timewindow',  # time-of-day window
    'type',        # numeric code assigned further down in this notebook
]

# Ward

In [31]:
# Ward number used to filter trips by pickup ward.
ward=1
# Day of week and time window; both are used to build the output file name.
this_day = "Monday"
this_tod = "amPeak"

# Read coordinates

In [5]:
# Input CSV, given as a path relative to the notebook's working directory.
fname = '../../../Documents/PROJECTS/VFH/COT_PAGE/dummydata/test_data/w1_pudo_092018_latlon.csv'

# Load the whole file into memory.
df = pd.read_csv(fname)

# Quick sanity check: (rows, columns) and the first five records.
print(df.shape)
df.head(5)

(1380447, 16)


Unnamed: 0,pickup_datetime,dropoff_datetime,pickup_ward2018,dropoff_ward2018,pickup_gc_intersection_id,dropoff_gc_intersection_id,pickup_mun_id,dropoff_mun_id,dow,timewindow,pickup_geom,dropoff_geom,lat_pickup,lon_pickup,lat_dropoff,lon_dropoff
0,2018-01-31 11:00:00,2018-01-31 11:16:10,1.0,1.0,13453377,13452644,69,69,Wednesday,midday,0101000020E6100000891D070AAEE753C0C9183602E5DD...,0101000020E6100000D7213E7D32E453C0A36DDE3370DE...,43.733551,-79.619998,43.737799,-79.565582
1,2018-02-04 11:00:00,2018-02-04 11:04:20,1.0,1.0,13452484,13449810,69,69,Saturday,midday,0101000020E6100000C253F8CEF0E553C02ED0E61F9ADE...,0101000020E610000053AD506718E653C09CC2B2E8C9E0...,43.739079,-79.592823,43.756162,-79.595239
2,2018-01-16 11:00:00,2018-01-16 11:50:22,13.0,1.0,20089579,13453797,69,69,Tuesday,midday,0101000020E6100000CCF75953E3D753C0872927604FD4...,0101000020E6100000A8D9369EE5E653C0E9F258C695DD...,43.658672,-79.37325,43.731133,-79.607765
3,2018-01-06 11:00:00,2018-01-06 11:36:41,10.0,1.0,30079943,13452805,69,69,Saturday,midday,0101000020E6100000DA7D7D639EDA53C0BC6C5A7ACBD1...,0101000020E61000001B87ED33D6E353C0CD0709F34CDE...,43.639022,-79.415917,43.736723,-79.559949
4,2018-01-12 11:00:00,2018-01-12 11:13:41,1.0,1.0,20232392,13462944,69,69,Friday,midday,0101000020E6100000214404ACADE653C06A0D40CB3CDD...,0101000020E6100000FF5CD13961E453C0499FDFC27DD6...,43.728418,-79.60435,43.675713,-79.568434


In [6]:
# List the column names of the loaded frame.
list(df)

['pickup_datetime',
 'dropoff_datetime',
 'pickup_ward2018',
 'dropoff_ward2018',
 'pickup_gc_intersection_id',
 'dropoff_gc_intersection_id',
 'pickup_mun_id',
 'dropoff_mun_id',
 'dow',
 'timewindow',
 'pickup_geom',
 'dropoff_geom',
 'lat_pickup',
 'lon_pickup',
 'lat_dropoff',
 'lon_dropoff']

# PUDO subsets

## Filter by ward

### all days of week, all time windows

In [8]:
# Keep only the trips whose pickup ward equals `ward`, restricted to the
# timestamp, drop-off ward, day/time-window and pickup-coordinate columns.
df_pu = df.loc[df['pickup_ward2018']==ward, ['pickup_datetime',
 'dropoff_datetime', 'dropoff_ward2018', 'dow', 'timewindow','lat_pickup','lon_pickup']]
print(df_pu.shape)

(641183, 7)


In [9]:
# Rename the pickup coordinate columns to 'latitude' / 'longitude', the default
# column names expected by df_to_geojson.
df_pu = df_pu.rename(columns={'lat_pickup': 'latitude', 'lon_pickup': 'longitude'})
list(df_pu)

['pickup_datetime',
 'dropoff_datetime',
 'dropoff_ward2018',
 'dow',
 'timewindow',
 'latitude',
 'longitude']

### Monday, amPeak only

In [62]:
# Select the trips for the configured day of week and time window
# (`this_day` / `this_tod`, the same values that name the output file) instead
# of repeating the literals here, so data and file name cannot drift apart.
# .copy() yields an independent frame: a 'type' column is added to it later,
# which on a slice of `df` can trigger pandas' SettingWithCopyWarning.
df_mon_amPeak = df.loc[
    (df['dow'] == this_day) & (df['timewindow'] == this_tod),
    ['pickup_datetime', 'dropoff_datetime', 'pickup_ward2018', 'dropoff_ward2018',
     'dow', 'timewindow', 'lat_pickup', 'lon_pickup'],
].copy()

# Sanity check: (rows, columns) and the retained column names.
print(df_mon_amPeak.shape)
list(df_mon_amPeak)

(18226, 8)


['pickup_datetime',
 'dropoff_datetime',
 'pickup_ward2018',
 'dropoff_ward2018',
 'dow',
 'timewindow',
 'lat_pickup',
 'lon_pickup']

In [63]:
# Add a 'type' column initialised to NaN; it is filled in by the next cell.
df_mon_amPeak['type'] = np.nan

In [64]:
# Tag each trip by how it touches the configured ward (`ward`, the same value
# used for the pickup filter, rather than a hard-coded 1):
#   1 = picked up in the ward, 5 = dropped off in the ward.
# The drop-off assignment runs second, so a trip that both starts and ends in
# the ward ends up tagged 5.
# NOTE(review): rows tagged 5 still carry the pickup coordinates (only
# lat_pickup / lon_pickup are kept in this frame) — confirm that is intended.
df_mon_amPeak.loc[df_mon_amPeak['pickup_ward2018'] == ward, 'type'] = 1
df_mon_amPeak.loc[df_mon_amPeak['dropoff_ward2018'] == ward, 'type'] = 5

In [65]:
# Check which type codes are present; a NaN here would mean some rows matched
# neither ward condition.
df_mon_amPeak['type'].unique()

array([5., 1.])

In [66]:
# Rename the pickup coordinate columns to 'latitude' / 'longitude', the default
# column names expected by df_to_geojson.
df_mon_amPeak = df_mon_amPeak.rename(columns={'lat_pickup': 'latitude', 'lon_pickup': 'longitude'})
list(df_mon_amPeak)

['pickup_datetime',
 'dropoff_datetime',
 'pickup_ward2018',
 'dropoff_ward2018',
 'dow',
 'timewindow',
 'latitude',
 'longitude',
 'type']

In [69]:
# Cast 'type' from float (it was initialised with NaN) to int.
# NOTE(review): astype(int) is expected to fail if any NaN is left in the
# column — confirm every row was tagged before this cell runs.
df_mon_amPeak = df_mon_amPeak.astype({"type": int,})

In [70]:
# Confirm the codes are now integers.
df_mon_amPeak['type'].unique()

array([5, 1])

# Save to geojson

In [71]:
# geojson_pu = df_to_geojson(df_pu, cols)

In [72]:
# Build the FeatureCollection for the filtered trips, copying the `cols`
# columns into each feature's properties.
geojson_mon_amPeak = df_to_geojson(df_mon_amPeak, cols)

# Write geojson to file

In [73]:
# Build the output path from the configured ward, day of week and time window
# rather than a hard-coded ward prefix (with ward=1 this is the same path as before).
output_filename = ('../src/geojson/w' + str(ward) + '_092018_'
                   + this_day + '_' + this_tod + '.geojson')

# Text mode ('w', not 'wb'): json.dump writes str. The encoding is set
# explicitly so the result does not depend on the platform default.
# The former `output_file.write('')` was a no-op and has been dropped.
with open(output_filename, 'w', encoding='utf-8') as output_file:
    json.dump(geojson_mon_amPeak, output_file, indent=2)