# Create a GeoJSON file from a list of PUDO (pickup/dropoff) coordinates
See /home/test/Documents/PROJECTS/VFH/iPython_Notebooks/dv_pudo_profiles.ipynb for more info.  

ref: https://geoffboeing.com/2015/10/exporting-python-data-geojson/  
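Note: the referenced article uses `with open(output_filename, 'wb') as output_file:`; under Python 3 the mode should be `'w'`, not `'wb'`, because `json.dump` writes text (`str`), not bytes.  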

In [1]:
# Display the path of the Python interpreter this kernel is running on
import sys
sys.executable

'/home/test/anaconda3/envs/vfh/bin/python'

In [2]:
%matplotlib inline
import numpy as np
import pandas as pd
import pandas.io.sql as pandasql
import matplotlib.pyplot as plt
import matplotlib as mpl
import matplotlib.ticker as ticker

from IPython.display import HTML, display
import datetime
import matplotlib.dates as mdates
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
import os

# NOTE: this rebinds the name `datetime` to the class, shadowing the `datetime`
# module imported earlier in this cell (`import datetime`).
from datetime import datetime

import json

# Functions

In [3]:
# https://geoffboeing.com/2015/10/exporting-python-data-geojson/
def df_to_geojson(df, properties, lat='latitude', lon='longitude'):
    """Build a GeoJSON ``FeatureCollection`` dict with one Point feature per row.

    Parameters
    ----------
    df : pandas.DataFrame
        Source rows. Must contain the ``lat`` and ``lon`` columns and every
        column named in ``properties``.
    properties : iterable of str
        Column names copied into each feature's ``properties`` mapping.
    lat, lon : str
        Names of the latitude and longitude columns.

    Returns
    -------
    dict
        ``{'type': 'FeatureCollection', 'features': [...]}``; each feature's
        coordinates are stored as ``[lon, lat]``.

    Notes
    -----
    * numpy scalars (e.g. ``np.int64``) are converted to built-in Python
      values so the result can be passed straight to ``json.dump``.
    * NaN property values become ``None`` (JSON ``null``), because ``NaN`` is
      not a valid JSON token.
    * Rows are not filtered: a row with a missing coordinate is still emitted,
      so drop such rows beforehand if they must not appear in the output.
    """
    def _native(value):
        # json.dump rejects numpy scalars; .item() yields the built-in equivalent.
        return value.item() if isinstance(value, np.generic) else value

    def _property(value):
        value = _native(value)
        # `value != value` is only true for NaN.
        if isinstance(value, float) and value != value:
            return None
        return value

    # Materialise once so a one-shot iterable is not exhausted after the first row.
    properties = list(properties)

    features = []
    for _, row in df.iterrows():
        features.append({
            'type': 'Feature',
            'properties': {prop: _property(row[prop]) for prop in properties},
            'geometry': {'type': 'Point',
                         'coordinates': [_native(row[lon]), _native(row[lat])]},
        })
    return {'type': 'FeatureCollection', 'features': features}

# GeoJSON properties for file

In [4]:
# Columns copied into the `properties` of every feature in the PUDO geojson file
cols = [
    'dow',
    'timewindow',
    'type',
]

# Variables

In [5]:
# Selection parameters for this export
ward = 1               # ward number
this_day = "Monday"    # day-of-week label (also used in the output filename)
this_tod = "amPeak"    # time-of-day window label (also used in the output filename)

# Read PUDO data from csv file

In [6]:
# Load the PUDO trip records from CSV (path is relative to the working directory)
fname = '../../../Documents/PROJECTS/VFH/COT_PAGE/dummydata/test_data/w1_pudo_092018_latlon.csv'
df = pd.read_csv(fname)

# Report (rows, columns), then preview the first five records
print(df.shape)
df.head()

(1380447, 16)


Unnamed: 0,pickup_datetime,dropoff_datetime,pickup_ward2018,dropoff_ward2018,pickup_gc_intersection_id,dropoff_gc_intersection_id,pickup_mun_id,dropoff_mun_id,dow,timewindow,pickup_geom,dropoff_geom,lat_pickup,lon_pickup,lat_dropoff,lon_dropoff
0,2018-01-31 11:00:00,2018-01-31 11:16:10,1.0,1.0,13453377,13452644,69,69,Wednesday,midday,0101000020E6100000891D070AAEE753C0C9183602E5DD...,0101000020E6100000D7213E7D32E453C0A36DDE3370DE...,43.733551,-79.619998,43.737799,-79.565582
1,2018-02-04 11:00:00,2018-02-04 11:04:20,1.0,1.0,13452484,13449810,69,69,Saturday,midday,0101000020E6100000C253F8CEF0E553C02ED0E61F9ADE...,0101000020E610000053AD506718E653C09CC2B2E8C9E0...,43.739079,-79.592823,43.756162,-79.595239
2,2018-01-16 11:00:00,2018-01-16 11:50:22,13.0,1.0,20089579,13453797,69,69,Tuesday,midday,0101000020E6100000CCF75953E3D753C0872927604FD4...,0101000020E6100000A8D9369EE5E653C0E9F258C695DD...,43.658672,-79.37325,43.731133,-79.607765
3,2018-01-06 11:00:00,2018-01-06 11:36:41,10.0,1.0,30079943,13452805,69,69,Saturday,midday,0101000020E6100000DA7D7D639EDA53C0BC6C5A7ACBD1...,0101000020E61000001B87ED33D6E353C0CD0709F34CDE...,43.639022,-79.415917,43.736723,-79.559949
4,2018-01-12 11:00:00,2018-01-12 11:13:41,1.0,1.0,20232392,13462944,69,69,Friday,midday,0101000020E6100000214404ACADE653C06A0D40CB3CDD...,0101000020E6100000FF5CD13961E453C0499FDFC27DD6...,43.728418,-79.60435,43.675713,-79.568434


In [7]:
# Column names of the loaded frame
df.columns.tolist()

['pickup_datetime',
 'dropoff_datetime',
 'pickup_ward2018',
 'dropoff_ward2018',
 'pickup_gc_intersection_id',
 'dropoff_gc_intersection_id',
 'pickup_mun_id',
 'dropoff_mun_id',
 'dow',
 'timewindow',
 'pickup_geom',
 'dropoff_geom',
 'lat_pickup',
 'lon_pickup',
 'lat_dropoff',
 'lon_dropoff']

# Housekeeping

## 1. Round lat, lon to 4 decimal places

In [8]:
n = 4  # number of decimals to round to

# Apply the same rounding to each pickup and dropoff coordinate column
for coord_col in ('lat_pickup', 'lon_pickup', 'lat_dropoff', 'lon_dropoff'):
    df[coord_col] = df[coord_col].round(decimals=n)

## 2. Label each trip as a pickup (`1`) or dropoff (`5`) in column `type`

In [9]:
# Label each trip by how it touches the selected ward:
#   0 = neither end in the ward (default), 1 = pickup in the ward, 5 = dropoff in the ward.
df['type'] = 0

# Compare against the `ward` variable from the "Variables" cell instead of a
# hard-coded 1, so changing the ward there is honoured here as well.
df.loc[df['pickup_ward2018'] == ward, 'type'] = 1
# NOTE: applied last, so a trip that both starts and ends in the ward is
# labelled 5 (dropoff) only.
df.loc[df['dropoff_ward2018'] == ward, 'type'] = 5

In [10]:
# Check which type codes were assigned (0 = default/unassigned, 1 = pickup, 5 = dropoff)
df['type'].unique()

array([5, 1])

## 3. Make a latitude, longitude column for each type

In [12]:
# Create the two coordinate columns as all-NaN placeholders; both are fully
# overwritten by the np.where assignments in the next cell.
df['latitude'] = np.nan
df['longitude'] = np.nan

In [13]:
# Rows typed 1 (pickup) take the pickup coordinates; every other row takes the dropoff coordinates
is_pickup = df['type'] == 1
df['latitude'] = np.where(is_pickup, df['lat_pickup'], df['lat_dropoff'])
df['longitude'] = np.where(is_pickup, df['lon_pickup'], df['lon_dropoff'])

In [15]:
# Check: show any rows whose latitude is missing (an empty result means none are)
df[df['latitude'].isna()]

Unnamed: 0,pickup_datetime,dropoff_datetime,pickup_ward2018,dropoff_ward2018,pickup_gc_intersection_id,dropoff_gc_intersection_id,pickup_mun_id,dropoff_mun_id,dow,timewindow,pickup_geom,dropoff_geom,lat_pickup,lon_pickup,lat_dropoff,lon_dropoff,type,latitude,longitude


In [16]:
# Check: show any rows whose longitude is missing (an empty result means none are)
df[df['longitude'].isna()]

Unnamed: 0,pickup_datetime,dropoff_datetime,pickup_ward2018,dropoff_ward2018,pickup_gc_intersection_id,dropoff_gc_intersection_id,pickup_mun_id,dropoff_mun_id,dow,timewindow,pickup_geom,dropoff_geom,lat_pickup,lon_pickup,lat_dropoff,lon_dropoff,type,latitude,longitude


# PUDO subsets

## Filter by ward

### all days of week, all time windows

In [None]:
# df_pu = df.loc[df['pickup_ward2018']==ward, ['pickup_datetime',
#  'dropoff_datetime', 'dropoff_ward2018', 'dow', 'timewindow','lat_pickup','lon_pickup']]
# print(df_pu.shape)

In [None]:
# df_pu = df_pu.rename(columns={'lat_pickup': 'latitude', 'lon_pickup': 'longitude'})
# list(df_pu)

### Monday, amPeak only

In [18]:
# Keep only the trips for the day of week / time window chosen in the "Variables" cell.
# Filtering on `this_day` / `this_tod` (rather than the literals "Monday" / "amPeak")
# keeps the selected rows consistent with the output filename, which is built from
# the same two variables. The frame keeps its original name for the cells below.
df_mon_amPeak = df.loc[
    (df['dow'] == this_day) & (df['timewindow'] == this_tod),
    ['pickup_datetime', 'dropoff_datetime', 'pickup_ward2018', 'dropoff_ward2018',
     'type', 'dow', 'timewindow', 'latitude', 'longitude'],
]

print(df_mon_amPeak.shape)
list(df_mon_amPeak)

(18226, 9)


['pickup_datetime',
 'dropoff_datetime',
 'pickup_ward2018',
 'dropoff_ward2018',
 'type',
 'dow',
 'timewindow',
 'latitude',
 'longitude']

In [23]:
# Preview the first ten rows labelled 5 (dropoff)
df_mon_amPeak[df_mon_amPeak['type'] == 5].head(10)

Unnamed: 0,pickup_datetime,dropoff_datetime,pickup_ward2018,dropoff_ward2018,type,dow,timewindow,latitude,longitude
37,2018-02-05 07:00:00,2018-02-05 07:27:39,2.0,1.0,5,Monday,amPeak,43.7311,-79.6078
38,2018-02-05 07:00:00,2018-02-05 07:31:18,8.0,1.0,5,Monday,amPeak,43.7311,-79.6078
39,2018-01-15 07:00:00,2018-01-15 07:05:55,1.0,1.0,5,Monday,amPeak,43.7075,-79.5782
81,2018-02-12 07:00:00,2018-02-12 07:10:52,1.0,1.0,5,Monday,amPeak,43.7301,-79.6049
84,2018-01-29 07:00:00,2018-01-29 07:10:34,2.0,1.0,5,Monday,amPeak,43.7147,-79.5922
85,2018-01-01 07:00:00,2018-01-01 07:06:29,7.0,1.0,5,Monday,amPeak,43.7322,-79.5921
109,2018-02-12 07:00:00,2018-02-12 07:12:18,1.0,1.0,5,Monday,amPeak,43.7019,-79.5756
134,2018-01-15 07:00:00,2018-01-15 07:18:23,7.0,1.0,5,Monday,amPeak,43.7115,-79.5714
142,2018-01-01 07:00:00,2018-01-01 07:13:30,7.0,1.0,5,Monday,amPeak,43.6934,-79.5885
143,2018-01-01 07:00:00,2018-01-01 07:29:11,7.0,1.0,5,Monday,amPeak,43.6978,-79.5932


In [None]:
# df=  # FIXME: unfinished assignment; as written it is a SyntaxError and stops "Run All"

# Save to geojson

In [None]:
# geojson_pu = df_to_geojson(df_pu, cols)

In [24]:
# Convert the filtered trips to a GeoJSON dict, carrying the columns listed in `cols` as properties
geojson_mon_amPeak = df_to_geojson(df=df_mon_amPeak, properties=cols)

# Write geojson to file

In [25]:
# The output path encodes the selected day of week and time window.
output_filename = '../src/geojson/w1_092018_{}_{}.geojson'.format(this_day, this_tod)

# Text mode ('w') because json.dump writes str, not bytes. The former
# `output_file.write('')` was a no-op (opening with 'w' already truncates the
# file) and has been removed.
with open(output_filename, 'w') as output_file:
    json.dump(geojson_mon_amPeak, output_file, indent=2)