# Create json file of daily trips averaged by month

original table: `ptc.daily_trips`  

Processed to fill-in data gap using this query:  
```
SELECT to_char(dt, 'YYYY-MM') as period, 
     CASE WHEN to_char(dt, 'YYYY-MM') = '2018-03' THEN 108321
         ELSE avg(count)::integer 
     END as count 
FROM ptc.daily_trips
GROUP BY period
ORDER BY period
```

JSON format should be:  
```
{
	"tpd": {
		"keys": {
			"name": "years",
			"values": [
				"2016-09", "2016-10", "2016-11", "2016-12", "2017-01", "2017-02",
	       "2017-03", "2017-04", "2017-05", "2017-06", "2017-07", "2017-08",
	       "2017-09", "2017-10", "2017-11", "2017-12", "2018-01", "2018-02",
	       "2018-03", "2018-04", "2018-05", "2018-06", "2018-07", "2018-08",
	       "2018-09", "2018-10", "2018-11", "2018-12", "2019-01", "2019-02",
	       "2019-03"
			]
		},
		"city": [
			62242,  64443,  65595,  71107,  69280,  75834,  80486,  80165,
	        79951,  83822,  76303,  77527,  82788,  83752,  88247,  97224,
	        91856, 100055, 108321, 116587, 118861, 132495, 131914, 145230,
	       152776, 159412, 161733, 164054, 161267, 172254, 175803
		]
	}
}
```

In [48]:
import sys
sys.executable

'/home/nangini/anaconda3/bin/python'

In [49]:
%matplotlib inline
import numpy as np
import pandas as pd
import pandas.io.sql as pandasql
import matplotlib.pyplot as plt
import matplotlib as mpl
import matplotlib.ticker as ticker

from IPython.display import HTML, display
import datetime
import matplotlib.dates as mdates
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
import os

from datetime import datetime

import json

# Functions

In [50]:
# https://geoffboeing.com/2015/10/exporting-python-data-geojson/
def df_to_geojson(df, properties, lat="latitude", lon="longitude"):
    geojson = {"type":"FeatureCollection", "features":[]}
    for _, row in df.iterrows():
        feature = {"type":"Feature",
                   "properties":{},
                   "geometry":{"type":"Point",
                               "coordinates":[]}}
        feature["geometry"]["coordinates"] = [row[lon],row[lat]]
        for prop in properties:
            feature["properties"][prop] = int(row[prop])
        geojson["features"].append(feature)
    return geojson

# Variables

# Read daily trip count from csv file
Created by the following query:  
```
SELECT to_char(dt, 'YYYY-MM') as period, 
     CASE WHEN to_char(dt, 'YYYY-MM') = '2018-03' THEN 108321
         ELSE avg(count)::integer 
     END as count 
FROM ptc.daily_trips
GROUP BY period
ORDER BY period
```

In [51]:
mydir = '../../../../Documents/PROJECTS/BDITTO/VFH/COT_PAGE/DATA/FROM_PGADMIN/'

# fname = '/../../Documents/PROJECTS/VFH/COT_PAGE/INTERIM_DATA/FROM_PGADMIN/daily_trips_processed.csv'
fname = mydir + 'daily_trips_filledin.csv'


df = pd.read_csv(fname)

print(df.shape)
df.head(5)

(1088, 2)


Unnamed: 0,dt,count
0,2016-09-07,53897.0
1,2016-09-08,56636.0
2,2016-09-09,72422.0
3,2016-09-10,90372.0
4,2016-09-11,65221.0


# Process data

## Group the data by month, and take the mean for each month
https://chrisalbon.com/python/data_wrangling/pandas_group_data_by_time/

In [52]:
# Set dt col as index
df.set_index('dt', inplace=True)

df.head(5)

Unnamed: 0_level_0,count
dt,Unnamed: 1_level_1
2016-09-07,53897.0
2016-09-08,56636.0
2016-09-09,72422.0
2016-09-10,90372.0
2016-09-11,65221.0


In [53]:
# Convert index to datetime
df.index = pd.to_datetime(df.index)

df.head(5)

Unnamed: 0_level_0,count
dt,Unnamed: 1_level_1
2016-09-07,53897.0
2016-09-08,56636.0
2016-09-09,72422.0
2016-09-10,90372.0
2016-09-11,65221.0


In [54]:
# Group the data by month, and take the mean for each group (i.e. each month)
dm = df.resample('M').mean()
dm.head(5)

Unnamed: 0_level_0,count
dt,Unnamed: 1_level_1
2016-09-30,62241.708333
2016-10-31,64442.709677
2016-11-30,65595.133333
2016-12-31,71106.741935
2017-01-31,69279.935484


In [55]:
# store only YYYY-MM
dm.index = dm.index.strftime('%Y-%m')

In [56]:
dm.index

Index(['2016-09', '2016-10', '2016-11', '2016-12', '2017-01', '2017-02',
       '2017-03', '2017-04', '2017-05', '2017-06', '2017-07', '2017-08',
       '2017-09', '2017-10', '2017-11', '2017-12', '2018-01', '2018-02',
       '2018-03', '2018-04', '2018-05', '2018-06', '2018-07', '2018-08',
       '2018-09', '2018-10', '2018-11', '2018-12', '2019-01', '2019-02',
       '2019-03', '2019-04', '2019-05', '2019-06', '2019-07'],
      dtype='object')

In [57]:
dm.head(5)

Unnamed: 0,count
2016-09,62241.708333
2016-10,64442.709677
2016-11,65595.133333
2016-12,71106.741935
2017-01,69279.935484


## Convert count to int

In [58]:
dm['count'] = dm['count'].astype('int')

In [59]:
dm.head(5)

Unnamed: 0,count
2016-09,62241
2016-10,64442
2016-11,65595
2016-12,71106
2017-01,69279


# Check against database

## pick-ups in ward

**WARD 10 Monday, hr 0 in db**  

```
SELECT COUNT(*) FROM cnangini.trip_data_092018_counts
WHERE pickup_ward2018 = 10
AND pickup_dow = 'Monday'
AND pickup_hr = '00'
```
=> **4146 for ward 10**  
=> **265 for ward 1**  

In [60]:
# print('ward: ' + repr(ward))
# df_merge.loc[(df_merge['dow']=='Monday') & (df_merge['hr']==0), ['dow','hr','pcounts']]
# # => CHECK OK

# Create json object and write to file
Format:  
```
{
	"tpd": {
		"keys": {
			"name": "years",
			"values": [
				"2016-09", "2016-10", "2016-11", "2016-12", "2017-01", "2017-02",
	       "2017-03", "2017-04", "2017-05", "2017-06", "2017-07", "2017-08",
	       "2017-09", "2017-10", "2017-11", "2017-12", "2018-01", "2018-02",
	       "2018-03", "2018-04", "2018-05", "2018-06", "2018-07", "2018-08",
	       "2018-09", "2018-10", "2018-11", "2018-12", "2019-01", "2019-02",
	       "2019-03"
			]
		},
		"city": [
			62242,  64443,  65595,  71107,  69280,  75834,  80486,  80165,
	        79951,  83822,  76303,  77527,  82788,  83752,  88247,  97224,
	        91856, 100055, 108321, 116587, 118861, 132495, 131914, 145230,
	       152776, 159412, 161733, 164054, 161267, 172254, 175803
		]
	}
}

```

In [63]:
ptcjson = {
    "tpd": {
        "keys": {
            "name": "date",
            "values": dm.index.to_list()
        },
        "city": dm['count'].to_list()
    }
}

ptcjson

{'tpd': {'keys': {'name': 'date',
   'values': ['2016-09',
    '2016-10',
    '2016-11',
    '2016-12',
    '2017-01',
    '2017-02',
    '2017-03',
    '2017-04',
    '2017-05',
    '2017-06',
    '2017-07',
    '2017-08',
    '2017-09',
    '2017-10',
    '2017-11',
    '2017-12',
    '2018-01',
    '2018-02',
    '2018-03',
    '2018-04',
    '2018-05',
    '2018-06',
    '2018-07',
    '2018-08',
    '2018-09',
    '2018-10',
    '2018-11',
    '2018-12',
    '2019-01',
    '2019-02',
    '2019-03',
    '2019-04',
    '2019-05',
    '2019-06',
    '2019-07']},
  'city': [62241,
   64442,
   65595,
   71106,
   69279,
   75834,
   80485,
   80164,
   79951,
   83821,
   76302,
   77526,
   82788,
   83751,
   88247,
   97223,
   91856,
   100055,
   91034,
   116587,
   118860,
   132494,
   131914,
   145229,
   152776,
   159412,
   172294,
   164054,
   161266,
   172253,
   175803,
   169743,
   172861,
   177690,
   160291]}}

## Write json to file
https://stackabuse.com/reading-and-writing-json-to-a-file-in-python/

In [64]:
dm.shape

(35, 1)

In [65]:
import json

# mydir = '../../../Documents/PROJECTS/VFH/COT_PAGE/INTERIM_DATA/JSONS/'
projdir = '../src/data/'

fname = projdir + 'fig1_dailytrips.json'
with open(fname, 'w') as outfile:
    json.dump(ptcjson, outfile)

In [66]:
print("200 OK")

200 OK
