## Scenario/Stakeholder-Based Analysis of NYC Taxi Ride Data
##### Authors: Panini Mokrala, Dmitrii Danilov

In [None]:
!pip install PyShp
!pip install sodapy

import datetime as dt
import io
import os
import zipfile

import pandas as pd
import requests
import shapefile
from google.cloud import bigquery
from google.oauth2 import service_account
from shapely.geometry import Polygon, shape
from sodapy import Socrata



In [None]:
def import_taxi_zones(url='https://s3.amazonaws.com/nyc-tlc/misc/taxi_zones.zip',
                      timeout=60):
    """Download the taxi-zone shapefile archive and return it as a DataFrame.

    Parameters
    ----------
    url : str
        Location of the zipped shapefile. The archive must contain
        ``taxi_zones.shp``, ``taxi_zones.shx`` and ``taxi_zones.dbf``.
    timeout : float
        Seconds to wait for the HTTP request before giving up.

    Returns
    -------
    pandas.DataFrame
        One row per shape. Columns are the lower-cased attribute names of the
        shapefile plus ``coords``, which holds the geometry as a WKT string.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    """
    response = requests.get(url, timeout=timeout)
    # Surface a failed download as an HTTP error instead of a confusing
    # BadZipFile raised further down.
    response.raise_for_status()

    # Read everything while the archive members are still open, and close
    # them deterministically afterwards.
    with zipfile.ZipFile(io.BytesIO(response.content), 'r') as archive, \
            archive.open('taxi_zones.shp') as shp, \
            archive.open('taxi_zones.shx') as shx, \
            archive.open('taxi_zones.dbf') as dbf:
        sf = shapefile.Reader(shp=shp, shx=shx, dbf=dbf)

        # The first entry of sf.fields is pyshp's DeletionFlag pseudo-field,
        # which has no counterpart in the records, so it is skipped.
        fields = [field[0].lower() for field in sf.fields][1:]
        records = sf.records()
        # Build geometries through __geo_interface__ so that multi-part
        # shapes (islands, holes) keep their ring structure. Polygon(s.points)
        # would chain every part into a single self-crossing ring.
        geometries = [shape(s.__geo_interface__).wkt for s in sf.shapes()]

    zones = pd.DataFrame(columns=fields, data=records)
    return zones.assign(coords=geometries)

In [None]:
# Load the zone lookup table once and preview its first rows.
taxi_zones = import_taxi_zones()
taxi_zones.head(n=5)

Unnamed: 0,objectid,shape_leng,shape_area,zone,locationid,borough,coords
0,1,0.116357,0.000782,Newark Airport,1,EWR,"POLYGON ((933100.9183527103 192536.0856972019,..."
1,2,0.43347,0.004866,Jamaica Bay,2,Queens,"POLYGON ((1033269.243591294 172126.0078125, 10..."
2,3,0.084341,0.000314,Allerton/Pelham Gardens,3,Bronx,"POLYGON ((1026308.769506663 256767.6975403726,..."
3,4,0.043567,0.000112,Alphabet City,4,Manhattan,"POLYGON ((992073.4667968601 203714.0759887695,..."
4,5,0.092146,0.000498,Arden Heights,5,Staten Island,"POLYGON ((935843.3104932606 144283.335850656, ..."


In [None]:
# Monthly trip counts from the NYC Open Data (Socrata) API.
#
# The app token is read from the SOCRATA_APP_TOKEN environment variable so it
# is not committed with the notebook. sodapy accepts None as the token, in
# which case requests are unauthenticated (and subject to stricter throttling).
SOCRATA_DOMAIN = "data.cityofnewyork.us"
# Dataset queried below. Other dataset ids previously tried with the same
# kind of query: "t29m-gskq", "k67s-dv2t".
SOCRATA_DATASET_ID = "biws-g3hs"
MONTHLY_COUNT_QUERY = (
    "select date_trunc_ym(tpep_pickup_datetime) as month, "
    "count(*) group by month order by month"
)

client = Socrata(SOCRATA_DOMAIN, os.environ.get("SOCRATA_APP_TOKEN"), timeout=500)

start_time = dt.datetime.now()
results = client.get(SOCRATA_DATASET_ID, query=MONTHLY_COUNT_QUERY)
end_time = dt.datetime.now()
# total_seconds() is the full elapsed time; timedelta.seconds is only the
# seconds *component* and silently drops whole days.
print(f'Duration: {int((end_time - start_time).total_seconds())} sec')

results_df = pd.DataFrame.from_records(results)
results_df

Duration: 57 sec
                      month     count
0   2001-01-01T00:00:00.000         3
1   2002-12-01T00:00:00.000         1
2   2003-01-01T00:00:00.000         5
3   2008-12-01T00:00:00.000        35
4   2009-01-01T00:00:00.000        69
5   2017-01-01T00:00:00.000   9710124
6   2017-02-01T00:00:00.000   9168825
7   2017-03-01T00:00:00.000  10294628
8   2017-04-01T00:00:00.000  10046188
9   2017-05-01T00:00:00.000  10102124
10  2017-06-01T00:00:00.000   9656993
11  2017-07-01T00:00:00.000   8588486
12  2017-08-01T00:00:00.000   8422197
13  2017-09-01T00:00:00.000   8945574
14  2017-10-01T00:00:00.000   9768740
15  2017-11-01T00:00:00.000   9284777
16  2017-12-01T00:00:00.000   9508050
17  2018-01-01T00:00:00.000        30
18  2018-02-01T00:00:00.000         6
19  2018-03-01T00:00:00.000         4
20  2018-04-01T00:00:00.000         9
21  2018-05-01T00:00:00.000         4
22  2041-11-01T00:00:00.000         1
23  2053-03-01T00:00:00.000         1


In [None]:
# Monthly trip counts for 2018 from the BigQuery public dataset.
#
# The service-account key path follows the standard
# GOOGLE_APPLICATION_CREDENTIALS variable and falls back to a local auth.json.
key_path = os.environ.get('GOOGLE_APPLICATION_CREDENTIALS', 'auth.json')
credentials = service_account.Credentials.from_service_account_file(key_path)

client = bigquery.Client(credentials=credentials, project=credentials.project_id)
# Notes on the query:
# * The upper bound is exclusive. BETWEEN is inclusive on both ends and would
#   count rides at exactly 2019-01-01 00:00:00, producing a stray 2019-01 row.
# * The table path is backtick-quoted because the project id contains dashes.
sql = """
select datetime_trunc(pickup_datetime, month) as month,
       count(*) as count
from `bigquery-public-data.new_york_taxi_trips.tlc_yellow_trips_2018`
where pickup_datetime >= '2018-01-01'
  and pickup_datetime < '2019-01-01'
group by month
order by month;
"""
df = client.query(sql).to_dataframe()
df

Unnamed: 0,month,f0_
0,2018-01-01,8760090
1,2018-02-01,8493469
2,2018-03-01,18858487
3,2018-04-01,9305358
4,2018-05-01,9224100
5,2018-06-01,8713711
6,2018-07-01,7849588
7,2018-08-01,7849042
8,2018-09-01,8039936
9,2018-10-01,8821141


Hello World!