# Imports (compatible for Python 2 and Python 3)

In [1]:
try:
    from StringIO import BytesIO as io
except ImportError:
    from io import BytesIO as io

try:
    from urllib import urlencode as urlencode
except ImportError:
    from urllib.parse import urlencode as urlencode
    
try:
    from urllib import urlopen as urlopen
except ImportError:
    from urllib.request import urlopen as urlopen
    
try:
    from urllib2 import HTTPError as HTTPError
except ImportError:
    from urllib.error import HTTPError as HTTPError

import ast
import pandas as pd

In [2]:
SQL_SOURCE = 'https://fb55.carto.com/api/v2/sql?q='

def queryCartoDB(query, formatting = 'CSV', source = SQL_SOURCE):
    '''queries carto datasets from a given carto account
    Arguments: 
    query - string: a valid sql query string
    format - outlut format  OPTIONAL (default CSV)
    source - a valid sql api endpoint OPTIONAL (default carto fb55 account)
    Returns:
    the return of the sql query AS A STRING
    NOTES:
    designed for the carto API, tested only with CSV return format'''
    
    
    data = urlencode({'format': formatting, 'q': query}).encode("utf-8")
    try:
        response = urlopen(source, data)
        return response.read()
    except HTTPError as e:
        raise (ValueError('\n'.join(ast.literal_eval(e.readline())['error'])))
        
def get_data(query):
    try:
        return pd.read_csv(io(queryCartoDB(query)), sep = ',')
    except ValueError as v:
        print (str(v))

In [3]:
# Query data from citibike database

query = '''SELECT * FROM fb55.citibike; '''

table= get_data(query)

table.head()

Unnamed: 0,the_geom,start_station_name,the_geom_webmercator,field_1,start_station_longitude,tripduration,starttime,stoptime,start_station_id,cartodb_id,start_station_latitude,end_station_id,end_station_name,end_station_latitude,end_station_longitude,bikeid,usertype,birth_year,gender
0,,E 20 St & Park Ave,,175,-73.98752,1090,2015-02-01 01:23:00+00,2015-02-01 01:42:00+00,503,107,40.738274,229,Great Jones St,40.727434,-73.99379,19718,Subscriber,1961.0,1
1,,W 43 St & 10 Ave,,1159,-73.994618,682,2015-02-01 10:55:00+00,2015-02-01 11:07:00+00,515,1088,40.760094,490,8 Ave & W 33 St,40.751551,-73.993934,21501,Subscriber,1981.0,1
2,,E 6 St & Avenue B,,2827,-73.981854,751,2015-02-01 13:59:00+00,2015-02-01 14:11:00+00,317,2759,40.724537,466,W 25 St & 6 Ave,40.743954,-73.991449,14788,Subscriber,1990.0,1
3,,E 12 St & 3 Ave,,4961,-73.9889,272,2015-02-01 17:28:00+00,2015-02-01 17:32:00+00,483,4893,40.732233,345,W 13 St & 6 Ave,40.736494,-73.997044,16219,Subscriber,1961.0,1
4,,W 41 St & 8 Ave,,6156,-73.990026,240,2015-02-01 21:36:00+00,2015-02-01 21:40:00+00,477,6090,40.756405,490,8 Ave & W 33 St,40.751551,-73.993934,18266,Customer,,0


In [4]:
# Task1.1: Sort data by both: start_station_id ascending, and tripduration descending (hint: ASC, DESC)

query = '''SELECT * FROM fb55.citibike
            ORDER BY start_station_id ASC, tripduration DESC; '''

table1 = get_data(query)

table1.head(5)

Unnamed: 0,the_geom,start_station_name,the_geom_webmercator,field_1,start_station_longitude,tripduration,starttime,stoptime,start_station_id,cartodb_id,start_station_latitude,end_station_id,end_station_name,end_station_latitude,end_station_longitude,bikeid,usertype,birth_year,gender
0,,W 52 St & 11 Ave,,9115,-73.993929,2099,2015-02-03 10:28:00+00,2015-02-03 11:02:00+00,72,9054,40.767272,328,Watts St & Greenwich St,40.724055,-74.00966,18653,Subscriber,1966.0,2
1,,W 52 St & 11 Ave,,2051,-73.993929,1944,2015-02-01 12:39:00+00,2015-02-01 13:12:00+00,72,1983,40.767272,328,Watts St & Greenwich St,40.724055,-74.00966,17849,Customer,,0
2,,W 52 St & 11 Ave,,40355,-73.993929,1914,2015-02-07 09:49:00+00,2015-02-07 10:21:00+00,72,40341,40.767272,328,Watts St & Greenwich St,40.724055,-74.00966,21520,Subscriber,1966.0,2
3,,W 52 St & 11 Ave,,12996,-73.993929,1801,2015-02-04 06:32:00+00,2015-02-04 07:02:00+00,72,12938,40.767272,328,Watts St & Greenwich St,40.724055,-74.00966,15161,Subscriber,1959.0,2
4,,W 52 St & 11 Ave,,11359,-73.993929,1678,2015-02-03 18:21:00+00,2015-02-03 18:49:00+00,72,11298,40.767272,79,Franklin St & W Broadway,40.719116,-74.006667,21500,Subscriber,1964.0,1


In [5]:
# Task1.2: Select last 10 records of the table. Hint: use the table’s main id field

query = '''SELECT * FROM fb55.citibike
            ORDER BY cartodb_id DESC
            LIMIT 10; '''

table2 = get_data(query)

table2.head(10)

Unnamed: 0,the_geom,start_station_name,the_geom_webmercator,field_1,start_station_longitude,tripduration,starttime,stoptime,start_station_id,cartodb_id,start_station_latitude,end_station_id,end_station_name,end_station_latitude,end_station_longitude,bikeid,usertype,birth_year,gender
0,,E 4 St & 2 Ave,,46199,-73.98978,917,2015-02-07 23:59:00+00,2015-02-08 00:15:00+00,439,46200,40.726281,417,Barclay St & Church St,40.712912,-74.010202,20998,Subscriber,1965,2
1,,Carmine St & 6 Ave,,46198,-74.00215,548,2015-02-07 23:58:00+00,2015-02-08 00:08:00+00,368,46199,40.730386,334,W 20 St & 7 Ave,40.742388,-73.997262,19540,Subscriber,1983,2
2,,5 Ave & E 29 St,,46197,-73.986831,392,2015-02-07 23:57:00+00,2015-02-08 00:03:00+00,474,46198,40.745168,325,E 19 St & 3 Ave,40.736245,-73.984738,15545,Subscriber,1986,1
3,,5 Ave & E 29 St,,46196,-73.986831,428,2015-02-07 23:57:00+00,2015-02-08 00:04:00+00,474,46197,40.745168,325,E 19 St & 3 Ave,40.736245,-73.984738,16395,Subscriber,1986,2
4,,W 20 St & 8 Ave,,46195,-74.00004,689,2015-02-07 23:57:00+00,2015-02-08 00:08:00+00,470,46196,40.743453,325,E 19 St & 3 Ave,40.736245,-73.984738,15585,Subscriber,1953,1
5,,1 Ave & E 44 St,,46194,-73.969053,1422,2015-02-07 23:57:00+00,2015-02-08 00:20:00+00,455,46195,40.75002,265,Stanton St & Chrystie St,40.722293,-73.991475,20184,Subscriber,1960,2
6,,E 20 St & 2 Ave,,46193,-73.98205,993,2015-02-07 23:56:00+00,2015-02-08 00:13:00+00,461,46194,40.735877,295,Pike St & E Broadway,40.714067,-73.992939,16722,Subscriber,1974,1
7,,Warren St & Church St,,46192,-74.009106,1165,2015-02-07 23:56:00+00,2015-02-08 00:15:00+00,152,46193,40.71474,325,E 19 St & 3 Ave,40.736245,-73.984738,16978,Subscriber,1959,1
8,,W 47 St & 10 Ave,,46191,-73.993012,1248,2015-02-07 23:56:00+00,2015-02-08 00:17:00+00,495,46192,40.762699,432,E 7 St & Avenue A,40.726218,-73.983799,16300,Subscriber,1984,1
9,,W 17 St & 8 Ave,,46190,-74.001497,306,2015-02-07 23:55:00+00,2015-02-08 00:01:00+00,116,46191,40.741776,494,W 26 St & 8 Ave,40.747348,-73.997236,14736,Subscriber,1983,2


In [6]:
# Task1.3: List all unique birth years. Hint: distinct

query = '''SELECT DISTINCT birth_year FROM fb55.citibike
            ORDER BY birth_year ASC; '''

table3 = get_data(query)

table3

Unnamed: 0,birth_year,Unnamed: 1
0,1900.0,
1,1901.0,
2,1910.0,
3,1922.0,
4,1926.0,
5,1929.0,
6,1934.0,
7,1935.0,
8,1938.0,
9,1939.0,


In [7]:
# Task1.4: Find minimal, maximal and average trip duration hint: min(), max(), avg()

query = '''SELECT MIN(tripduration) AS min_trip_duration, MAX(tripduration) AS max_trip_duration, AVG(tripduration) AS avg_trip_duration
            FROM fb55.citibike; '''

table4 = get_data(query)

table4.head()

Unnamed: 0,min_trip_duration,max_trip_duration,avg_trip_duration
0,60,43016,675.865823


In [8]:
# Task 2.1: Select only trips that started at 1 AM hint: EXTRACT(HOUR FROM fieldname::time);

query = '''SELECT * FROM fb55.citibike
WHERE EXTRACT(HOUR FROM starttime::time) = 1; '''

table5 = get_data(query)

table5.head(10)

Unnamed: 0,the_geom,start_station_name,the_geom_webmercator,field_1,start_station_longitude,tripduration,starttime,stoptime,start_station_id,cartodb_id,start_station_latitude,end_station_id,end_station_name,end_station_latitude,end_station_longitude,bikeid,usertype,birth_year,gender
0,,E 20 St & Park Ave,,175,-73.98752,1090,2015-02-01 01:23:00+00,2015-02-01 01:42:00+00,503,107,40.738274,229,Great Jones St,40.727434,-73.99379,19718,Subscriber,1961.0,1
1,,8 Ave & W 33 St,,0,-73.993934,447,2015-02-01 01:11:00+00,2015-02-01 01:19:00+00,490,93,40.751551,479,9 Ave & W 45 St,40.760193,-73.991255,15185,Subscriber,1983.0,2
2,,E 11 St & 2 Ave,,39,-73.986724,1269,2015-02-06 01:13:00+00,2015-02-06 01:34:00+00,237,30897,40.730473,402,Broadway & E 22 St,40.740343,-73.989551,17274,Subscriber,1973.0,2
3,,St Marks Pl & 1 Ave,,158,-73.985649,245,2015-02-01 01:03:00+00,2015-02-01 01:07:00+00,438,89,40.727791,297,E 15 St & 3 Ave,40.734232,-73.986923,20142,Subscriber,1990.0,1
4,,E 2 St & 2 Ave,,159,-73.990697,384,2015-02-01 01:04:00+00,2015-02-01 01:10:00+00,403,90,40.725029,336,Sullivan St & Washington Sq,40.730477,-73.999061,20684,Subscriber,1979.0,1
5,,St Marks Pl & 1 Ave,,160,-73.985649,370,2015-02-01 01:10:00+00,2015-02-01 01:16:00+00,438,91,40.727791,545,E 23 St & 1 Ave,40.736502,-73.978095,15792,Subscriber,1985.0,1
6,,MacDougal St & Prince St,,161,-74.002971,731,2015-02-01 01:11:00+00,2015-02-01 01:23:00+00,128,92,40.727103,502,Henry St & Grand St,40.714215,-73.981346,19506,Subscriber,1970.0,1
7,,1 Ave & E 15 St,,162,-73.981656,275,2015-02-01 01:11:00+00,2015-02-01 01:16:00+00,504,94,40.732219,507,E 25 St & 2 Ave,40.739126,-73.979738,15327,Subscriber,1985.0,1
8,,Lafayette St & E 8 St,,163,-73.990765,998,2015-02-01 01:12:00+00,2015-02-01 01:29:00+00,293,95,40.730287,363,West Thames St,40.708347,-74.017134,15552,Subscriber,1986.0,2
9,,W 22 St & 8 Ave,,164,-73.999154,551,2015-02-01 01:13:00+00,2015-02-01 01:22:00+00,453,96,40.744751,546,E 30 St & Park Ave S,40.744449,-73.983035,17994,Subscriber,1961.0,1


In [9]:
# task 2.2: What is the average birth year of people that ride bikes at 2 AM?

query = '''SELECT avg(birth_year)
            FROM fb55.citibike
            WHERE EXTRACT(HOUR FROM starttime::time) = 2; '''

table6 = get_data(query)

table6

Unnamed: 0,avg,Unnamed: 1
0,1978.201754,


In [10]:
# Task 2.3: What is the age of the oldest person riding at 3 AM? Hint: age = 2018 – birth_year.
# (For this task we assume they were all born on the same day).

query = '''SELECT MAX(2018-birth_year)
            FROM ( SELECT birth_year
                    FROM fb55.citibike
                    WHERE EXTRACT(HOUR FROM starttime::time) = 3) as birth; '''

table7 = get_data(query)

table7

Unnamed: 0,max,Unnamed: 1
0,78,


In [11]:
# Task 3.1: Find the “start_station_id” that had the highest number of bikes taken from it
# hint: GROUP BY station id, COUNT();

query = '''SELECT start_station_id, COUNT(start_station_id)
            FROM fb55.citibike
            GROUP BY start_station_id
            ORDER BY count DESC
            LIMIT 1; '''

table8 = get_data(query)

table8.head(10)

Unnamed: 0,start_station_id,count
0,521,530


In [12]:
# Task 3.2: Show top 3 “end_station_id” with the largest total “tripduration”. 
# Hint: GROUP BY station id, SUM();

query = '''SELECT end_station_id, SUM(tripduration) as sum_trip_duartion
            FROM fb55.citibike
            GROUP BY end_station_id
            ORDER BY sum_trip_duartion ASC
            LIMIT 3; '''

table9 = get_data(query)

table9

Unnamed: 0,end_station_id,sum_trip_duartion
0,443,3208
1,2001,3429
2,144,3439


In [13]:
# Task3.3: Find the “start_station_id” with the shortest average trip duration during 1 AM.

query = '''SELECT start_station_id, AVG(tripduration) as avg_trip_duration
            FROM fb55.citibike
            GROUP BY start_station_id
            ORDER BY avg_trip_duration ASC
            LIMIT 3; '''

table10 = get_data(query)

table10

Unnamed: 0,start_station_id,avg_trip_duration
0,241,388.393939
1,239,390.526316
2,262,401.324324
