In [1]:
import pandas as pd
from pymongo import MongoClient, GEO2D
import pymongo
import json
import datetime
from bson.son import SON
import folium 
import math
import numpy as np
from IPython.display import display

In [2]:
# Open the connection to the MongoDB database "aniv_cidade_event".
# MongoClient() is called without arguments, so the driver's default host/port are used.
db = MongoClient().aniv_cidade_event

In [4]:
# First stop of bus line 410 (Praça Saens Pena), written as (latitude, longitude)
initial_point = (-22.923736, -43.235721)


In [5]:
# Last stop of bus line 410 (Terminal PUC), written as (latitude, longitude)
final_point = (-22.979408, -43.231352)

In [6]:
# Same coordinates as initial_point, swapped to (longitude, latitude) for the MongoDB geo query
inverted_initial_point = (-43.235721, -22.923736)

In [7]:
# Same coordinates as final_point, swapped to (longitude, latitude) for the MongoDB geo query
inverted_final_point = (-43.231352, -22.979408)

In [8]:
# Distinct bus ids of line 410 for the day 20150208 (day and line taken from the collection name)
def bus_id_list():
    """Return the distinct ``bus_id`` values stored in ``dados20150208_line_410``.

    Takes no arguments; the collection is reached through the module-level
    ``db`` handle.
    """
    return db.dados20150208_line_410.distinct("bus_id")
    

In [9]:
# Call bus_id_list and display the ids it returns
busId_list = bus_id_list()
busId_list

[u'A72065',
 u'A72152',
 u'A72071',
 u'A72121',
 u'A72053',
 u'A72009',
 u'A72050',
 u'A72110',
 u'A72089']

In [10]:
# Convert the bus ids (unicode strings, as shown in the output above) into a list of plain str
def get_busId_list(bus_ids=None):
    """Return the given bus ids converted with ``str``, preserving their order.

    Parameters
    ----------
    bus_ids : iterable, optional
        Ids to convert. When omitted, the module-level ``busId_list`` is used,
        so the original zero-argument call keeps working.

    Returns
    -------
    list of str
        One ``str`` per input id; an empty list for an empty input.
    """
    if bus_ids is None:
        # Fall back to the notebook-level list produced by bus_id_list()
        bus_ids = busId_list
    return [str(bus) for bus in bus_ids]
    

In [11]:
# Plain-str copy of the bus ids; it is passed to all_travel_table further down
array_busId = get_busId_list()
array_busId

['A72065',
 'A72152',
 'A72071',
 'A72121',
 'A72053',
 'A72009',
 'A72050',
 'A72110',
 'A72089']

In [12]:
# Given a bus id and a reference point (route start or end), return the GPS samples of
# that bus that fall inside a small circle around the point, ordered by timestamp.
def near_points_to_origin (bus_id, inverted_origin_point, radius=0.00052543, collection=None):
    """Return the samples of ``bus_id`` located within ``radius`` of a point.

    Parameters
    ----------
    bus_id : str
        Value matched against the ``bus_id`` field of each document.
    inverted_origin_point : sequence of two floats
        Centre of the search circle in (longitude, latitude) order, which is
        the order the geo query expects.
    radius : float, optional
        Radius handed to ``$center``. The default is the value this notebook
        has always used. It is expressed in the units of the stored
        coordinates (presumably degrees, since ``latlon`` appears to hold
        lon/lat pairs -- TODO confirm).
    collection : optional
        Object exposing ``find(...)`` to query. When omitted,
        ``db.dados20150208_line_410`` is used, as before.

    Returns
    -------
    pandas.DataFrame
        Columns ``timestamp``, ``latitude``, ``longitude``, ``speed``, in
        ascending ``timestamp`` order. Empty when nothing matches.
    """
    if collection is None:
        collection = db.dados20150208_line_410

    # NOTE(review): `$within` is the legacy spelling of `$geoWithin` (deprecated since
    # MongoDB 2.4). It is kept so the query behaves exactly as before; consider switching
    # once the server version is confirmed.
    query = {
        "latlon": {"$within": {"$center": [inverted_origin_point, radius]}},
        "bus_id": bus_id,
    }
    cursor = collection.find(query).sort("timestamp", pymongo.DESCENDING)

    # NOTE(review): judging by the notebook output, timestamps are 'dd-mm-YYYY HH:MM:SS'
    # strings, so this ordering is lexical -- chronological only within a single day.
    near_points = [
        [row[u'timestamp'], row[u'latitude'], row[u'longitude'], row[u'speed']]
        for row in cursor
    ]
    # The cursor is newest-first; one reverse gives oldest-first without the
    # quadratic cost of inserting every row at position 0.
    near_points.reverse()

    return pd.DataFrame(near_points, columns=['timestamp', 'latitude', 'longitude', 'speed'])

In [13]:
# Call near_points_to_origin for bus "A72065" to get the samples closest to the initial stop.
# The point is passed in swapped (longitude, latitude) order via inverted_initial_point.
sorted_near_initial_points = near_points_to_origin ("A72065", inverted_initial_point)
sorted_near_initial_points

Unnamed: 0,timestamp,latitude,longitude,speed
0,08-02-2015 05:42:00,-22.923906,-43.235825,0.0
1,08-02-2015 05:43:11,-22.923958,-43.235874,1.11
2,08-02-2015 05:43:45,-22.923914,-43.23579,0.74
3,08-02-2015 05:44:55,-22.923861,-43.235775,0.19
4,08-02-2015 05:46:05,-22.923864,-43.235775,0.19
5,08-02-2015 05:47:15,-22.923885,-43.235741,0.19
6,08-02-2015 05:47:50,-22.923882,-43.23579,0.19
7,08-02-2015 05:49:00,-22.923878,-43.235741,0.19
8,08-02-2015 05:50:11,-22.923885,-43.23571,0.19
9,08-02-2015 05:50:46,-22.923864,-43.235725,0.19


In [14]:
# Call near_points_to_origin for bus "A72065" to get the samples closest to the final stop (PUC).
# Remember that the point is passed in swapped (longitude, latitude) order.
sorted_near_final_points = near_points_to_origin ("A72065", inverted_final_point)
sorted_near_final_points

Unnamed: 0,timestamp,latitude,longitude,speed
0,08-02-2015 06:56:57,-22.979334,-43.23159,11.48
1,08-02-2015 06:58:07,-22.979334,-43.23159,11.48
2,08-02-2015 06:59:11,-22.979334,-43.23159,11.48
3,08-02-2015 06:59:30,-22.979334,-43.23159,11.48
4,08-02-2015 07:01:11,-22.979334,-43.23159,11.48
5,08-02-2015 07:01:43,-22.979334,-43.23159,11.48
6,08-02-2015 07:02:24,-22.979334,-43.23159,11.48
7,08-02-2015 07:03:47,-22.979334,-43.23159,11.48
8,08-02-2015 07:04:57,-22.979334,-43.23159,11.48
9,08-02-2015 07:06:04,-22.979334,-43.23159,402.81


In [15]:
"""Funciones para renderizar el mapa en el html"""
from IPython.display import HTML
 
def inline_map(map):
    """
    Return an IPython HTML object holding an iframe with the map's HTML inlined.

    The map markup travels inside the iframe's HTML5 ``srcdoc`` attribute, so it
    will not work for maps that depend on external files (json data), and some
    browsers may not support ``srcdoc``.
    """
    map._build_map()
    iframe_template = '<iframe srcdoc="{srcdoc}" style="width: 100%; height: 510px; border: none"></iframe>'
    # Double quotes are escaped because the markup sits inside the srcdoc="..." attribute
    escaped_source = map.HTML.replace('"', '&quot;')
    return HTML(iframe_template.format(srcdoc=escaped_source))
 
def embed_map(map, path="map.html"):
    """
    Write the map to ``path`` and return an IPython HTML iframe that links to it.

    The iframe points at ``files/<path>``, so the map source itself is not stored
    in the notebook. Intended to work for any map that only uses relative urls.
    """
    map.create_map(path=path)
    iframe_markup = '<iframe src="files/{path}" style="width: 100%; height: 510px; border: none"></iframe>'.format(path=path)
    return HTML(iframe_markup)

In [15]:
# Create the map and plot the samples recorded near the initial stop as violet markers
fmap=folium.Map(location=[-22.950635, -43.210436], zoom_start=12)

for row in sorted_near_initial_points.iterrows():
    # iterrows() yields (index, row) pairs, so row[1] is the row with the 'latitude' and 'longitude' columns
    latlon = [ row[1]['latitude'], row[1]['longitude'] ]
    fmap.polygon_marker( latlon, fill_color='violet', num_sides=4, radius=6)

embed_map(fmap)

In [16]:
# Add the samples recorded near the final stop (final_point, Terminal PUC) as red markers on the same map
for row in sorted_near_final_points.iterrows():
    # iterrows() yields (index, row) pairs, so row[1] is the row with the 'latitude' and 'longitude' columns
    latlon = [ row[1]['latitude'], row[1]['longitude'] ]
    fmap.polygon_marker( latlon, fill_color='red', num_sides=4, radius=6)

embed_map(fmap)

In [17]:
# Mark the final and initial stops and draw a circle around each of them.
# The popup labels now name the stops this notebook actually uses (Saens Pena / PUC).
# NOTE(review): the previous comment spoke of 1 km circles, but the calls use radius=50;
# confirm the intended size.
fmap.simple_marker(final_point, popup='Parada PUC')
fmap.simple_marker(initial_point, popup='Parada Saens Pena')

# Circle around the final stop
fmap.circle_marker(location=final_point, radius=50,line_color='green',
                 fill_color='#3186cc', fill_opacity=0.2)

# Circle around the initial stop
fmap.circle_marker(location=initial_point, radius=50,line_color='blue',
                 fill_color='#3186cc', fill_opacity=0.2)

embed_map(fmap)

In [26]:
# Timestamps of the samples recorded near the initial stop (initial_point, Praça Saens Pena)
initial_points_timestamp = sorted_near_initial_points['timestamp']
initial_points_timestamp

0     08-02-2015 05:42:00
1     08-02-2015 05:43:11
2     08-02-2015 05:43:45
3     08-02-2015 05:44:55
4     08-02-2015 05:46:05
5     08-02-2015 05:47:15
6     08-02-2015 05:47:50
7     08-02-2015 05:49:00
8     08-02-2015 05:50:11
9     08-02-2015 05:50:46
10    08-02-2015 05:51:52
11    08-02-2015 05:52:42
12    08-02-2015 07:53:07
13    08-02-2015 07:53:42
14    08-02-2015 07:54:52
15    08-02-2015 07:56:02
16    08-02-2015 07:57:12
17    08-02-2015 07:57:47
18    08-02-2015 07:58:57
19    08-02-2015 08:00:07
20    08-02-2015 08:00:42
21    08-02-2015 08:01:52
22    08-02-2015 08:03:01
23    08-02-2015 15:14:47
24    08-02-2015 15:15:56
25    08-02-2015 15:17:07
26    08-02-2015 17:44:51
27    08-02-2015 17:46:01
28    08-02-2015 17:46:44
29    08-02-2015 19:53:58
30    08-02-2015 19:55:08
31    08-02-2015 19:56:18
32    08-02-2015 19:57:05
33    08-02-2015 19:59:25
34    08-02-2015 20:00:00
35    08-02-2015 20:01:10
36    08-02-2015 20:02:20
37    08-02-2015 20:02:55
38    08-02-

In [17]:
# Timestamps of the samples recorded near the final stop (final_point, Terminal PUC)
final_points_timestamp = sorted_near_final_points['timestamp']
final_points_timestamp

0     08-02-2015 06:56:57
1     08-02-2015 06:58:07
2     08-02-2015 06:59:11
3     08-02-2015 06:59:30
4     08-02-2015 07:01:11
5     08-02-2015 07:01:43
6     08-02-2015 07:02:24
7     08-02-2015 07:03:47
8     08-02-2015 07:04:57
9     08-02-2015 07:06:04
10    08-02-2015 07:07:14
11    08-02-2015 07:14:58
12    08-02-2015 09:07:16
13    08-02-2015 13:59:39
14    08-02-2015 14:02:00
15    08-02-2015 16:25:15
16    08-02-2015 16:25:50
17    08-02-2015 16:27:00
18    08-02-2015 16:28:10
19    08-02-2015 16:28:44
20    08-02-2015 16:29:55
21    08-02-2015 16:31:05
22    08-02-2015 16:32:15
23    08-02-2015 16:32:50
24    08-02-2015 16:34:00
25    08-02-2015 16:34:59
26    08-02-2015 16:36:09
27    08-02-2015 16:36:43
28    08-02-2015 16:37:54
29    08-02-2015 16:39:04
30    08-02-2015 16:40:14
31    08-02-2015 16:40:49
32    08-02-2015 16:41:59
33    08-02-2015 16:43:01
34    08-02-2015 16:44:11
35    08-02-2015 16:44:46
36    08-02-2015 16:45:56
37    08-02-2015 16:47:05
38    08-02-

In [18]:
# Return the times at which the bus left a stop (the initial or final point of the route)
def bus_departure_hour(initial_points_timestamp_list, min_gap_minutes=40):
    """Return the departure times found in a sequence of timestamps near a stop.

    A timestamp counts as a departure when the next one is more than
    ``min_gap_minutes`` later: a longer gap means the bus went away and came
    back, a shorter one means it was still waiting at the stop. The last
    timestamp is always reported as a departure.

    Parameters
    ----------
    initial_points_timestamp_list : iterable of str
        Timestamps formatted as ``"%d-%m-%Y %H:%M:%S"``, in chronological
        order (a list or a pandas Series).
    min_gap_minutes : number, optional
        Gap, in minutes, above which two consecutive timestamps are treated
        as belonging to different visits. Defaults to 40.

    Returns
    -------
    list of datetime.datetime
        Empty when the input is empty.

    Raises
    ------
    ValueError
        If a timestamp does not match the expected format.
    """
    time_format = "%d-%m-%Y %H:%M:%S"
    # Parse each timestamp once. Iterating (rather than indexing with [i]) also keeps
    # the access positional when a pandas Series with a non-default index is passed.
    parsed = [datetime.datetime.strptime(stamp, time_format)
              for stamp in initial_points_timestamp_list]
    if not parsed:
        return []

    min_gap = datetime.timedelta(minutes=min_gap_minutes)
    list_departure_hours = [
        current
        for current, following in zip(parsed, parsed[1:])
        if following - current > min_gap
    ]
    # The final timestamp always closes the last visit to the stop
    list_departure_hours.append(parsed[-1])
    return list_departure_hours

In [23]:
# Departure times of the bus from the initial stop, computed from initial_points_timestamp.
# NOTE(review): the variable name mentions "leblon", presumably a leftover from another bus line;
# the timestamps passed in are the ones near initial_point (Praça Saens Pena).
leblon_departure_hours = bus_departure_hour(initial_points_timestamp)
leblon_departure_hours

[datetime.datetime(2015, 2, 8, 5, 52, 42),
 datetime.datetime(2015, 2, 8, 8, 3, 1),
 datetime.datetime(2015, 2, 8, 15, 17, 7),
 datetime.datetime(2015, 2, 8, 17, 46, 44),
 datetime.datetime(2015, 2, 8, 20, 4, 5)]

In [20]:
# Departure times of the bus from the final stop, computed from final_points_timestamp.
# NOTE(review): the variable name mentions "vilaIsabel", presumably a leftover from another bus line;
# the timestamps passed in are the ones near final_point (Terminal PUC).
vilaIsabel_departure_hours = bus_departure_hour(final_points_timestamp)
vilaIsabel_departure_hours

[datetime.datetime(2015, 2, 8, 7, 14, 58),
 datetime.datetime(2015, 2, 8, 9, 7, 16),
 datetime.datetime(2015, 2, 8, 14, 2),
 datetime.datetime(2015, 2, 8, 16, 47, 5),
 datetime.datetime(2015, 2, 8, 18, 48, 54)]

In [27]:
# Return the times at which the bus arrived at a stop (the initial or final point of the route)
def bus_arrive_hour(initial_points_timestamp_list, min_gap_minutes=40):
    """Return the arrival times found in a sequence of timestamps near a stop.

    The first timestamp is always reported as an arrival. After that, a
    timestamp counts as an arrival when it comes more than ``min_gap_minutes``
    after the previous one, i.e. the bus had left the stop and has now returned.

    Parameters
    ----------
    initial_points_timestamp_list : iterable of str
        Timestamps formatted as ``"%d-%m-%Y %H:%M:%S"``, in chronological
        order (a list or a pandas Series).
    min_gap_minutes : number, optional
        Gap, in minutes, above which two consecutive timestamps are treated
        as belonging to different visits. Defaults to 40.

    Returns
    -------
    list of datetime.datetime
        Empty when the input is empty.

    Raises
    ------
    ValueError
        If a timestamp does not match the expected format.
    """
    time_format = "%d-%m-%Y %H:%M:%S"
    # Parse each timestamp once. Iterating (rather than indexing with [i]) also keeps
    # the access positional when a pandas Series with a non-default index is passed.
    parsed = [datetime.datetime.strptime(stamp, time_format)
              for stamp in initial_points_timestamp_list]
    if not parsed:
        return []

    min_gap = datetime.timedelta(minutes=min_gap_minutes)
    # The first timestamp always opens the first visit to the stop
    list_arrive_hours = [parsed[0]]
    list_arrive_hours.extend(
        following
        for current, following in zip(parsed, parsed[1:])
        if following - current > min_gap
    )
    return list_arrive_hours

In [28]:
# Arrival times of the bus at the initial stop, computed from initial_points_timestamp.
# NOTE(review): the variable name mentions "leblon", presumably a leftover from another bus line;
# the timestamps passed in are the ones near initial_point (Praça Saens Pena).
arrive_hour_to_leblon = bus_arrive_hour(initial_points_timestamp)
arrive_hour_to_leblon

[datetime.datetime(2015, 2, 8, 5, 42),
 datetime.datetime(2015, 2, 8, 7, 53, 7),
 datetime.datetime(2015, 2, 8, 15, 14, 47),
 datetime.datetime(2015, 2, 8, 17, 44, 51),
 datetime.datetime(2015, 2, 8, 19, 53, 58)]

In [25]:
# Arrival times of the bus at the final stop, computed from final_points_timestamp.
# NOTE(review): the variable name mentions "vilaisabel", presumably a leftover from another bus line;
# the timestamps passed in are the ones near final_point (Terminal PUC).
arrive_hour_to_vilaisabel = bus_arrive_hour(final_points_timestamp)
arrive_hour_to_vilaisabel

[datetime.datetime(2015, 2, 8, 6, 56, 57),
 datetime.datetime(2015, 2, 8, 9, 7, 16),
 datetime.datetime(2015, 2, 8, 13, 59, 39),
 datetime.datetime(2015, 2, 8, 16, 25, 15),
 datetime.datetime(2015, 2, 8, 18, 42, 58)]

In [26]:
# Convert a timedelta into an "H:MM" string
def days_hours_minutes(travel_time):
    """Format a ``datetime.timedelta`` as ``"H:MM"``.

    Hours are not zero-padded and are not capped at 24, so a duration of one
    day and one hour is rendered as ``"25:00"`` (whole days used to be dropped
    silently). Seconds are truncated. Negative durations are rendered with a
    leading ``"-"`` instead of wrapping around to a large positive value.

    Parameters
    ----------
    travel_time : datetime.timedelta

    Returns
    -------
    str
    """
    # total_seconds() accounts for the `days` component, unlike the `seconds` attribute
    total_seconds = int(travel_time.total_seconds())
    sign = "-" if total_seconds < 0 else ""
    hours, minutes = divmod(abs(total_seconds) // 60, 60)
    return "%s%d:%02d" % (sign, hours, minutes)

In [27]:
# Build the table of trips (with their travel times) of one bus between two stops A and B
def create_travel_table (name_A, arrive_array_A, departure_array_A, name_B, arrive_array_B, departure_array_B, bus_Id, bus_line):
    """Merge the departures of two stops into a chronological table of trips.

    Two cursors walk ``departure_array_A`` and ``departure_array_B``. At each
    step the earlier departure becomes a trip towards the other stop, and its
    arrival time is read from the other stop's arrival list at that stop's
    current cursor. The loop ends as soon as either departure list is exhausted.

    When both departures are equal the trip is recorded as B -> A. Previously
    neither branch matched in that case and the loop never advanced.

    NOTE(review): the arrival is not checked to be later than the departure,
    and the other stop's cursor does not move, so two consecutive departures
    from the same stop are paired with the same arrival.

    Parameters
    ----------
    name_A, name_B : str
        Stop names, used to build the ``Direction`` label ``<from>_to_<to>``.
    arrive_array_A, arrive_array_B : sequence of datetime.datetime
        Arrival times at each stop, indexed in parallel with the matching
        departure array.
    departure_array_A, departure_array_B : sequence of datetime.datetime
        Departure times from each stop.
    bus_Id, bus_line
        Values copied into the ``Bus_Id`` and ``Line`` columns of every row.

    Returns
    -------
    pandas.DataFrame
        Columns ``Line``, ``Bus_Id``, ``Direction``, ``Departure_Hour``,
        ``Arrive_Hour``, ``Travel_Time``.
    """
    my_time_table = []
    i = 0  # cursor over departure_array_A (departures from stop A)
    j = 0  # cursor over departure_array_B (departures from stop B)

    while i < len(departure_array_A) and j < len(departure_array_B):
        if departure_array_A[i] < departure_array_B[j]:
            direction = name_A + "_to_" + name_B
            departure_hour = departure_array_A[i]
            arrive_hour = arrive_array_B[j]
            i += 1
        else:
            # Also taken on equal departure times, so one cursor always advances
            direction = name_B + "_to_" + name_A
            departure_hour = departure_array_B[j]
            arrive_hour = arrive_array_A[i]
            j += 1

        travel_time = days_hours_minutes(arrive_hour - departure_hour)
        my_time_table.append([bus_line, bus_Id, direction, departure_hour, arrive_hour, travel_time])

    # Build the dataframe from the collected trip rows
    return pd.DataFrame(my_time_table, columns=['Line', 'Bus_Id','Direction','Departure_Hour', 'Arrive_Hour', 'Travel_Time'])


In [28]:
# Build the travel table for bus 'A72065' of line '410' from the arrival/departure lists computed above

mytabla = create_travel_table("Saens Pena", arrive_hour_to_leblon, leblon_departure_hours, "PUC", arrive_hour_to_vilaisabel, vilaIsabel_departure_hours, 'A72065', '410' )
mytabla

Unnamed: 0,Line,Bus_Id,Direction,Departure_Hour,Arrive_Hour,Travel_Time
0,410,A72065,Saens Pena_to_PUC,2015-02-08 05:52:42,2015-02-08 06:56:57,1:04
1,410,A72065,PUC_to_Saens Pena,2015-02-08 07:14:58,2015-02-08 07:53:07,0:38
2,410,A72065,Saens Pena_to_PUC,2015-02-08 08:03:01,2015-02-08 09:07:16,1:04
3,410,A72065,PUC_to_Saens Pena,2015-02-08 09:07:16,2015-02-08 15:14:47,6:07
4,410,A72065,PUC_to_Saens Pena,2015-02-08 14:02:00,2015-02-08 15:14:47,1:12
5,410,A72065,Saens Pena_to_PUC,2015-02-08 15:17:07,2015-02-08 16:25:15,1:08
6,410,A72065,PUC_to_Saens Pena,2015-02-08 16:47:05,2015-02-08 17:44:51,0:57
7,410,A72065,Saens Pena_to_PUC,2015-02-08 17:46:44,2015-02-08 18:42:58,0:56
8,410,A72065,PUC_to_Saens Pena,2015-02-08 18:48:54,2015-02-08 19:53:58,1:05


In [31]:
# Build the travel-time table for every bus id that circulated during the day
def all_travel_table (line, punto_ini_coord_invertidas, name_punto_inicio, punto_fin_coord_invertidas, name_punto_final, array_busId):
    """Concatenate the travel tables of several buses into one DataFrame.

    For each bus id, the samples near both stops are fetched with
    ``near_points_to_origin``, turned into arrival/departure lists with
    ``bus_arrive_hour`` / ``bus_departure_hour``, and combined with
    ``create_travel_table``. Buses with no samples near either stop are skipped.

    Parameters
    ----------
    line
        Value copied into the ``Line`` column.
    punto_ini_coord_invertidas, punto_fin_coord_invertidas
        Initial and final stop, each in (longitude, latitude) order.
    name_punto_inicio, name_punto_final : str
        Stop names used in the ``Direction`` label.
    array_busId : iterable
        Bus ids to process.

    Returns
    -------
    pandas.DataFrame
        Columns ``Line``, ``Bus_Id``, ``Direction``, ``Departure_Hour``,
        ``Arrive_Hour``, ``Travel_Time``. Empty (columns only) when no bus
        produced a trip.
    """
    columns = ['Line', 'Bus_Id','Direction','Departure_Hour', 'Arrive_Hour', 'Travel_Time']
    tables = []
    for busId in array_busId:
        initial_timestamps = near_points_to_origin(busId, punto_ini_coord_invertidas)['timestamp']
        final_timestamps = near_points_to_origin(busId, punto_fin_coord_invertidas)['timestamp']
        # Some buses never come near one of the stops; there is nothing to pair for them
        if len(initial_timestamps) == 0 or len(final_timestamps) == 0:
            continue
        first_point_departure_hour = bus_departure_hour(initial_timestamps)
        last_point_departure_hour = bus_departure_hour(final_timestamps)
        first_point_arrive_hour = bus_arrive_hour(initial_timestamps)
        last_point_arrive_hour = bus_arrive_hour(final_timestamps)
        bus_table = create_travel_table(name_punto_inicio, first_point_arrive_hour, first_point_departure_hour, name_punto_final, last_point_arrive_hour, last_point_departure_hour, busId, line)
        if len(bus_table):
            tables.append(bus_table)

    if not tables:
        # pd.concat raises on an empty list, so return the empty frame explicitly
        return pd.DataFrame(columns=columns)
    # One concat at the end instead of DataFrame.append inside the loop
    # (append copies the whole frame each time and was removed in pandas 2.0)
    return pd.concat(tables, ignore_index=True)
        

        #el problema está en sorted_near_initial_points cuando devuelve vacío para algunos bus_id (por eso se chequea la longitud de las listas antes de calcular los horarios)

In [32]:
# Build the travel table of line '410' between Saens Pena and PUC for every bus id in array_busId
query = all_travel_table ('410', inverted_initial_point, 'Saens Pena', inverted_final_point, 'PUC', array_busId)
query

Unnamed: 0,Line,Bus_Id,Direction,Departure_Hour,Arrive_Hour,Travel_Time
0,410,A72065,Saens Pena_to_PUC,2015-02-08 05:52:42,2015-02-08 06:56:57,1:04
1,410,A72065,PUC_to_Saens Pena,2015-02-08 07:14:58,2015-02-08 07:53:07,0:38
2,410,A72065,Saens Pena_to_PUC,2015-02-08 08:03:01,2015-02-08 09:07:16,1:04
3,410,A72065,PUC_to_Saens Pena,2015-02-08 09:07:16,2015-02-08 15:14:47,6:07
4,410,A72065,PUC_to_Saens Pena,2015-02-08 14:02:00,2015-02-08 15:14:47,1:12
5,410,A72065,Saens Pena_to_PUC,2015-02-08 15:17:07,2015-02-08 16:25:15,1:08
6,410,A72065,PUC_to_Saens Pena,2015-02-08 16:47:05,2015-02-08 17:44:51,0:57
7,410,A72065,Saens Pena_to_PUC,2015-02-08 17:46:44,2015-02-08 18:42:58,0:56
8,410,A72065,PUC_to_Saens Pena,2015-02-08 18:48:54,2015-02-08 19:53:58,1:05
9,410,A72071,PUC_to_Saens Pena,2015-02-08 07:28:08,2015-02-08 08:26:52,0:58
