In [53]:
# !pip install polyline

In [54]:
import pandas as pd
import numpy as np
from numpy import cos, sin, arcsin, sqrt, arctan2, radians
import requests
import folium
import polyline
from folium.plugins import HeatMap
from shapely.geometry import Point
from shapely.geometry.polygon import Polygon
import math

df = pd.read_csv('data/hours/12Htest.txt', header=None, names=['date time', 'longitude', 'latitude', 'label'])
df.head()

Unnamed: 0,date time,longitude,latitude,label
0,2008-02-04 12:02:27,116.35031,39.90114,1
1,2008-02-05 12:00:55,116.42951,39.89978,1
2,2008-02-06 12:02:45,116.41587,39.85668,1
3,2008-02-07 12:01:21,116.28772,39.91604,1
4,2008-02-08 12:03:04,116.44999,39.88159,1


In [62]:
# check if a point is in a polygon
def onSegment (start, end, x, y):
    x1 = start[0]
    y1 = start[1]
    x2 = end[0]
    y2 = end[1]
    dist = abs((y2-y1)*x - (x2-x1)*y + x2*y1 - y2*x1)/math.sqrt((y2-y1)**2 + (x2-x1)**2)
    if dist < 0.0003 and x <= max(x1,x2) and x >= min(x1,x2) and y <= max(y1,y2) and y >= min(y1,y2):
        return True
    return False

# print(onSegment(39.96211544796922, 116.42124630330393, 39.96218888796003, 116.42468830913569, 39.961859935630294, 116.42254194484401))
# print(onSegment(39.96211544796922, 116.42124630330393, 39.96218888796003, 116.42468830913569, 0, 0))


In [56]:
def get_route(pickup_lon, pickup_lat, dropoff_lon, dropoff_lat):
    
    loc = "{},{};{},{}".format(pickup_lon, pickup_lat, dropoff_lon, dropoff_lat)
    url = "http://router.project-osrm.org/route/v1/driving/"
    r = requests.get(url + loc + "?alternatives=true")
    print(url + loc + "?alternatives=true")
    if r.status_code!= 200:
        return {}
  
    res = r.json()   
    options = len(res['routes'])
    routes = []
    distances = []
    for i in range(options):
        routes.append(polyline.decode(res['routes'][i]['geometry']))
        distances.append(res['routes'][i]['distance'])
    start_point = [res['waypoints'][0]['location'][1], res['waypoints'][0]['location'][0]]
    end_point = [res['waypoints'][1]['location'][1], res['waypoints'][1]['location'][0]]
    
    out = { 'options': options,
            'route':routes,
            'start_point':start_point,
            'end_point':end_point,
            'distance':distances
          }

    return out

pickup = (39.90772518863834, 116.39751663173872)
dropoff = (39.95380284673872, 116.46232507838539)
print(get_route(pickup[1], pickup[0], dropoff[1], dropoff[0]))

http://router.project-osrm.org/route/v1/driving/116.39751663173872,39.90772518863834;116.46232507838539,39.95380284673872?alternatives=true
{'options': 1, 'route': [[(39.90791, 116.39751), (39.9079, 116.39702), (39.90628, 116.39687), (39.9071, 116.42896), (39.90695, 116.45585), (39.9056, 116.45619), (39.90588, 116.45566), (39.91691, 116.45557), (39.94796, 116.45589), (39.95001, 116.46225), (39.95401, 116.46225)]], 'start_point': [39.90791, 116.39751], 'end_point': [39.954005, 116.462245], 'distance': [11242.2]}


In [74]:
def getPoints(pickup, dropoff, df):
    route = get_route(pickup[1], pickup[0], dropoff[1], dropoff[0])
    final_df = pd.DataFrame(columns=['date time', 'longitude', 'latitude', 'label', 'segment'])

    for i in range(len(route['route'][0]) - 1):
        #print(route['route'][0][i][1], route['route'][0][i][0])
        start = (route['route'][0][i][0], route['route'][0][i][1])
        end = (route['route'][0][i+1][0], route['route'][0][i+1][1])

        # df2 gets the points in squared area defined by start and end
        #df2 = df[(df['longitude'] >= min(start[0], end[0])) & (df['longitude'] <= max(start[0], end[0])) & (df['latitude'] >= min(start[1], end[1])) & (df['latitude'] <= max(start[1], end[1]))]
        df3 = df[df.apply(lambda x: onSegment(start, end, x['latitude'], x['longitude']), axis=1)]
        df3['segment'] = i
        
        final_df = pd.concat([final_df, df3])
        # add the segment number to the row

    return final_df, route['route']


# final_df = pd.DataFrame(columns=['taxi ID', 'date time', 'longitude', 'latitude', 'distance', 'time', 'speed', 'label'])
# # print all lat and long of the route in the first option
# for i in range(len(route['route'][0]) - 1):
#     #print(route['route'][0][i][1], route['route'][0][i][0])
#     start = (route['route'][0][i][0], route['route'][0][i][1])
#     end = (route['route'][0][i+1][0], route['route'][0][i+1][1])

#     df3 = df2[df2.apply(lambda x: onSegment(start, end, x['latitude'], x['longitude']), axis=1)]
#     # concatenate df3 to final_df
#     final_df = pd.concat([final_df, df3])
#     print("ok")

In [58]:
# folium
def plotRoute(route, df):
    # dictionary for colors
    colorsPoint = {0:'green', 1:'orange', 2:'red', 3:'orange', 4:'green'}

    # create a map
    # folium map dark theme
    m = folium.Map(location=[df['latitude'].mean(), df['longitude'].mean()], zoom_start=12,tiles='cartodbpositron')

    # plot the location points with color indicating the label
    for i in range(0, len(df)):
        folium.CircleMarker([df.iloc[i]['latitude'], df.iloc[i]['longitude']],
                            radius=0.2,
                            color=colorsPoint[df.iloc[i]['label']],
                            fill=True,
                            fill_color=colorsPoint[df.iloc[i]['label']],
                            fill_opacity=0.1).add_to(m)
# plot the route
    colorsRoute = ['blue', 'white', 'black', 'yellow', 'orange', 'purple', 'pink', 'black', 'white', 'gray']
    for i in range(0,len(route)):
        for j in range(0,len(route[i])):
            folium.Circle(
                location=[route[i][j][0], route[i][j][1]],
                radius=1,
                color=colorsRoute[i],
                fill=True,
                fill_color='crimson'
            ).add_to(m)
            #draw lines between points
            if j < len(route[i])-1:
                folium.PolyLine(locations=[[route[i][j][0], route[i][j][1]], [route[i][j+1][0], route[i][j+1][1]]], color=colorsRoute[i], weight=2.5, opacity=1).add_to(m)

    return m

In [77]:
# scenarios for the route
choice = int(input("Enter the scenario number: "))
if choice == 1:
    # Tiananmen to Hilton Beijing Hotem
    pickup = (39.90772518863834, 116.39751663173872)
    dropoff = (39.95380284673872, 116.46232507838539)
elif choice == 2:
    # Temple of Sun to Beijing Capital International Airport
    pickup = (39.913353949958264, 116.44391608840026)
    dropoff = (40.085754497062055, 116.6048974654586)
elif choice == 3:
    # Beijing Zoo to Wanning Bridge
    pickup = (39.93983489654245, 116.34012668521959)
    dropoff = (39.93657649154817, 116.39602849390053)
elif choice == 4:
    pickup = (39.86727466612916, 116.31253609321384)
    dropoff = (39.84981380032512, 116.34614515651923)
elif choice == 5:
    # hihgway to highway
    pickup = (39.861812076670375, 116.45517167743945)
    dropoff = (39.842329091349136, 116.47901118418928)

result, route = getPoints(pickup, dropoff, df)
m = plotRoute(route, result)
m

http://router.project-osrm.org/route/v1/driving/116.44391608840026,39.913353949958264;116.6048974654586,40.085754497062055?alternatives=true


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df3['segment'] = i
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df3['segment'] = i
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df3['segment'] = i
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the d

In [79]:
# print the number of points by label and segment
print(result.groupby(['segment', 'label']).size())

segment  label
0        1          3
1        1         33
         2          2
2        1         15
         2          3
3        1        136
         2          9
4        1         27
         2          7
5        1         10
         2          1
6        1         23
         2          4
7        1          1
dtype: int64


In [60]:
# # apply k-means clustering
# from sklearn.cluster import KMeans

# # get the data
# df = pd.read_csv('data/hours/12Hcomplete.txt',  header=None, names=['taxi ID', 'date time', 'longitude', 'latitude', 'distance', 'time', 'speed'])
# X = df[['distance', 'longitude', 'latitude', 'speed', 'time']].values
# # apply k-means clustering
# kmeans = KMeans(n_clusters=3, random_state=0).fit(X)

# # get the labels
# labels = kmeans.labels_
# df['labels'] = labels
# # # add the labels to the data
# df2 = df[['date time', 'longitude', 'latitude', 'labels']]

# # # count the different values of the labels
# df2['labels'].value_counts()

In [61]:
# # write in a csv the final dataframe
# for index, row in df.iterrows():
#     if(row['labels'] != 0):
#         with open('data/hours/12Htest.txt', 'a') as f:
#             f.write(str(row['date time']) + ',' + str(row['longitude']) + ',' + str(row['latitude']) + ',' + str(row['labels']) + '\n')