In [1]:
import os
import pandas as pd
import json
import numpy as np
from pyproj import Proj, transform
import warnings
import math
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

In [2]:
def covert_coordinate_from_4326_to_DC(lat,lon):
    inProj = Proj(init='epsg:4326')
    outProj = Proj(init='epsg:26985')
    lon2, lat2 = transform(inProj, outProj, lon, lat)
    return (lat2, lon2)

def cal_dist(ori_lat,ori_lon,des_lat,des_lon):
    lat1, lon1 = covert_coordinate_from_4326_to_DC(ori_lat.values,ori_lon.values)
    lat2, lon2 = covert_coordinate_from_4326_to_DC(des_lat.values,des_lon.values)
    distance = np.sqrt((lat1-lat2)**2+(lon1-lon2)**2)
    return distance

### Algorithm 1: Static Vehicle ID

In [3]:
def algo_1(scooter):
    scooter["time_code"]=pd.to_numeric(scooter["hour"]).astype('Int64')*60+pd.to_numeric(scooter["minute"]).astype('Int64')
    scooter=scooter.sort_values(by="time_code")
    duration=scooter["time_code"][1:].values-scooter["time_code"][:-1].values
    index=duration>1 # duration threshold
    if sum(index)<1:
        return pd.DataFrame()
    index_Ori=np.append(index,[False])
    index_Des=np.append([False],index)
    Ori=scooter[:][index_Ori]
    Des=scooter[:][index_Des]
        
    Ori.reset_index(inplace=True)
    Des.reset_index(inplace=True)
    OD=pd.merge(Ori,Des,left_index=True,right_index=True,suffixes=('_Ori','_Des'))
    OD["Duration"]=OD['time_code_Des']-OD['time_code_Ori']
    OD["Distance"]=cal_dist(OD["lat_Ori"],OD["lon_Ori"],OD["lat_Des"],OD["lon_Des"])
    OD.drop(columns=["index_Ori","index_Des","ID_Des","time_code_Ori","time_code_Des"],inplace=True)
    OD.rename(columns={'ID_Ori':'ID'},inplace=True)
    return OD

### Algorithm 2: Resetting Vehicle ID

In [4]:
def algo_2(scooter):
    id_list=scooter["ID"].unique()
    ori=pd.DataFrame()
    des=pd.DataFrame()
    scooter["time_code"]=pd.to_numeric(scooter["hour"]).astype('Int64')*60+pd.to_numeric(scooter["minute"]).astype('Int64')
    scooter=scooter.sort_values(by="time_code")
    for i in id_list:
        myscooter = scooter[:][scooter["ID"]==i]
        o=myscooter.iloc[-1,:]
        d=myscooter.iloc[0,:]
        ori=ori.append(o)
        des=des.append(d)
    
    return ori, des

### Algorithm 3: Dynamic Vehicle ID

In [5]:
def find_min_idx(x):
    # return (index,col)
    k = x.argmin()
    ncol = x.shape[1]
    return int(k/ncol), k%ncol

def covert_coordinate_from_DC_to_4326(lat,lon):
    outProj = Proj(init='epsg:4326')
    inProj = Proj(init='epsg:26985')
    lon2, lat2 = transform(inProj, outProj, lon, lat)
    return (lat2, lon2)

def distance_4326(d):
    lat1, lon1 = covert_coordinate_from_DC_to_4326(136906,396529)
    lat2, lon2 = covert_coordinate_from_DC_to_4326(136906+d,396529)
    distance = np.sqrt((lat1-lat2)**2+(lon1-lon2)**2)
    return distance

def pair(mat_dis,d_max):
    # points to be excluded, return [pool1 index, pool2 index]
    output=pd.DataFrame(columns=['pool1','pool2'])
    while mat_dis.min()<d_max:
        p2,p1=find_min_idx(mat_dis)
        output=output.append(pd.DataFrame([[p1,p2]],columns=['pool1','pool2']),ignore_index=True)
        #print([p1,p2])
        mat_dis[p2,:]=np.inf
        mat_dis[:,p1]=np.inf
    return output

def find_OD(pool1,pool2,d_threshold):
    # pool1:t; 
    # pool2:t+1; 
    # d_threshold (meter) distance threshold to identify the same scooter.
    d_max=distance_4326(d_threshold)  #transfer to Latitude and longitude coordinates.
    ori=pd.DataFrame()
    des=pd.DataFrame()
    
    if (len(pool1)+len(pool2))==0:
        return ori,des
    
    if len(pool1)*len(pool2)>0:
        x1=pool1['lat_x'].values
        y1=pool1['lon_x'].values
        x2=pool2['lat_y'].values
        y2=pool2['lon_y'].values
        m=len(x1)
        n=len(x2)
        mat_x1=np.tile(x1,(n,1))
        mat_y1=np.tile(y1,(n,1))
        mat_x2=np.transpose(np.tile(x2,(m,1)))
        mat_y2=np.transpose(np.tile(y2,(m,1)))
        mat_dis=np.sqrt(np.power(mat_x1-mat_x2,2)+np.power(mat_y1-mat_y2,2))
        pairs=pair(mat_dis,d_max)
        ori=pool1.reset_index().drop(pairs['pool1'].values)
        des=pool2.reset_index().drop(pairs['pool2'].values)
    
    if len(pool1)*len(pool2)==0:
        ori=pool1
        des=pool2
    
    ori=ori[['ID','hour_x','lat_x','lon_x','minute_x','second_x']]
    ori=ori.rename(columns={"hour_x": "hour", "lat_x": "lat", "lon_x":"lon", "minute_x":"minute", "second_x":"second"})
    des=des[['ID','hour_y','lat_y','lon_y','minute_y','second_y']]
    des=des.rename(columns={"hour_y": "hour", "lat_y": "lat", "lon_y":"lon", "minute_y":"minute", "second_y":"second"})
    return ori,des

def algo_3(data,d_threshold=100):
    pool1=pd.DataFrame() #pool for t
    pool2=pd.DataFrame() #pool for t+1
    ori=pd.DataFrame()
    des=pd.DataFrame()
    data['time']=data['day']*24*60+data['hour']*60+data['minute']
    time_list=np.sort(np.unique(data['time']))
    for i in range(len(time_list)-1):
        t0=data[:][data['time']==time_list[i]]
        t1=data[:][data['time']==time_list[i+1]]
        df0=pd.merge(t0,t1,on='ID',how='outer')
        if len(df0)<1:
            continue
        pool1=df0[:][np.isnan(df0['hour_y'])]
        pool2=df0[:][np.isnan(df0['hour_x'])]
        o,d=find_OD(pool1,pool2,d_threshold)
        ori=ori.append(o)
        des=des.append(d)
        if i%500==0:
            print(str(i)+' done...')
    return ori,des


### Demos

#### Algorithm 1

In [6]:
scooter_1 = pd.read_csv('Demo_Data_Static.csv')

In [7]:
OD = algo_1(scooter_1)
OD

Unnamed: 0,ID,hour_Ori,lat_Ori,lon_Ori,minute_Ori,second_Ori,day_Ori,hour_Des,lat_Des,lon_Des,minute_Des,second_Des,day_Des,Duration,Distance
0,11111,8,38.90226,-77.05917,15,0,1,9,38.95893,-77.00287,5,0,1,50,7962.712205


#### Algorithm 2

In [8]:
scooter_2 = pd.read_csv('Demo_Data_Resetting.csv')

In [9]:
ori, des = algo_2(scooter_2)

In [10]:
ori

Unnamed: 0,ID,day,hour,lat,lon,minute,second,time_code
10,11111.0,1.0,8.0,38.90226,-77.05917,15.0,0.0,495.0
33,22222.0,1.0,9.0,38.95893,-77.00287,27.0,0.0,567.0


In [11]:
des

Unnamed: 0,ID,day,hour,lat,lon,minute,second,time_code
0,11111.0,1.0,8.0,38.90226,-77.05917,5.0,0.0,485.0
11,22222.0,1.0,9.0,38.95893,-77.00287,5.0,0.0,545.0


#### Algorithm 3

In [12]:
scooter_3 = pd.read_csv('Demo_Data_Dynamic.csv')

In [13]:
ori, des = algo_3(scooter_3)

0 done...


In [14]:
ori

Unnamed: 0,ID,hour,lat,lon,minute,second
0,22222,8.0,38.90226,-77.05917,15.0,0.0


In [15]:
des

Unnamed: 0,ID,hour,lat,lon,minute,second
0,33333,9.0,38.95893,-77.00287,5.0,0.0
