In [1]:
import pandas as pd

In [2]:
import numpy as np

In [3]:
# Load the raw location fixes from the CSV file in the working directory.
df = pd.read_csv(filepath_or_buffer='frm_locations.csv')

In [4]:
# Preview the first five rows of the raw data.
df.head(n=5)

Unnamed: 0,frm_id,date,latitude,longitude
0,753aa4603d9f9f250ffe7f63e77bac1c,2020-07-22T12:48:34+05:30,19.0647,72.89242
1,b96dc9bb896edad075da09f0c4e9098c,2020-07-20T09:51:41+05:30,28.460339,77.016685
2,84a9427621fe0669fbacbe6d7dc25b3d,2020-07-23T03:02:22+05:30,19.181723,72.99913
3,b96dc9bb896edad075da09f0c4e9098c,2020-07-21T21:19:46+05:30,28.452883,77.076515
4,b96dc9bb896edad075da09f0c4e9098c,2020-07-22T22:49:44+05:30,28.460367,77.016685


In [5]:
# Drop rows that are exact duplicates of an earlier row, keeping the first
# occurrence. Rebinding (instead of inplace=True) keeps the cell chainable
# and avoids mutating a frame that an earlier cell already displayed.
df = df.drop_duplicates(keep='first')

In [6]:
# Split each timestamp string at the 'T' separator: the part before it
# replaces 'date', the part after it (still carrying the UTC offset) becomes
# the new 'time' column.
date_and_time = df['date'].str.split('T', expand=True)
df[['date', 'time']] = date_and_time

In [7]:
# Label the coordinates as segment start points, then order the rows by
# frm_id, date and time. Done as one chained, non-inplace expression so the
# cell produces a new frame instead of mutating the existing one.
df = (
    df.rename(columns={'latitude': 'start_latitude',
                       'longitude': 'start_longitude'})
      .sort_values(by=['frm_id', 'date', 'time'])
)

In [8]:
# Keep only the columns used downstream, in a fixed order.
columns_to_keep = ['frm_id', 'date', 'time',
                   'start_latitude', 'start_longitude']
df = df[columns_to_keep]

In [9]:
# Preview the first five rows after splitting, renaming and sorting.
df.head(n=5)

Unnamed: 0,frm_id,date,time,start_latitude,start_longitude
2966,753aa4603d9f9f250ffe7f63e77bac1c,2020-07-20,00:24:31+05:30,18.964563,72.84433
2462,753aa4603d9f9f250ffe7f63e77bac1c,2020-07-20,00:24:36+05:30,19.064554,72.89265
2867,753aa4603d9f9f250ffe7f63e77bac1c,2020-07-20,00:39:36+05:30,19.06468,72.892395
2521,753aa4603d9f9f250ffe7f63e77bac1c,2020-07-20,00:54:33+05:30,19.06468,72.892395
2931,753aa4603d9f9f250ffe7f63e77bac1c,2020-07-20,01:09:36+05:30,19.06468,72.892395


In [10]:
def find_distance(df, radius_of_earth=6371):
    """Add the distance from each location fix to the next one.

    Rows are treated as consecutive points of one track, in their current
    order. Each row's end point is the next row's start point; the last row
    has no successor, so its end point is its own start point (distance 0).

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'start_latitude' and 'start_longitude' in degrees.
        The frame is not modified; a copy is returned.
    radius_of_earth : float, default 6371
        Sphere radius in kilometres; the result is in the same unit.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` with these columns appended, in this order:
        'end_latitude', 'end_longitude' (degrees),
        'delta_latitude[rad]', 'delta_longitude[rad]' (HALF of the absolute
        coordinate difference, in radians, as used by the haversine formula),
        and 'chord_length' (haversine great-circle distance between start and
        end point; the column name is kept for downstream compatibility).
    """
    # Work on a copy: mutating the frame handed in by groupby.apply is not
    # supported by pandas and would also leak columns into the caller's data.
    df = df.copy()
    deg_to_rad = np.pi/180

    # Finding end points: every row ends where the next row starts.
    end_latitude = df['start_latitude'].shift(periods=-1)
    end_longitude = df['start_longitude'].shift(periods=-1)
    if len(df) > 0:
        # Patch the last row on the standalone Series. The previous chained
        # form (df[col].iat[-1] = ...) does not write through to the frame
        # under pandas Copy-on-Write, and raised IndexError on an empty frame.
        end_latitude.iloc[-1] = df['start_latitude'].iloc[-1]
        end_longitude.iloc[-1] = df['start_longitude'].iloc[-1]
    df['end_latitude'] = end_latitude
    df['end_longitude'] = end_longitude

    # Half of the absolute change in each angle, in radians.
    df['delta_latitude[rad]'] = 0.5*deg_to_rad*np.abs(
        df['end_latitude'] - df['start_latitude'])
    df['delta_longitude[rad]'] = 0.5*deg_to_rad*np.abs(
        df['end_longitude'] - df['start_longitude'])

    # Haversine formula: d = 2 R asin(sqrt(h)).
    haversine = (
        np.sin(df['delta_latitude[rad]'])**2
        + np.cos(deg_to_rad*df['start_latitude'])
        * np.cos(deg_to_rad*df['end_latitude'])
        * np.sin(df['delta_longitude[rad]'])**2
    )
    # Rounding can push sqrt(h) marginally above 1 for near-antipodal points,
    # which would make arcsin return NaN; cap it at 1 (NaN inputs stay NaN).
    root = np.minimum(np.sqrt(haversine), 1.0)
    df['chord_length'] = 2*radius_of_earth*np.arcsin(root)

    return df

In [11]:
# Compute the segment distances separately for every (frm_id, date) pair.
# The groups are iterated explicitly instead of using groupby.apply: newer
# pandas versions prepend the group keys to the result index (so the
# reset_index() below fails with "cannot insert frm_id, already exists") and
# may drop the grouping columns from the frame handed to the function.
# Each group is copied so find_distance never writes into a groupby slice.
# The original row labels are kept and end up in the 'index' column.
df = pd.concat(
    [find_distance(day_track.copy())
     for _, day_track in df.groupby(['frm_id', 'date'])]
).reset_index()

In [12]:
# Preview the first five rows with the computed end points and distances.
df.head(n=5)

Unnamed: 0,index,frm_id,date,time,start_latitude,start_longitude,end_latitude,end_longitude,delta_latitude[rad],delta_longitude[rad],chord_length
0,2966,753aa4603d9f9f250ffe7f63e77bac1c,2020-07-20,00:24:31+05:30,18.964563,72.84433,19.064554,72.89265,0.0008725861,0.0004216715,12.223948
1,2462,753aa4603d9f9f250ffe7f63e77bac1c,2020-07-20,00:24:36+05:30,19.064554,72.89265,19.06468,72.892395,1.099557e-06,2.225295e-06,0.030241
2,2867,753aa4603d9f9f250ffe7f63e77bac1c,2020-07-20,00:39:36+05:30,19.06468,72.892395,19.06468,72.892395,0.0,0.0,0.0
3,2521,753aa4603d9f9f250ffe7f63e77bac1c,2020-07-20,00:54:33+05:30,19.06468,72.892395,19.06468,72.892395,0.0,0.0,0.0
4,2931,753aa4603d9f9f250ffe7f63e77bac1c,2020-07-20,01:09:36+05:30,19.06468,72.892395,19.064684,72.8924,3.490659e-08,4.363323e-08,0.000688


In [13]:
# Keep only the grouping keys and the per-segment distance.
distance_columns = ['frm_id', 'date', 'chord_length']
df_distance = df.loc[:, distance_columns]

In [14]:
# Discard implausible segments: keep only lengths inside the closed interval
# [lower_bound, upper_bound] (same unit as 'chord_length').
lower_bound = 0.001
upper_bound = 20
is_long_enough = df_distance['chord_length'] >= lower_bound
is_short_enough = df_distance['chord_length'] <= upper_bound
df_distance = df_distance[is_long_enough & is_short_enough]

In [15]:
# Total distance travelled: sum the remaining segment lengths for each
# (frm_id, date) pair.
df_distance = (
    df_distance
    .groupby(by=['frm_id', 'date'])
    .sum()
)

In [16]:
# Move the group keys from the index back into ordinary columns.
df_distance = df_distance.reset_index(drop=False)

In [17]:
# The summed column now holds a per-day total, so name it accordingly.
# Rebinding (instead of inplace=True) keeps the cell free of in-place mutation.
df_distance = df_distance.rename(columns={'chord_length': 'Total_distance'})

In [18]:
# Reshape to a wide table: one row per frm_id, one column per date, with the
# total distance as the cell value.
dist_frame = df_distance.pivot(
    index='frm_id',
    columns='date',
    values='Total_distance',
)

In [19]:
# Display the final frm_id x date table of total distances.
dist_frame

date,2020-07-20,2020-07-21,2020-07-22,2020-07-23,2020-07-24
frm_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
753aa4603d9f9f250ffe7f63e77bac1c,127.66043,0.866403,0.52833,0.382577,0.445485
84a9427621fe0669fbacbe6d7dc25b3d,0.663759,132.13827,14.112246,0.62041,0.302441
ab472ecc61608f512e4b1d4c2b49e8f8,6.875255,8.042475,34.220689,70.884598,40.306332
b96dc9bb896edad075da09f0c4e9098c,102.070845,63.106936,23.215201,35.4095,14.050621
e89712a6621c47b9485e0b06841f809f,88.73289,431.03529,123.737458,120.577718,252.561389
f9a1994416db0f2255ef187f87c38638,0.044168,0.283884,0.49125,0.550873,0.557124
