In [14]:
import time
from datetime import datetime, timezone
from math import radians, sin, cos, asin, sqrt, pi, atan2
from pathlib import Path

import numpy as np
import pandas as pd

In [4]:
# Input and output CSV locations. Both folders sit under the parent of the
# current working directory: <parent>/raw-data and <parent>/processed-data.
file_name = "output10min.csv"
path = "{}/raw-data/{}".format(Path().resolve().parent, file_name)
output_path = "{}/processed-data/{}".format(Path().resolve().parent, file_name)

In [18]:
# NOTE(review): this is smaller than the commonly used mean Earth radius
# (~6371 km); presumably a local radius chosen for the data's latitude — confirm.
earth_radius_meters = 6362160.0


def haversine(row):
    """
    Returns haversine distance between (lat, lon) and (last_lat, last_lon) in a dataframe.

    Parameters
    ----------
    row : mapping-like (e.g. one DataFrame row)
        Must provide 'lat', 'lon', 'last_lat' and 'last_lon', in degrees.

    Returns
    -------
    float
        earth_radius_meters times the central angle between the two points.
        0.0 is returned when a coordinate is NaN, missing or not numeric, so
        rows without a previous position contribute no distance.
    """
    try:
        lat1, lon1, lat2, lon2 = (
            radians(row[key]) for key in ('lat', 'lon', 'last_lat', 'last_lon')
        )
        # NaN is the only value that differs from itself. It must be caught
        # explicitly: the math functions propagate NaN without raising, and
        # min(1, nan) evaluates to 1, which would otherwise report a distance
        # of half the Earth's circumference.
        if any(angle != angle for angle in (lat1, lon1, lat2, lon2)):
            return 0.0
        dlat, dlon = lat2 - lat1, lon2 - lon1
        a = sin(dlat / 2.0) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2.0) ** 2
        # min() clamps rounding overshoot so asin() stays inside its domain.
        central_angle = 2 * asin(min(1, sqrt(a)))
    except (KeyError, TypeError, ValueError):
        # Best effort, as before: unusable input yields zero distance, but
        # unrelated errors (e.g. KeyboardInterrupt) are no longer swallowed.
        return 0.0
    return earth_radius_meters * central_angle

def date_string(x):
    """
    Converts unix timestamp to datetime string

    Parameters
    ----------
    x : int or float
        Seconds since the unix epoch.

    Returns
    -------
    str
        The corresponding UTC date and time formatted as 'YYYY-MM-DD HH:MM:SS'.
    """
    # datetime.utcfromtimestamp() is deprecated since Python 3.12; the
    # timezone-aware call yields the same UTC wall-clock fields.
    return datetime.fromtimestamp(x, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')

def to_unix(x):
    """
    Converts unix timestamp to datetime string

    NOTE(review): despite its name, this function does not produce a unix
    timestamp. Its body is the same as date_string(); the behaviour is kept
    as-is so existing callers are unaffected. Confirm whether a
    string-to-timestamp conversion was intended.

    Parameters
    ----------
    x : int or float
        Seconds since the unix epoch.

    Returns
    -------
    str
        The corresponding UTC date and time formatted as 'YYYY-MM-DD HH:MM:SS'.
    """
    # datetime.utcfromtimestamp() is deprecated since Python 3.12; the
    # timezone-aware call yields the same UTC wall-clock fields.
    return datetime.fromtimestamp(x, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')

In [None]:
# Read the raw CSV found at `path` into a DataFrame.
df = pd.read_csv(filepath_or_buffer=path)

In [None]:
# Order rows chronologically. A stable sort keeps rows that share a timestamp
# in their existing relative order, so the order-dependent shift(1) over
# (id, operator) groups that follows gives the same result on every run.
df = df.sort_values(by='time', kind='mergesort')

In [None]:
# Previous position of every row within its (id, operator) group; the first
# row of each group has no predecessor and therefore gets NaN.
df_temp = df.groupby(['id', 'operator'])[['lat', 'lon']].shift(1)
df_temp.columns = ["last_lat", "last_lon"]
# Plain column assignment (rather than pd.concat) keeps this cell idempotent:
# re-running it overwrites last_lat/last_lon instead of appending duplicate
# columns, which would turn row['last_lat'] into a Series downstream.
df['last_lat'] = df_temp['last_lat']
df['last_lon'] = df_temp['last_lon']

In [None]:
# Row by row: distance between each position and the previous one stored in
# last_lat / last_lon.
df['distanceTravelled'] = df.apply(func=haversine, axis='columns')

In [None]:
# Replace each unix timestamp in 'time' with its formatted date string.
df['time'] = df['time'].map(date_string, na_action=None)

In [None]:
# Create the destination folder if it is missing, so the write does not fail
# at the very last step, then save without the index column.
Path(output_path).parent.mkdir(parents=True, exist_ok=True)
df.to_csv(output_path, index=False)