In [None]:
# Mount Google Drive into the Colab runtime at /content/gdrive so that files
# stored on Drive can be opened from this notebook (Colab-only step).
from google.colab import drive
drive.mount('/content/gdrive')

In [None]:
# Import Libraries
import os
import numpy as np
import pandas as pd
import time 
import datetime
import pickle
import random
import math
from geopy.distance import vincenty
# Turn off pandas' chained-assignment (SettingWithCopy) warning for the whole
# notebook. NOTE(review): this also hides the warning for statements of the
# form `df[col].iloc[i] = value`, whose write may not reach `df`.
pd.options.mode.chained_assignment = None
print("Libraries Are Imported")

In [None]:
# Define Path of Data
# The dashed string looks like a redacted placeholder — set it to the directory
# that holds the input pickle before running. After os.chdir, every relative
# file name used later in the notebook is resolved against this directory.
path = '---------------------------------'
os.chdir(path)

In [None]:
# Read The Labeled Data
# NOTE(security): pickle.load can execute arbitrary code while deserialising;
# only open pickle files that come from a trusted source.
Data = '003_Labeled_GeoLife.pickle'
# The context manager closes the file even when unpickling raises.
with open(Data, 'rb') as infile:
  df = pickle.load(infile)

In [None]:
# Stack every DataFrame stored in the loaded mapping into one frame.
# A single pd.concat replaces the DataFrame.append loop, which re-copied the
# growing frame on every iteration (DataFrame.append no longer exists in
# pandas 2.x). An empty mapping still yields an empty DataFrame.
# assumes every value of `df` is a DataFrame — TODO confirm
frames = list(df.values())
DF = pd.concat(frames) if frames else pd.DataFrame()

In [None]:
# Transportation-mode labels that are kept for this project.
Pr_Mode = [
  'bus',
  'car',
  'walk',
  'bike',
  'taxi',
  'train',
  'subway',
]

In [None]:
# Keep only the rows whose 'Mode' label is one of the selected project modes.
is_project_mode = DF['Mode'].isin(Pr_Mode)
DF = DF[is_project_mode]

In [None]:
# Display the (rows, columns) shape of the filtered frame.
DF.shape

In [None]:
# Continue on an independent deep copy so that later edits never touch DF.
# From here on `df` is a DataFrame (it previously held the loaded mapping).
df = DF.copy(deep=True)

In [None]:
# Renumber the rows 0..n-1 and discard the old index labels.
df = df.reset_index(drop=True)

In [None]:
# Drop rows with Equal Times
# A row is removed when the next row belongs to the same trip and carries the
# same timestamp, so the last row of each run of duplicates is the one kept.
# Adjacent rows are compared on whole arrays instead of one .iloc lookup per
# row; Equal_Times still holds the positional row numbers that get dropped.
trip_ids = df['Trip'].to_numpy()
stamps = df['TS'].to_numpy()
same_as_next = (trip_ids[:-1] == trip_ids[1:]) & (stamps[:-1] == stamps[1:])
Equal_Times = np.flatnonzero(same_as_next).tolist()

df = df.drop(df.index[Equal_Times])

In [None]:
# Calculate the distance between consecutive GPS points using the Vincenty formula.
# Each value is the distance in metres from one row to the row after it, so
# the list holds len(df) - 1 entries (nothing is produced for the first row).
# NOTE(review): recent geopy releases no longer ship `vincenty` (`geodesic` is
# its replacement) — confirm the installed geopy version.
points = list(zip(df['Latitude'], df['Longitude']))
Dist = [vincenty(start, end).meters for start, end in zip(points, points[1:])]

In [None]:
# The first point has no predecessor, so its distance is defined as zero.
# Prepending it gives Dist one entry per row of df.
Dist[:0] = [0]

In [None]:
# Store the list of point-to-point distances as the 'Distance' column
# (assigned positionally, so Dist must have exactly one entry per row).
df['Distance'] = Dist

In [None]:
# Distance at Start of each trip is equal to zero
# The first row of every trip (a row whose 'Trip' differs from the row before
# it) would otherwise carry the distance from the previous trip's last point.
# The rows are located on whole arrays and written with one positional
# DataFrame.iloc assignment: the chained form `df[col].iloc[i] = 0` is not
# guaranteed to write through to df (it is a no-op under pandas Copy-on-Write).
trip_ids = df['Trip'].to_numpy()
trip_starts = np.flatnonzero(trip_ids[1:] != trip_ids[:-1]) + 1
df.iloc[trip_starts, df.columns.get_loc('Distance')] = 0

In [None]:
# Calculate Delta Time
# Difference between the timestamp of each row and that of the row before it
# (TS[i+1] - TS[i]); the list holds len(df) - 1 entries.
stamps = list(df['TS'])
DT = [later - earlier for earlier, later in zip(stamps, stamps[1:])]

In [None]:
# The first row has no predecessor: give it a delta time of zero so that DT
# has one entry per row of df.
DT[:0] = [0]

In [None]:
# Store the list of time differences as the 'DT' column
# (assigned positionally, so DT must have exactly one entry per row).
df['DT'] = DT

In [None]:
# Delta time at the start of each trip is zero: the first row of a trip must
# not inherit the time gap to the previous trip's last point.
# Written with one positional DataFrame.iloc assignment because the chained
# form `df[col].iloc[i] = 0` is not guaranteed to write through to df (it is a
# no-op under pandas Copy-on-Write).
trip_ids = df['Trip'].to_numpy()
trip_starts = np.flatnonzero(trip_ids[1:] != trip_ids[:-1]) + 1
df.iloc[trip_starts, df.columns.get_loc('DT')] = 0

In [None]:
# Compute the Speed
# Distance covered since the previous row divided by the time that elapsed.
# Rows whose DT is zero do not get a finite value from this division.
df['Speed'] = df['Distance'].div(df['DT'])

In [None]:
# The first row has no previous point, so its speed is set to zero.
# One positional DataFrame.iloc write replaces the chained
# `df['Speed'].iloc[0] = ...`, which is not guaranteed to modify df
# (it is a no-op under pandas Copy-on-Write).
df.iloc[0, df.columns.get_loc('Speed')] = 0.0

In [None]:
# Speed at the start of each trip is zero (a row whose 'Trip' differs from the
# row before it has no previous point inside its own trip).
# Written with one positional DataFrame.iloc assignment because the chained
# form `df[col].iloc[i] = 0` is not guaranteed to write through to df (it is a
# no-op under pandas Copy-on-Write).
trip_ids = df['Trip'].to_numpy()
trip_starts = np.flatnonzero(trip_ids[1:] != trip_ids[:-1]) + 1
df.iloc[trip_starts, df.columns.get_loc('Speed')] = 0

In [None]:
# Compute Delta Speed
# Change in speed from each row to the next (Speed[i+1] - Speed[i]).
# diff() puts a placeholder on the first row; dropping it leaves the
# len(df) - 1 real differences.
D_Speed = df['Speed'].diff().iloc[1:].tolist()

In [None]:
# The first row has no predecessor: give it a speed change of zero so that
# D_Speed has one entry per row of df.
D_Speed[:0] = [0]

In [None]:
# Store the list of speed changes as the 'D_Speed' column
# (assigned positionally, so D_Speed must have exactly one entry per row).
df['D_Speed'] = D_Speed

In [None]:
# Compute Acceleration or Deceleration
# Change in speed divided by the elapsed time; rows whose DT is zero do not
# get a finite value from this division.
df['Acc'] = df['D_Speed'].div(df['DT'])

In [None]:
# The first row has no previous point, so its acceleration is set to zero.
# One positional DataFrame.iloc write replaces the chained
# `df['Acc'].iloc[0] = ...`, which is not guaranteed to modify df
# (it is a no-op under pandas Copy-on-Write).
df.iloc[0, df.columns.get_loc('Acc')] = 0.0

In [None]:
# Acceleration at the start of each trip is zero (a row whose 'Trip' differs
# from the row before it has no previous point inside its own trip).
# Written with one positional DataFrame.iloc assignment because the chained
# form `df[col].iloc[i] = 0` is not guaranteed to write through to df (it is a
# no-op under pandas Copy-on-Write).
trip_ids = df['Trip'].to_numpy()
trip_starts = np.flatnonzero(trip_ids[1:] != trip_ids[:-1]) + 1
df.iloc[trip_starts, df.columns.get_loc('Acc')] = 0

In [None]:
# Compute Delta Acceleration
# Change in acceleration from each row to the next (Acc[i+1] - Acc[i]).
# diff() puts a placeholder on the first row; dropping it leaves the
# len(df) - 1 real differences.
D_Acc = df['Acc'].diff().iloc[1:].tolist()

In [None]:
# The first row has no predecessor: give it an acceleration change of zero so
# that D_Acc has one entry per row of df.
D_Acc[:0] = [0]

In [None]:
# Store the list of acceleration changes as the 'D_Acc' column
# (assigned positionally, so D_Acc must have exactly one entry per row).
df['D_Acc'] = D_Acc

In [None]:
# Compute Jerk
# Change in acceleration divided by the elapsed time; rows whose DT is zero do
# not get a finite value from this division.
df['Jerk'] = df['D_Acc'].div(df['DT'])

In [None]:
# The first row has no previous point, so its jerk is set to zero.
# One positional DataFrame.iloc write replaces the chained
# `df['Jerk'].iloc[0] = ...`, which is not guaranteed to modify df
# (it is a no-op under pandas Copy-on-Write).
df.iloc[0, df.columns.get_loc('Jerk')] = 0.0

In [None]:
# Jerk at the start of each trip is zero (a row whose 'Trip' differs from the
# row before it has no previous point inside its own trip).
# Written with one positional DataFrame.iloc assignment because the chained
# form `df[col].iloc[i] = 0` is not guaranteed to write through to df (it is a
# no-op under pandas Copy-on-Write).
trip_ids = df['Trip'].to_numpy()
trip_starts = np.flatnonzero(trip_ids[1:] != trip_ids[:-1]) + 1
df.iloc[trip_starts, df.columns.get_loc('Jerk')] = 0

In [None]:
# Convert the Latitude, Longitude and Altitude columns from strings to numbers.
for _coord in ('Latitude', 'Longitude', 'Altitude'):
  df[_coord] = pd.to_numeric(df[_coord])

In [None]:
# Calculate Bearing feature between two GPS points
# Initial (forward) bearing from each row to the next, in degrees in [0, 360):
#   y = sin(dLon) * cos(lat2)
#   x = cos(lat1) * sin(lat2) - sin(lat1) * cos(lat2) * cos(dLon)
#   bearing = (degrees(atan2(y, x)) + 360) % 360
# Latitudes and longitudes are converted to radians *before* sin/cos are
# applied. The earlier code wrapped the trig results in math.radians(...) and
# passed degree values to sin/cos, which does not yield a bearing.
# The result is a plain list with len(df) - 1 entries.
lat_rad = np.radians(df['Latitude'].to_numpy(dtype=float))
lon_rad = np.radians(df['Longitude'].to_numpy(dtype=float))
lat1, lat2 = lat_rad[:-1], lat_rad[1:]
d_lon = lon_rad[1:] - lon_rad[:-1]

y = np.sin(d_lon) * np.cos(lat2)
x = np.cos(lat1) * np.sin(lat2) - np.sin(lat1) * np.cos(lat2) * np.cos(d_lon)
bearing = ((np.degrees(np.arctan2(y, x)) + 360) % 360).tolist()

In [None]:
# Prepend a zero bearing for the first row (it has no previous point), then
# attach the list to the frame as the 'Bearing' column.
bearing[:0] = [0]
df['Bearing'] = bearing

In [None]:
# Bearing at the start of each trip is zero (a row whose 'Trip' differs from
# the row before it has no previous point inside its own trip).
# Written with one positional DataFrame.iloc assignment because the chained
# form `df[col].iloc[i] = 0` is not guaranteed to write through to df (it is a
# no-op under pandas Copy-on-Write).
trip_ids = df['Trip'].to_numpy()
trip_starts = np.flatnonzero(trip_ids[1:] != trip_ids[:-1]) + 1
df.iloc[trip_starts, df.columns.get_loc('Bearing')] = 0

In [None]:
# Extract Cumulative Distance Feature for each single Trip
# Builds one running-total Series of 'Distance' per trip, collected in the
# order in which the groupby object yields the trips.
ggg = df.groupby('Trip')
Cum = [np.cumsum(trip_rows['Distance']) for _, trip_rows in ggg]

In [None]:
# Flatten the per-trip running totals into one list of values.
temp = [value for per_trip in Cum for value in per_trip]

In [None]:
# Running total of 'Distance' inside each trip, written back by index label.
# The flattened `temp` list is ordered by sorted trip key (groupby's default
# iteration order), so assigning it positionally only lines up when the trips
# already appear in that order in df. groupby(...).cumsum() returns a Series
# carrying df's own index, so every value lands on the row it was computed from.
df['Cum_Distance'] = df.groupby('Trip')['Distance'].cumsum()

In [None]:
# Drop Extra Columns
# D_Speed and D_Acc only served as numerators for Acc and Jerk.
df = df.drop(columns=['D_Speed', 'D_Acc'])

In [None]:
# Fold 'taxi' into 'car' and 'subway' into 'train'.
# The replacement is applied to the whole frame, not only to the 'Mode' column.
merged_labels = {"taxi": "car", "subway": "train"}
df.replace(merged_labels, inplace=True)

In [None]:
# Apply Exponential Moving Average to Noises for the Speed, Acc and Jerk features
#
# For every transport mode, values of a feature lying more than 3 standard
# deviations from that mode's mean are treated as noise and replaced by the
# exponential moving average (alpha = 0.1, adjust=False) of the whole column
# at that row.
#
# This single cell replaces three copy-pasted per-feature cells. Those
# recomputed `df[feature].ewm(...).mean()` over the entire column for every
# single outlier and wrote the result through chained assignment
# (`df[feature].iloc[i] = ...`), which is not guaranteed to reach df. The
# helper below carries the moving average along in one pass per mode, so each
# replacement still sees the replacements made before it, and the column is
# written back with one plain assignment.


def _ewm_advance(avg, old_wt, cur, alpha):
  """Advance an exponentially weighted mean by one value.

  Follows the recurrence pandas documents for
  ``ewm(alpha=alpha, adjust=False, ignore_na=False).mean()``.

  avg    -- running average before `cur` (NaN until a first non-NaN value)
  old_wt -- weight currently attached to `avg`
  cur    -- next value of the series
  Returns the updated ``(avg, old_wt)`` pair.
  """
  if avg == avg:  # running average is defined (NaN != NaN)
    # The old average decays at every position, NaN gaps included.
    old_wt *= 1.0 - alpha
    if cur == cur:
      if avg != cur:
        avg = (old_wt * avg + alpha * cur) / (old_wt + alpha)
      old_wt = 1.0
  elif cur == cur:
    avg = cur  # the first valid observation seeds the average
  return avg, old_wt


def _smooth_outliers(frame, feature, modes, alpha=0.1, n_std=3):
  """Replace per-mode outliers of `feature` by the column's moving average.

  frame   -- DataFrame with a 'Mode' column and a numeric `feature` column;
             the `feature` column is overwritten with the smoothed values
  feature -- name of the column to smooth
  modes   -- mode labels to process, one pass each, in the given order
  alpha   -- smoothing factor of the exponential moving average
  n_std   -- half-width of the accepted band, in standard deviations
  """
  values = frame[feature].to_numpy(dtype=float, copy=True)
  for mode in modes:
    print(mode)
    in_mode = (frame['Mode'] == mode).to_numpy()
    mode_values = pd.Series(values[in_mode])
    mean = mode_values.mean()
    std = mode_values.std()
    down_bound = mean - (n_std * std)
    up_bound = mean + (n_std * std)

    # A row's outlier status depends only on its own, not-yet-replaced value,
    # so the mask can be fixed before the pass starts.
    is_outlier = in_mode & ((values > up_bound) | (values < down_bound))
    if not is_outlier.any():
      continue

    flagged = is_outlier.tolist()
    seq = values.tolist()
    avg, old_wt = float('nan'), 1.0
    for pos, cur in enumerate(seq):
      new_avg, new_wt = _ewm_advance(avg, old_wt, cur, alpha)
      if flagged[pos]:
        # The outlier takes the moving average computed with itself included;
        # the running average is then redone from the replaced value, which is
        # what a later recomputation over the edited column would see.
        seq[pos] = new_avg
        new_avg, new_wt = _ewm_advance(avg, old_wt, new_avg, alpha)
      avg, old_wt = new_avg, new_wt
    values = np.asarray(seq, dtype=float)
  frame[feature] = values


All_Modes = ["bike", "bus", "car", "train", "walk"]
for feature in ("Speed", "Acc", "Jerk"):
  _smooth_outliers(df, feature, All_Modes)

In [None]:
# Keep only the trips that have more than 60 rows of GPS data.
# The kept groups are concatenated once: DataFrame.append in a loop re-copied
# the growing frame each time and no longer exists in pandas 2.x. The threshold
# also gets its own name instead of rebinding the built-in `min`.
# Rows come out grouped trip by trip in groupby order, as before.
MIN_TRIP_ROWS = 60
long_trips = [rows for _, rows in df.groupby('Trip') if len(rows) > MIN_TRIP_ROWS]
DF = pd.concat(long_trips) if long_trips else pd.DataFrame()

In [None]:
# Renumber the rows of the filtered frame 0..n-1 and discard the old labels.
DF = DF.reset_index(drop=True)

In [None]:
# Save the preprocessed frame to disk as a pickle file.
with open("004_Preprocessed_Data.pickle", mode='wb') as out_file:
    pickle.dump(DF, out_file)