# Preprocessing the data

In [1]:
# Importing the packages
import pandas as pd
import numpy as np

In [2]:
dataF = pd.read_excel('data.xlsx', index_col = False)

In [3]:
dataF.head()

Unnamed: 0,Heading,ID,Img_Front,Lat,Lon,Speed,Timestamp
0,NE,DB002,,18.606337,73.773585,54.159888,1537320354000
1,NE,DB002,img_front_DB002_2018_09_19_06_55_54_094000.jpg,18.606337,73.773585,54.159888,1537320354094
2,NE,DB002,img_front_DB002_2018_09_19_06_55_54_379000.jpg,18.606337,73.773585,54.159888,1537320354379
3,NE,DB002,img_front_DB002_2018_09_19_06_55_54_809000.jpg,18.606337,73.773585,54.159888,1537320354809
4,NE,DB002,img_front_DB002_2018_09_19_06_55_54_976000.jpg,18.606337,73.773585,54.159888,1537320354976


# Contains Latitude/ Longitude, Speed and Timestamp data of a car fitted with sensors

In [4]:
# Importing packages for preprocessing date-time in the dataframe
from datetime import datetime, date, time, timedelta

In [5]:
# Converting Unix Timestamp to Date-Time format
dataF["date_time"] = pd.to_datetime(dataF["Timestamp"], unit = "ms") 
dataF["date_time"] = dataF["date_time"] + timedelta(hours = 5, minutes = 30)
# Truncating seconds(since each sec has approx. 4 records)
dataF["date_time"] = dataF["date_time"].astype('<M8[m]')                

In [6]:
# Seperate columns for date and time
dataF["only_date"] = [date_time.date() for date_time in dataF["date_time"]]
dataF["only_time"] = [date_time.time() for date_time in dataF["date_time"]]

In [26]:
dataF

Unnamed: 0,Heading,ID,Img_Front,Lat,Lon,Speed,Timestamp,date_time,only_date,only_time,accln,speed_grp
0,NE,DB002,,18.606337,73.773585,54,1537320354000,2018-09-19 06:55:00,2018-09-19,06:55:00,-3.0,1
26,NE,DB002,,18.606855,73.772945,51,1537320360000,2018-09-19 06:56:00,2018-09-19,06:56:00,-17.5,2
242,NE,DB002,,18.610888,73.768840,19,1537320420000,2018-09-19 06:57:00,2018-09-19,06:57:00,-15.0,3
483,NE,DB002,,18.613554,73.766597,21,1537320480000,2018-09-19 06:58:00,2018-09-19,06:58:00,-3.5,4
705,NE,DB002,,18.615273,73.765071,12,1537320540000,2018-09-19 06:59:00,2018-09-19,06:59:00,13.5,5
898,NE,DB002,img_front_DB002_2018_09_19_06_59_59_739000.jpg,18.617094,73.763449,48,1537320600000,2018-09-19 07:00:00,2018-09-19,07:00:00,22.0,6
1105,NE,DB002,img_front_DB002_2018_09_19_07_00_59_840000.jpg,18.621992,73.759586,56,1537320660000,2018-09-19 07:01:00,2018-09-19,07:01:00,4.5,7
1329,NE,DB002,img_front_DB002_2018_09_19_07_01_59_248000.jpg,18.628027,73.754676,57,1537320720000,2018-09-19 07:02:00,2018-09-19,07:02:00,2.5,8
1544,NE,DB002,img_front_DB002_2018_09_19_07_02_59_719000.jpg,18.634243,73.750863,61,1537320780000,2018-09-19 07:03:00,2018-09-19,07:03:00,-20.5,9
1730,NE,DB002,,18.638961,73.750203,16,1537320840000,2018-09-19 07:04:00,2018-09-19,07:04:00,-15.5,10


In [9]:
#Dropping duplicates since in each second, there are three similar records with repeating Lat/ Long values
dataF.drop_duplicates(subset = "date_time", inplace = True) 

In [10]:
# Three days of data
dataF["only_date"].unique()

array([datetime.date(2018, 9, 19), datetime.date(2018, 9, 20),
       datetime.date(2018, 9, 21)], dtype=object)

In [11]:
# Converting the speed from float to int 
dataF["Speed"] = dataF["Speed"].astype(int)

In [12]:
# Finding the acceleration
dataF['accln'] = np.gradient(dataF["Speed"])

In [13]:
# Creating "speed_grp" column which assigns 0 when the speed values are same and 1 when the speed value changes in the 
# consecutive records. And then using cumsum() function to find the number of records where the car is travelling in the
# same speed.

dataF['speed_grp'] = (dataF.Speed.diff(1) != 0).astype('int').cumsum()

# Grouping it by "speed_grp" and keeping the required columns

temp = pd.DataFrame({'Lat_start' : dataF.groupby('speed_grp')["Lat"].first(), 
              'Lat_end' : dataF.groupby('speed_grp')["Lat"].last(),
              'Long_start': dataF.groupby('speed_grp')["Lon"].first(),
              'Long_end': dataF.groupby('speed_grp')["Lon"].last(),
              'Start_Time': dataF.groupby('speed_grp')["date_time"].first(),
              'End_Time': dataF.groupby('speed_grp')["date_time"].last(),
              'Speed': dataF.groupby('speed_grp')['Speed'].first(),
              'Acceleration': dataF.groupby('speed_grp')['accln'].first(),      
              'Consecutive' : dataF.groupby('speed_grp').size(), 
              }).reset_index(drop=True)
temp

Unnamed: 0,Lat_start,Lat_end,Long_start,Long_end,Start_Time,End_Time,Speed,Acceleration,Consecutive
0,18.606337,18.606337,73.773585,73.773585,2018-09-19 06:55:00,2018-09-19 06:55:00,54,-3.0,1
1,18.606855,18.606855,73.772945,73.772945,2018-09-19 06:56:00,2018-09-19 06:56:00,51,-17.5,1
2,18.610888,18.610888,73.768840,73.768840,2018-09-19 06:57:00,2018-09-19 06:57:00,19,-15.0,1
3,18.613554,18.613554,73.766597,73.766597,2018-09-19 06:58:00,2018-09-19 06:58:00,21,-3.5,1
4,18.615273,18.615273,73.765071,73.765071,2018-09-19 06:59:00,2018-09-19 06:59:00,12,13.5,1
5,18.617094,18.617094,73.763449,73.763449,2018-09-19 07:00:00,2018-09-19 07:00:00,48,22.0,1
6,18.621992,18.621992,73.759586,73.759586,2018-09-19 07:01:00,2018-09-19 07:01:00,56,4.5,1
7,18.628027,18.628027,73.754676,73.754676,2018-09-19 07:02:00,2018-09-19 07:02:00,57,2.5,1
8,18.634243,18.634243,73.750863,73.750863,2018-09-19 07:03:00,2018-09-19 07:03:00,61,-20.5,1
9,18.638961,18.638961,73.750203,73.750203,2018-09-19 07:04:00,2018-09-19 07:04:00,16,-15.5,1


In [14]:
# Importing packages for visualization using google maps

import gmaps
import os
import urllib.request
gmaps.configure('AIzaSyDtLvi-eBWmwuTdyoTBqe-jILouiXV-fT8')

In [15]:
# Mumbai
coordinates = (19.067274, 72.888661)
gmaps.figure(center=coordinates, zoom_level=12)

Figure(layout=FigureLayout(height='420px'))

In [16]:
#Pune
coordinates = (18.516726, 73.856255)
gmaps.figure(center=coordinates, zoom_level=12)

Figure(layout=FigureLayout(height='420px'))

In [17]:
#Mumbai to Pune directions
gmaps.configure(api_key='AIzaSyDtLvi-eBWmwuTdyoTBqe-jILouiXV-fT8')
#Define location 1 and 2
Pune = (18.528308,73.873303)
Mumbai = (19.072064,72.870643)
#Create the map
fig = gmaps.figure()
#create the layer
layer = gmaps.directions.Directions(Pune, Mumbai, mode='car')
#Add the layer
fig.add_layer(layer)
fig

Figure(layout=FigureLayout(height='420px'))

In [18]:
#Halts
temp_Halts = temp[(temp["Speed"] < 10) & (temp["Consecutive"] > 1)]    # minute by minute analysis (by ignoring the seconds)
print("No of halts:", temp_Halts["Consecutive"].count())
temp_Halts


No of halts: 27


Unnamed: 0,Lat_start,Lat_end,Long_start,Long_end,Start_Time,End_Time,Speed,Acceleration,Consecutive
133,19.067274,19.06803,72.888661,72.887914,2018-09-19 09:35:00,2018-09-19 09:36:00,0,-1.5,2
144,19.072826,19.073116,72.883933,72.883619,2018-09-19 09:47:00,2018-09-19 09:48:00,3,1.5,2
150,19.075353,19.075439,72.878002,72.877876,2018-09-19 09:54:00,2018-09-19 09:55:00,0,-14.0,2
154,19.072149,19.07208,72.875552,72.875546,2018-09-19 09:59:00,2018-09-19 10:03:00,0,-0.5,5
160,18.528308,18.528273,73.873303,73.873366,2018-09-20 07:36:00,2018-09-20 07:44:00,0,-9.0,9
161,18.528278,18.52828,73.873356,73.873431,2018-09-20 07:45:00,2018-09-20 07:46:00,1,0.5,2
162,18.528266,18.528215,73.87349,73.873308,2018-09-20 07:47:00,2018-09-20 07:48:00,0,-0.5,2
163,18.528235,18.528241,73.873418,73.87341,2018-09-20 07:49:00,2018-09-20 07:50:00,1,0.5,2
164,18.528325,18.52823,73.873394,73.873339,2018-09-20 07:51:00,2018-09-20 07:52:00,0,-0.5,2
165,18.528274,18.528229,73.87343,73.873154,2018-09-20 07:53:00,2018-09-20 07:54:00,1,0.5,2


In [19]:
temp_Halts["lat-lon"] = list(zip(temp_Halts["Lat_start"], temp_Halts["Long_start"]))
markers = [x for x in temp_Halts["lat-lon"]]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [20]:
# Plotting the locations where the car is halted/ stopped due to traffic

fig = gmaps.figure()
markers = gmaps.marker_layer(markers)
fig.add_layer(markers)
fig

Figure(layout=FigureLayout(height='420px'))

In [None]:
#Overspeed

In [21]:
temp_overspeed = temp[(temp["Speed"] > 75) & (temp["Consecutive"] > 1)]
print("instances of overspeeding",temp_overspeed["Consecutive"].count())
temp_overspeed.head()

instances of overspeeding 32


Unnamed: 0,Lat_start,Lat_end,Long_start,Long_end,Start_Time,End_Time,Speed,Acceleration,Consecutive
19,18.687856,18.697381,73.687121,73.680417,2018-09-19 07:14:00,2018-09-19 07:15:00,78,0.5,2
27,18.707318,18.708087,73.601896,73.578176,2018-09-19 07:23:00,2018-09-19 07:25:00,78,0.5,3
32,18.740846,18.749706,73.532183,73.527134,2018-09-19 07:30:00,2018-09-19 07:31:00,78,4.0,2
38,18.736679,18.733229,73.455999,73.444274,2018-09-19 07:37:00,2018-09-19 07:38:00,78,1.0,2
61,18.852525,18.861728,73.227953,73.222651,2018-09-19 08:18:00,2018-09-19 08:19:00,78,-0.5,2


In [22]:
# Plotting the locations where the car is overspeeding

locations = temp_overspeed[['Lat_start', 'Long_start']]
#Get the magnitude from the data
weights = temp_overspeed["Consecutive"]
#Set up your map
fig = gmaps.figure()
fig.add_layer(gmaps.heatmap_layer(locations, weights=weights))
fig

Figure(layout=FigureLayout(height='420px'))

In [None]:
#High Acceleration

In [23]:
temp_accln = temp[temp["Acceleration"] > 8]
print("instances of high-acceleration",temp_accln["Consecutive"].count())
temp_accln.head()

instances of high-acceleration 122


Unnamed: 0,Lat_start,Lat_end,Long_start,Long_end,Start_Time,End_Time,Speed,Acceleration,Consecutive
4,18.615273,18.615273,73.765071,73.765071,2018-09-19 06:59:00,2018-09-19 06:59:00,12,13.5,1
5,18.617094,18.617094,73.763449,73.763449,2018-09-19 07:00:00,2018-09-19 07:00:00,48,22.0,1
10,18.643441,18.643441,73.749014,73.749014,2018-09-19 07:05:00,2018-09-19 07:05:00,30,11.0,1
14,18.660092,18.660092,73.730904,73.730904,2018-09-19 07:09:00,2018-09-19 07:09:00,26,11.0,1
15,18.662794,18.662794,73.725193,73.725193,2018-09-19 07:10:00,2018-09-19 07:10:00,72,24.5,1


In [24]:
# Plotting the locations of high acceleration

locations = temp_accln[['Lat_start', 'Long_start']]
#Get the magnitude from the data
weights = temp_accln["Consecutive"]
#Set up your map
fig = gmaps.figure()
fig.add_layer(gmaps.heatmap_layer(locations, weights=weights))
fig

Figure(layout=FigureLayout(height='420px'))