# XYT mining op de weg

In dit project identificeren we start- en stoplocaties op basis van XYT-gegevens.

In [None]:
##import packages
import pandas as pd
pd.set_option('mode.chained_assignment', None)
import pandas as pd
import os
import datetime
import fnmatch
import concurrent.futures
import numpy as np
from geopy.distance import geodesic
import matplotlib.pyplot as plt
from matplotlib.ticker import NullFormatter
import mpld3
%matplotlib inline
#from geopy.distance import great_circle
#from geopy.distance import vincenty

Eerst voegen we twee dagen aan XYT data samen

In [None]:
## Import files
path = 'Data'

# Collect every '*.csv' file below `path` (recursively). The list is sorted so
# that the concatenation order -- and with it the row order that the later
# consecutive-point calculations rely on -- does not depend on the order in
# which the file system happens to list the files.
configfiles = sorted(
    os.path.join(dirpath, f)
    for dirpath, dirnames, files in os.walk(path)
    for f in fnmatch.filter(files, '*.csv')
)
if not configfiles:
    # Fail with a clear message instead of a NameError on `df` further down.
    raise FileNotFoundError("No '*.csv' files found under {!r}".format(path))

# Read the first four columns of every file and concatenate them once, after
# all files are loaded, instead of re-concatenating on every loop iteration.
frames = [
    pd.read_csv(file, index_col=None, header=0, usecols=[0, 1, 2, 3])
    for file in configfiles
]
df = pd.concat(frames, ignore_index=True)

## Fresh 0..n-1 index, also stored as the first column 'index'
df.insert(0, 'index', df.index)

Omzetten van UTC-tijd naar lokale tijd (Amsterdam) en toevoegen van tijdskolommen

In [None]:
# Convert the timestamps in 'Dt' from UTC to local (Europe/Amsterdam) time and
# store the timezone-aware result in 'DateTime_Local'.
# NOTE(review): this treats 'Dt' as UTC, as the original conversion did --
# confirm against the data source.
# The whole column is parsed and converted at once; this replaces the
# row-by-row date/time recombination that depended on `pd.datetime` (no longer
# available in current pandas) and on temporary 'Date'/'Time' columns.
df['DateTime_Local'] = pd.to_datetime(df['Dt'], utc=True).dt.tz_convert('Europe/Amsterdam')

In [None]:
# Derive timezone-naive local-time columns from the tz-aware 'DateTime_Local'.
# The `.dt` accessor keeps the frame's own index, so every assignment below
# lines up row by row.
local_dt = df['DateTime_Local']
df['Date'] = local_dt.dt.date
df['Time'] = local_dt.dt.time
# Dropping the timezone keeps the local wall-clock time; this is the same value
# the original built per row by combining 'Date' and 'Time' via `pd.datetime`,
# which is no longer available in current pandas.
df['DateTime'] = local_dt.dt.tz_localize(None)
df['Year'] = df['DateTime'].dt.year
df['Month'] = df['DateTime'].dt.month
df['Day'] = df['DateTime'].dt.day
# ISO week number; `DatetimeIndex.week` was removed from pandas, isocalendar()
# is its replacement. Cast back to a plain integer column.
df['Weeknr'] = df['DateTime'].dt.isocalendar().week.astype('int64')
df['Weekdag'] = df['DateTime'].dt.weekday
df['Hour'] = df['DateTime'].dt.hour
del df['DateTime_Local']

Daarna berekenen we de afstand tussen twee XY-punten aan de hand van de geodesic-formule van Karney (2013). Deze formule zit in het package Geopy.

In [None]:
## New table: from point a to point b --> drop rows with missing values
# Rename the coordinates to the "a" point (the index labels become strings),
# then attach the next row's coordinates as the "b" point.
df = df.rename(index=str, columns={"Lon": "Lon_a", "Lat": "Lat_a"})
df = df.assign(
    Lat_b=df["Lat_a"].shift(-1),
    Lon_b=df["Lon_a"].shift(-1),
).dropna()

##Measure distance
def distancer_km(row):
    """Return the geodesic distance in kilometres between a row's two points.

    Reads the 'Lat_a'/'Lon_a' and 'Lat_b'/'Lon_b' entries of ``row`` and
    passes them to ``geodesic`` as (latitude, longitude) pairs.
    """
    start = row['Lat_a'], row['Lon_a']
    end = row['Lat_b'], row['Lon_b']
    return geodesic(start, end).km

def distancer_m(row):
    """Return the geodesic distance in metres between a row's two points.

    Reads the 'Lat_a'/'Lon_a' and 'Lat_b'/'Lon_b' entries of ``row`` and
    passes them to ``geodesic`` as (latitude, longitude) pairs.
    """
    start = row['Lat_a'], row['Lon_a']
    end = row['Lat_b'], row['Lon_b']
    return geodesic(start, end).m

# Geodesic distance between point a and point b of every row.
# Solving the geodesic is the expensive step, so it is done once per row (in
# kilometres); the metre column is derived by scaling instead of running a
# second row-wise pass over the whole frame.
df['distance_km'] = df.apply(distancer_km, axis=1)
df['distance_m'] = df['distance_km'] * 1000

Nu gaan we het verschil in seconden tussen twee XY punten berekenen. Hiervoor gebruiken we de package Numpy. 

In [None]:
##Timestamp of point a, stored explicitly as datetime64[ns]
df["date_a"] = np.array(df["DateTime"], dtype="datetime64[ns]")

##Difference in seconds between two consecutive XY points
# date_b is the timestamp of the next row. The last row has no successor, so
# its date_b, diff and diff_sec stay missing (NaT/NaN); assigning a dropna()'d
# column back, as was done before, re-aligned on the index and changed nothing.
df["date_b"] = df["date_a"].shift(-1)
df["diff"] = df["date_b"] - df["date_a"]
# total_seconds() always yields float seconds (including fractions).
# astype('timedelta64[s]') no longer returns plain floats on pandas 2.x, which
# breaks the speed division that uses this column.
df["diff_sec"] = df["diff"].dt.total_seconds()

Door het verschil in meters/kilometers te delen door het verschil in seconden/uren kunnen we de snelheid berekenen.

In [None]:
## Speed in metres per second and in kilometres per hour
elapsed_hours = df["diff_sec"] / (60*60)
df["speed_ms"] = df["distance_m"].div(df["diff_sec"])
df["speed_kmu"] = df["distance_km"].div(elapsed_hours)

In [None]:
# Show the first rows of the frame to check the derived columns
df.head()

In [None]:
# Use the local 'DateTime' column as the index of the frame
df = df.set_index('DateTime')

In [None]:
# Series to plot: distance (km) per row, against the frame's index
y = df["distance_km"]

# Apply the `ggplot` style before creating the figure
plt.style.use("ggplot")

# One wide figure with a single set of axes
fig, ax = plt.subplots(figsize=(20, 9))

# Draw the distance line in blue and label it for the legend
ax.plot(y, label='Afstand per minuut', color="b")
ax.legend(loc='upper left')

# Title and axis labels
ax.set_title('Afgelegde afstand per minuut')
ax.set_ylabel('Afstand (km)')
ax.set_xlabel('Tijd')

# Blank out the minor tick labels on the y-axis so the axis is not crowded,
# and let matplotlib rotate/align the date labels on the x-axis.
ax.yaxis.set_minor_formatter(NullFormatter())
fig.autofmt_xdate()

# Render the figure
plt.show()