# Data Science Take Home

In [None]:
import math
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pykalman import KalmanFilter
import pylab

# Use a larger default figure size for every plot drawn in this notebook.
pylab.rcParams.update({'figure.figsize': (13, 10)})

## Take a look at the input data

In [None]:
# Read the tab-separated file, parsing the column at position 2 as datetimes
# (presumably the 'time' column -- verify against the file header), then use
# 'time' as the index and sort the rows by it.
df = pd.read_csv('../../../data/TakeHomeData.txt', sep='\t', parse_dates=[2])
df = df.set_index(['time']).sort_index()

# Preview the first rows.
df.head()

In [None]:
# Sanity check: show the type of the first index entry to confirm that the
# time column was parsed into a timestamp rather than left as a string.
first_index_value = df.index[0]
type(first_index_value)

In [None]:
# Inspect every row that belongs to a single session.
session_to_follow = '579f6f9b498e1fb7c47afc92'
df[df['sessionid'].eq(session_to_follow)]

In [None]:
# Rows of the session under study; reused by the cells further down.
df_sessionid_01 = df[df['sessionid'] == '579f6f9b498e1fb7c47afc92']

# Raw latitude (top panel) and longitude (bottom panel) against time.
raw_panels = [('lat', 'lat (degrees)'), ('lng', 'lng (degrees)')]
for panel_no, (column, y_label) in enumerate(raw_panels, start=1):
    plt.subplot(len(raw_panels), 1, panel_no)
    plt.plot(df_sessionid_01.index, df_sessionid_01[column], 'x-')
    plt.title('')
    if panel_no == len(raw_panels):
        # Only the bottom panel carries the x-axis label.
        plt.xlabel('Datetime')
    plt.ylabel(y_label)

plt.show()

## Test the pykalman library

In [None]:
# Smoke test of pykalman on a toy model with 2x2 transition and observation
# matrices: run 5 EM iterations on three observations, then filter and smooth.
toy_transition = [[1, 1], [0, 1]]
toy_observation = [[0.1, 0.5], [-0.3, 0.0]]
kf = KalmanFilter(transition_matrices=toy_transition,
                  observation_matrices=toy_observation)

measurements = np.asarray([[1, 0], [0, 0], [0, 1]])  # 3 observations
kf = kf.em(measurements, n_iter=5)
filtered_state_means, filtered_state_covariances = kf.filter(measurements)
smoothed_state_means, smoothed_state_covariances = kf.smooth(measurements)

# Display the smoothed state estimates.
smoothed_state_means

## Apply the Kalman filter to the given problem

In [None]:
# Constant-velocity model with state vector [lat, lng, lat velocity, lng velocity].
# NOTE(review): delta_t is fixed at one step per sample; the actual spacing of
# the timestamps in the index is not used here -- confirm that is acceptable.
delta_t = 1

# Transition: each position advances by velocity * delta_t; velocities carry over.
F = [[1, 0, delta_t, 0],
     [0, 1, 0, delta_t],
     [0, 0, 1, 0],
     [0, 0, 0, 1]]

# Observation: only the two position components are measured.
H = [[1, 0, 0, 0],
     [0, 1, 0, 0]]

# (n_samples, 2) array of [lat, lng]. DataFrame.as_matrix() was deprecated in
# pandas 0.23 and removed in 1.0; selecting the columns and taking .values
# gives the same array on both old and current pandas.
measurements = df_sessionid_01[['lat', 'lng']].values

# Start at the first observed position with zero velocity.
kf = KalmanFilter(n_dim_state=4,
                  n_dim_obs=2,
                  transition_matrices=F,
                  observation_matrices=H,
                  initial_state_mean=[measurements[0, 0], measurements[0, 1], 0, 0])

# Run 5 EM iterations on the measurements, then compute the filtered and
# smoothed state estimates.
kf = kf.em(measurements, n_iter=5)
(filtered_state_means, filtered_state_covariances) = kf.filter(measurements)
(smoothed_state_means, smoothed_state_covariances) = kf.smooth(measurements)

In [None]:
timestamps = df_sessionid_01.index

# A sample counts as "stopped" when both smoothed velocity components
# (state columns 2 and 3) are below the threshold in absolute value.
v_threshold = 0.0002
b_stop = (np.fabs(smoothed_state_means[:, 2:4]) < v_threshold).all(axis=1)

# One entry per panel: (state column, raw column to overlay or None, y-label).
state_panels = [
    (0, 'lat', 'lat (degrees)'),
    (2, None, 'lat velocity'),
    (1, 'lng', 'lng (degrees)'),
    (3, None, 'lng velocity'),
]
for panel_no, (state_col, raw_col, y_label) in enumerate(state_panels, start=1):
    plt.subplot(5, 1, panel_no)
    smoothed = smoothed_state_means[:, state_col]
    if raw_col is None:
        plt.plot(timestamps, smoothed, '--')
    else:
        # Overlay the raw measurements ('x') on the smoothed estimate ('--').
        plt.plot(timestamps, df_sessionid_01[raw_col], 'x',
                 timestamps, smoothed, '--')
    plt.title('')
    plt.ylabel(y_label)

# Bottom panel: the boolean stop flag over time.
plt.subplot(5, 1, 5)
plt.plot(timestamps, b_stop, '--')
plt.title('')
plt.ylabel('Stopped?')
plt.xlabel('Datetime')

plt.show()