In [4]:
from h3 import h3
import pandas as pd
import geopandas as gpd
from shapely.geometry import *
import warnings
import os
warnings.filterwarnings('ignore')
import pandas
import datetime as dt
import pytz

from cartoframes.data import Dataset
from cartoframes.viz import *
from cartoframes.viz.helpers import *
from cartoframes.viz.widgets import *
from cartoframes.auth import set_default_credentials

In [5]:
# Pull the CARTO API key and account name from the environment (run env.sh
# first so both variables exist) and register them as the default credentials.
CARTO_KEY = os.environ['CARTO_KEY']
username = os.environ['CARTO_USER_NAME']
set_default_credentials(
    api_key=CARTO_KEY,
    base_url='https://{}.carto.com'.format(username),
)

In [6]:
# Timezone used throughout the notebook: every timestamp below is converted
# to (or interpreted in) US/Eastern.
eastern = pytz.timezone('US/Eastern')

In [7]:
# Load the raw GPS trace: a headerless CSV whose three columns are read as
# (epoch seconds, latitude, longitude).
data_dir = './gps_data'
# Alternate trip:
# file_path = 'trip_2019-11-07T08_47_36-0500_B63DD96A-C519-4300-AC34-44A92F94EE09_8997C1B9-4101-4901-9E73-AB204E60DBEF.csv'
file_path = 'trip_2019-11-11T09:03:37-0500_8D7798CA-59B6-49C4-A07C-CC52092A0FEA.csv'
header = ['timestamp', 'latitude', 'longitude']
path = os.path.join(data_dir, file_path)
df = pd.read_csv(path, header=None)
df.columns = header

# Build `dataframe`: one row per strictly increasing whole-second timestamp.
# Fixes that share a second with (or are older than) the last kept fix are
# dropped, so the resulting track is sorted and free of duplicate timestamps,
# which the interpolation in get_loc_at relies on.
#
# Rows are collected in a list and the frame is built once: growing a frame
# with DataFrame.append inside the loop is quadratic, and that method no
# longer exists in pandas >= 2.0.
timestamp = dt.datetime.fromtimestamp(0, tz=eastern)  # sentinel: epoch start
records = []
for r in df.itertuples(index=False):
    epoch_seconds = int(r.timestamp)  # truncate sub-second precision
    new_ts = dt.datetime.fromtimestamp(epoch_seconds, tz=eastern)

    if new_ts > timestamp:
        timestamp = new_ts
        records.append({
            'ts': str(epoch_seconds),
            'timestamp': new_ts,
            'latitude': float(r.latitude),
            'longitude': float(r.longitude),
        })

# Same column order the original append-based construction produced.
dataframe = pd.DataFrame(records, columns=header + ['ts'])

def get_loc_at(timestamp, track=None):
    """Return the (latitude, longitude) of the GPS track at ``timestamp``.

    The position is linearly interpolated between the two fixes that bracket
    ``timestamp``. Queries before the first fix or after the last fix are
    clamped to the first / last fix respectively.

    Parameters
    ----------
    timestamp : datetime.datetime
        Timezone-aware instant to look up.
    track : pandas.DataFrame, optional
        Frame with ``timestamp``, ``latitude`` and ``longitude`` columns,
        sorted by strictly increasing ``timestamp``. Defaults to the
        module-level ``dataframe``.

    Returns
    -------
    tuple
        ``(latitude, longitude)``.

    Raises
    ------
    IndexError
        If the track is empty.
    """
    if track is None:
        track = dataframe

    first = track.iloc[0]
    last = track.iloc[-1]
    if timestamp <= first['timestamp']:
        return first['latitude'], first['longitude']

    if timestamp >= last['timestamp']:
        # Both coordinates must come from the *last* fix.
        return last['latitude'], last['longitude']

    # Pick the fixes immediately before and after the query. Taking the two
    # nearest fixes instead can select two fixes on the same side of the query
    # when the sampling is uneven, which would extrapolate.
    times = track['timestamp']
    before = track[times <= timestamp].iloc[-1]
    after = track[times > timestamp].iloc[0]

    # Fraction of the way from `before` to `after`; the span is > 0 because
    # after.timestamp > timestamp >= before.timestamp.
    span = (after['timestamp'] - before['timestamp']).total_seconds()
    t = (timestamp - before['timestamp']).total_seconds() / span

    # Interpolate each coordinate in time order (not value order), so tracks
    # heading south or west are handled correctly.
    new_lat = before['latitude'] + (after['latitude'] - before['latitude']) * t
    new_lng = before['longitude'] + (after['longitude'] - before['longitude']) * t
    return new_lat, new_lng
    
# Sanity check: look up the position at a single instant.
# NOTE(review): 1573134456.14 is 2019-11-07 08:47:36 US/Eastern, which matches
# the commented-out 2019-11-07 trip rather than the 2019-11-11 trip loaded
# above, so the recorded output below may be stale — confirm.
get_loc_at(dt.datetime.fromtimestamp(1573134456.14).astimezone(eastern))
        


(42.35561593333333, -71.10384210000002)

In [8]:
# Load the clustered output: column 0 holds a time-of-day string
# ("%H:%M:%S.%f") and column 10 is kept as `type`. The recording date is not
# stored in the rows, so it is recovered from the file name.
clustered_dir = '../clustered_data/'
# cl_file = 'clustered_2019-11-07 08:47:36.127124.txt'
cl_file = 'clustered_2019-11-11 09:03:37.293894.txt'
path = os.path.join(clustered_dir, cl_file)

# pytz zones must be attached with localize(); replace(tzinfo=...) silently
# uses the zone's historical LMT offset instead of EST/EDT.
cl_data_timestamp = eastern.localize(
    dt.datetime.strptime(cl_file, "clustered_%Y-%m-%d %H:%M:%S.%f.txt")
)
cl_data = pd.read_csv(path, header=None)[[0, 10]]
cl_data.columns = ['timestamp', 'type']

# The times are treated as US/Eastern wall-clock times (the file-name time
# matches the "-0500" GPS file name). Localizing explicitly keeps the result
# independent of the timezone of the machine running the notebook; calling
# astimezone() on a naive datetime would assume the machine's local zone.
# NOTE(review): every row gets the file-name date, so a recording that
# crosses midnight would need extra handling — confirm this cannot happen.
cl_date = cl_data_timestamp.date()
cl_data['timestamp'] = cl_data['timestamp'].map(
    lambda x: eastern.localize(
        dt.datetime.combine(cl_date, dt.datetime.strptime(x, "%H:%M:%S.%f").time())
    )
)

In [9]:

# Empty frame with the intended output schema ('latitude' was previously
# misspelled as 'latitute').
# NOTE(review): `combined` is not used anywhere in the visible notebook —
# confirm whether it is still needed.
combined = pd.DataFrame(columns=['timestamp', 'latitude', 'longitude', 'type'])

# Interpolated GPS position for every clustered sample, in row order.
locations = [get_loc_at(ts.to_pydatetime()) for ts in cl_data['timestamp']]


In [10]:
# Attach the interpolated coordinates to the clustered samples.
# `locations` is in the same row order as `cl_data`, so whole columns can be
# assigned at once. The previous per-row `cl_data[col].iloc[i] = value` was
# chained assignment: slow, a SettingWithCopyWarning (hidden by the global
# warnings filter), and a silent no-op under pandas copy-on-write.
cl_data['latitude'] = [float(lat) for lat, _ in locations]
cl_data['longitude'] = [float(lng) for _, lng in locations]

print(cl_data)

                             timestamp  type   latitude  longitude
0     2019-11-11 09:03:37.310000-05:00     1  42.355604 -71.103851
1     2019-11-11 09:03:37.380000-05:00     1  42.355604 -71.103851
2     2019-11-11 09:03:37.450000-05:00     1  42.355604 -71.103851
3     2019-11-11 09:03:37.520000-05:00     1  42.355604 -71.103851
4     2019-11-11 09:03:37.590000-05:00     1  42.355604 -71.103851
...                                ...   ...        ...        ...
13363 2019-11-11 09:19:09.150000-05:00     4  42.367723 -71.071177
13364 2019-11-11 09:19:09.220000-05:00     4  42.367723 -71.071177
13365 2019-11-11 09:19:09.280000-05:00     4  42.367724 -71.071176
13366 2019-11-11 09:19:09.350000-05:00     4  42.367724 -71.071176
13367 2019-11-11 09:19:09.420000-05:00     4  42.367724 -71.071176

[13368 rows x 4 columns]


In [11]:
# Visualize the located samples; Map and Layer presumably come from the
# cartoframes.viz wildcard import at the top of the notebook.
Map(Layer(cl_data))

In [12]:
# Write the combined samples (timestamp, type, latitude, longitude) to CSV.
# Letting pandas open the file avoids pushing the to_csv() string through a
# text-mode handle, which doubles carriage returns on Windows and depends on
# the locale's default encoding.
cl_data.to_csv('combined_8D7798CA-59B6-49C4-A07C-CC52092A0FEA.csv')

# you can see it in carto interface
# NOTE(review): this cell only writes a local file; presumably it has to be
# uploaded to CARTO separately before it shows up there — confirm.