In [54]:
from datetime import datetime
import pandas as pd
from geopy.distance import distance
import numpy as np

import random

import pymongo
from pymongo import MongoClient

In [55]:
# Connect to the MongoDB server running locally on port 27017.
client = MongoClient(host='localhost', port=27017)

# Database used by this notebook.
db = client['avl_pipeline_test']

# `trip_coll` holds the vehicle report documents that get labelled;
# `chnk_coll` holds the chunk configurations ('number_chunks', 'chunks').
trip_coll, chnk_coll = db['clean_trips'], db['chunk_info']

In [56]:
# Distinct `trip_id_iso` values present in the trips collection.
all_trips = trip_coll.distinct('trip_id_iso')

In [57]:
# Label every report of every trip with the chunk it falls in, once per chunk
# configuration stored in `chnk_coll`.  For each chunk (in stored order) the
# report closest to the chunk's stop marks the end of that chunk; every report
# from the previous boundary up to (not including) that report gets the
# chunk's sequence key written into a "chunk_<number_chunks>" field.
#
# NOTE(review): reports at or after the boundary found for the final chunk are
# never labelled because the label range is half-open ($lt) - confirm that
# this is intended.

# The chunk configurations are identical for every trip, so read them once
# instead of re-querying the collection inside the trip loop.
chunk_configs = list(chnk_coll.find())

for trip in all_trips:

    for chunk in chunk_configs:

        # Name of the field written onto each report, e.g. "chunk_6".
        chnk_num = "chunk_" + str(chunk['number_chunks'])

        # Lower time bound of the chunk currently being labelled.
        start_ts = 0

        for seq, chunk_info in chunk['chunks'].items():

            # The stop that ends this chunk does not change per report.
            cnk_lat = chunk_info['chunk_stop_lat']
            cnk_lon = chunk_info['chunk_stop_lon']

            # Closest report seen so far.  Only reports nearer than 100000 m
            # are considered; 'time_stamp' stays None if there is none.
            best_dist = {
                'stop_dist': 100000,
                'time_stamp': None}

            filter_search = {
                "trip_id_iso": trip,
                "time_stamp": {"$gte": start_ts}
            }

            # Sorted scan with a strict '<' keeps the earliest report on ties.
            for doc in trip_coll.find(filter_search).sort('time_stamp'):

                doc_lat = doc['LATITUDE']
                doc_lon = doc['LONGITUDE']

                doc_dist = distance((doc_lat, doc_lon), (cnk_lat, cnk_lon)).m

                if doc_dist < best_dist['stop_dist']:
                    best_dist['stop_dist'] = doc_dist
                    best_dist['time_stamp'] = doc['time_stamp']

            if best_dist['time_stamp'] is None:
                # No boundary found for this chunk.  Keep `start_ts` as it is:
                # resetting it to 0 would make the next chunk relabel the trip
                # from its very first report.
                continue

            label_search = {
                "trip_id_iso": trip,
                "time_stamp": {
                    "$gte": start_ts,
                    "$lt": best_dist['time_stamp']
                }
            }

            # One server-side update instead of one round trip per report.
            trip_coll.update_many(label_search, {
                "$set": {
                    chnk_num: seq
                }
            })

            # The next chunk starts at the boundary report of this one.
            start_ts = best_dist['time_stamp']

In [58]:
# Draw one random document from the trips collection with a $sample stage.
sample_cursor = trip_coll.aggregate([{'$sample': {'size': 1}}])
samp_start = list(sample_cursor)[0]

In [59]:
# Display the sampled document.
samp_start

{'_id': ObjectId('5ae4da383ad39e17213cfc47'),
 'HEADING': '118.0',
 'LATITUDE': '37.75848',
 'LONGITUDE': '-122.44506',
 'PREDICTABLE': '1',
 'REPORT_TIME': '11/19/2016 16:50:51',
 'REV': '1526',
 'SPEED': '6.111',
 'TRAIN_ASSIGNMENT': '3307',
 'VEHICLE_TAG': '5489',
 'time_stamp': 1479603051.0,
 'trip_id_iso': '7253825_2016-11-19_MWYFC',
 'chunk_2': '1',
 'chunk_6': '3',
 'chunk_12': '6'}

In [33]:
# Trip identifier of the sampled document; used to filter that trip's reports.
samp_id = samp_start['trip_id_iso']

In [34]:
# A single chunk configuration document to experiment with.
chnk_samp = chnk_coll.find_one()

In [35]:
# Field name under which the chunk label is stored, e.g. "chunk_2".
chnk_num = "chunk_{}".format(chnk_samp['number_chunks'])

In [36]:
# Label the reports of the sampled trip (`samp_id`) with the chunks of the
# sampled configuration (`chnk_samp`), writing into the `chnk_num` field.

# Lower time bound of the chunk currently being labelled.
start_ts = 0

for seq, chunk_info in chnk_samp['chunks'].items():

    # The stop that ends this chunk does not change per report.
    cnk_lat = chunk_info['chunk_stop_lat']
    cnk_lon = chunk_info['chunk_stop_lon']

    # Closest report seen so far; its timestamp becomes the end of this chunk.
    # Only reports nearer than 100000 m are considered; 'time_stamp' stays
    # None if there is none.
    best_dist = {
        'stop_dist': 100000,
        'time_stamp': None}

    # All reports of the trip from the start of this chunk onward.
    filter_search = {
        "trip_id_iso": samp_id,
        "time_stamp": {"$gte": start_ts}
    }

    # Sorted scan with a strict '<' keeps the earliest report on ties.
    for doc in trip_coll.find(filter_search).sort('time_stamp'):

        doc_lat = doc['LATITUDE']
        doc_lon = doc['LONGITUDE']

        doc_dist = distance((doc_lat, doc_lon), (cnk_lat, cnk_lon)).m

        if doc_dist < best_dist['stop_dist']:
            best_dist['stop_dist'] = doc_dist
            best_dist['time_stamp'] = doc['time_stamp']

    if best_dist['time_stamp'] is None:
        # No boundary found for this chunk.  Keep `start_ts` as it is:
        # resetting it to 0 would make the next chunk relabel the trip from
        # its very first report.
        continue

    # Reports belonging to this chunk: from its start up to, but not
    # including, the report closest to the chunk's stop.
    label_search = {
        "trip_id_iso": samp_id,
        "time_stamp": {
            "$gte": start_ts,
            "$lt": best_dist['time_stamp']
        }
    }

    # Label only the bounded range.  Iterating `filter_search` here would tag
    # every report from `start_ts` onward and leave `label_search` unused.
    trip_coll.update_many(label_search, {
        "$set": {
            chnk_num: seq
        }
    })

    # The next chunk starts at the boundary report of this one.
    start_ts = best_dist['time_stamp']

In [37]:
# Show one document from the trips collection (the output includes `chunk_2`).
trip_coll.find_one()

{'_id': ObjectId('5ae4dcb33ad39e1721401dad'),
 'HEADING': '82.0',
 'LATITUDE': '37.78693',
 'LONGITUDE': '-122.4565',
 'PREDICTABLE': '1',
 'REPORT_TIME': '10/28/2016 00:27:57',
 'REV': '1526',
 'SPEED': '0.0',
 'TRAIN_ASSIGNMENT': '3305',
 'VEHICLE_TAG': '5419',
 'minutes_noon_sqr': 480249,
 'sched_time_diff_seconds': 123,
 'service_id': 1,
 'time_stamp': 1477639677.0,
 'trip_id': 7253717,
 'trip_id_iso': '7253717_2016-10-28_JRMFT',
 'trip_start': 1,
 'chunk_2': '1'}

In [43]:
# Hard-coded trip used for the spot checks in the following cells.
trip_id = '7253708_2016-12-05_DK564'

In [44]:
# Number of reports stored for this trip.  `Cursor.count()` was deprecated in
# PyMongo 3.7 and removed in 4.0; `count_documents` is its replacement.
trip_coll.count_documents({"trip_id_iso": trip_id})

78

In [48]:
# One report of this trip whose `chunk_2` label is '1'.
trip_coll.find_one({"trip_id_iso": trip_id, 'chunk_2': '1'})

{'_id': ObjectId('5ae4d96e3ad39e17213bf219'),
 'HEADING': '0.0',
 'LATITUDE': '37.78698',
 'LONGITUDE': '-122.45645',
 'PREDICTABLE': '1',
 'REPORT_TIME': '12/05/2016 07:53:36',
 'REV': '1526',
 'SPEED': '0.0',
 'TRAIN_ASSIGNMENT': '3302',
 'VEHICLE_TAG': '5423',
 'minutes_noon_sqr': 61009,
 'sched_time_diff_seconds': 84,
 'service_id': 1,
 'time_stamp': 1480953216.0,
 'trip_id': 7253708,
 'trip_id_iso': '7253708_2016-12-05_DK564',
 'trip_start': 1,
 'chunk_2': '1'}

In [47]:
# One report of this trip whose `chunk_2` label is '2'.
trip_coll.find_one({"trip_id_iso": trip_id, 'chunk_2': '2'})

{'_id': ObjectId('5ae4d96e3ad39e17213bf240'),
 'HEADING': '84.0',
 'LATITUDE': '37.7608',
 'LONGITUDE': '-122.4364',
 'PREDICTABLE': '1',
 'REPORT_TIME': '12/05/2016 08:19:06',
 'REV': '1526',
 'SPEED': '7.5',
 'TRAIN_ASSIGNMENT': '3302',
 'VEHICLE_TAG': '5423',
 'time_stamp': 1480954746.0,
 'trip_id_iso': '7253708_2016-12-05_DK564',
 'chunk_2': '2'}

In [23]:
# NOTE(review): `test` is not defined in any visible cell - presumably a
# document fetched from `trip_coll` (e.g. via find_one()); confirm and define
# it above so this cell works after Restart Kernel -> Run All.
test_id = test['_id']

In [24]:
# Example label payload: field `chunk_2` set to the string '1'.
sample = dict(chunk_2='1')

In [26]:
# Manually set `chunk_2` to '1' on the document identified by `test_id`.
trip_coll.update_one({"_id": test_id}, {"$set": {"chunk_2": '1'}})

<pymongo.results.UpdateResult at 0x1131e85a0>

In [27]:
# Show one document from the trips collection after the manual update.
trip_coll.find_one()

{'_id': ObjectId('5ae4dcb33ad39e1721401dad'),
 'HEADING': '82.0',
 'LATITUDE': '37.78693',
 'LONGITUDE': '-122.4565',
 'PREDICTABLE': '1',
 'REPORT_TIME': '10/28/2016 00:27:57',
 'REV': '1526',
 'SPEED': '0.0',
 'TRAIN_ASSIGNMENT': '3305',
 'VEHICLE_TAG': '5419',
 'minutes_noon_sqr': 480249,
 'sched_time_diff_seconds': 123,
 'service_id': 1,
 'time_stamp': 1477639677.0,
 'trip_id': 7253717,
 'trip_id_iso': '7253717_2016-10-28_JRMFT',
 'trip_start': 1,
 'chunk_2': '1'}