---
# Data modeling GPX

Date: 02-11-2019 <br>
Concept version: 0.9 <br>
Author: Pieter Lems  <br>
Docent: J. Kroon<br>

© Copyright 2019 Ministerie van Defensie

This notebook wil provide information and scripts relating to creating data models for MongoDB.<br>
To create the data models we are going to use Python. 


The data sets in used in this notebook can be found in the folder ("Data/GPX")

## Contents of notebook
- Importing and exploring the data sets
    - Importing
    - Exploring
- Creating the model
- Loading the data
- Querying the data

    


## Tools 
- Mongo docker container <br>
docker run -d -p 27017:27017 mongo:latest
- Mongo compass <br>
wget https://downloads.mongodb.com/compass/mongodb-compass_1.15.1_amd64.deb <br>
sudo dpkg -i mongodb-compass_1.15.1_amd64.deb

---

Run the docker container 

In [1]:
!docker run -d -p 27017:27017 mongo:latest

f4e56b66ab42540e88e221fc206b73c6e4e40e4e36f7d2b234229cb90387f49d
docker: Error response from daemon: driver failed programming external connectivity on endpoint ecstatic_borg (e9050e435044511b6f77f9ebc55990bd7fa9696fe1c5f66aed83c11d86e05391): Bind for 0.0.0.0:27017 failed: port is already allocated.


---
# The data sets
---

In [15]:
import pandas as pd
from mongoengine import *
from datetime import datetime

## Importing

In [16]:
Biesbosch = pd.read_json('Data/Trail_Biesbosch.json')
Zeeland = pd.read_json('Data/Trail_Zeeland.json')

Exploring
---

In [17]:
Biesbosch

Unnamed: 0,lon,lat,alt,time,spd
0,6.170337,51.507614,16.48,1253895793000,
1,6.170337,51.507614,14.08,1253895794000,0.000000
2,6.170337,51.507614,17.44,1253895805000,0.000000
3,6.170337,51.507614,13.60,1253895806000,0.000000
4,6.170341,51.507612,11.19,1253895816000,0.127836
...,...,...,...,...,...
746,4.811186,51.706119,3.50,1253976147000,0.049099
747,4.811186,51.706119,5.91,1253976152000,0.000000
748,4.811186,51.706119,0.14,1253976153000,0.000000
749,4.811186,51.706119,5.42,1253976154000,0.000000


In [18]:
Zeeland

Unnamed: 0,lon,lat,alt,time,spd
0,4.708490,51.561330,-18.61,1295166887000,
1,4.708492,51.561331,-20.05,1295166888000,0.638721
2,4.708492,51.561329,-16.69,1295166891000,0.266868
3,4.708497,51.561329,-12.84,1295166895000,0.311073
4,4.708497,51.561329,-12.36,1295166896000,0.000000
...,...,...,...,...,...
2277,4.708559,51.561443,-53.70,1295194783000,1.059138
2278,4.708557,51.561441,-50.81,1295194841000,0.016253
2279,4.708557,51.561441,5.91,1295194842000,0.000000
2280,4.708557,51.561441,2.54,1295194843000,0.000000


---

# Creating the model

---

In [19]:
connect(alias='Trail_Database',db='Trail_Database')

MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True, read_preference=Primary())

In [20]:
class Route(Document):
    
    # Name of the route s_date+e_date
    name = StringField()
    
    # Start date
    s_date= DateTimeField()
    
    # End date
    e_date = DateTimeField()
    
    # Route type (Biking,Hiking,Driving )
    r_type = StringField() 
    
    
    meta = {'db_alias': 'Trail_Database'}


class Geometry(EmbeddedDocument):
 
    # coordinates of transmission coord=[1,2]
    coord = PointField()
    
    # altitude of tansmission
    alt = FloatField()
    
    meta = {'db_alias': 'Trail_Database'}
    
class Speed(EmbeddedDocument):
    
    # Speed entry
    spd = FloatField()
    
    meta = {'db_alias': 'Trail_Database'}
    

class Transmission(Document):
    
    # Timestamp of transmission
    time = DateTimeField()
    
    # Geometry of transmission
    geometry = EmbeddedDocumentField(Geometry)
    
    # Speed at time of transmission
    speed = EmbeddedDocumentField(Speed)
    
    # Reference to the route of transmission
    route = ReferenceField(Route)
    
    meta = {'db_alias': 'Trail_Database'}
    

    



---
# Loading the data
---

In [106]:
def load_data(df,name,type):
    
    # Get the time of the first column
    # /1000 to remove utc info
    s_date = datetime.fromtimestamp(
        (df.at[0,'time']/1000))
    
    # Get the time of the last column
    # dataframe at location [index lenght data frame -1, column name]
    # /1000 to remove utc info
    e_date = datetime.fromtimestamp(
        (df.at[len(df.index)-1,'time']/1000))
    
    # Compose the name of the route
    name = name+" "+ str(s_date)+"/"+ str(e_date)
    
    route = Route(name,s_date,e_date,type).save()
    
    
    # Itterate trough all rows in dataframe
    for index,row in df.iterrows():
        
        # Convert the datetime to a valid format
        time = datetime.fromtimestamp(row['time']/1000)
        
        # Create the geometry document
        geometry = Geometry([row['lon'],row['lat']], 
                            row['alt'])
        
        # Create the speed document
        # Make sure null values are inserted as 0
        if str(row['spd'])== 'nan':
            speed = Speed(0)
        else:
            speed = Speed(row['spd'])
    
        # Create transmission
        Transmission(time,
                    geometry,
                    speed,
                    route).save()
    print("Inserted " + str(len(df.index)))
    

In [None]:
load_data(Biesbosch,"Hiking Trail Biesbosch","Hiking")

In [88]:
load_data(Zeeland,"Biking Trail Zeeland","Biking")

Inserted 2282


---
# Querying the data


source = http://docs.mongoengine.org/guide/querying.html

---

In [89]:
#Select routes by name 
# Parameters:
# - route_name

def select_route_by_name(route_name):
    Route_By_Name = Route.objects(name__contains=route_name).to_json()
    return pd.read_json(Route_By_Name)
    

In [90]:
# Select all transmissions by route 
# Paramaters:
# - route_name

def select_transmissions_by_route(route_name):
    All_Transmissions_By_Route = Route.objects(name__contains=route_name).aggregate(*[
                                 {
                                  '$lookup': {
                                      'from': Transmission._get_collection_name(),
                                      'localField': '_id',
                                      'foreignField': 'route',
                                      'as': 'transmissions'}
                                 },
                                 {   '$unwind':"$transmissions" },
                                ])

    return pd.DataFrame(list(All_Transmissions_By_Route))


In [91]:
#Select all Transmissions by type
# parameters:
# - Route type

def select_transmissions_by_type(route_type):
    All_Transmissions_by_Type = Route.objects(r_type__contains=route_type).aggregate(*[
                                 {
                                  '$lookup': {
                                      'from': Transmission._get_collection_name(),
                                      'localField': '_id',
                                      'foreignField': 'route',
                                      'as': 'transmissions'}
                                 },
                                 {   '$unwind':"$transmissions" },
                                ])
    return pd.DataFrame(All_Transmissions_by_Type)


In [100]:
#Select all Transmissions in certain polygone
# Use https://www.keene.edu/campus/maps/tool/ to find desired polygone.
# parameters:
# - point 1
# - point 2
# - point 3
# - point 4

def select_transmissions_in_polygone(p1,p2,p3,p4):
    Transmissions_in_Polygone = Transmission.objects(geometry__coord__geo_within=[[p1,p2,p3,p4]]).to_json()
    return pd.DataFrame(eval(Transmissions_in_Polygone))


In [93]:
# Select all transmission near a certain point
# parameters
# longitude of point
# latitude of point
# distance around point (in meters)

def transmissions_near_point(lon,lat,distance):
    
    Transmissions_near_Point = Transmission.objects(geometry__coord__near=[lon, lat],
                                                    geometry__coord__max_distance=distance).to_json()
    
    return pd.read_json(Transmissions_near_Point)

In [94]:
# Select Tranmissions from biesbosch between certain timeframe 
All_Transmissions_Biesbosch_By_Transmission = Transmission.objects().aggregate(*[
                              {
                                '$lookup':
                                {
                                    'from': Route._get_collection_name(),
                                    'pipeline': [
                                        {
                                            '$match':{
                                                'r_type':{
                                                '$eq': "Hiking" }
                                            }
                                        }],
                                    'as': 'route'
                                }
                              },
                              {   '$unwind':"$route" },
                              ])

In [95]:
select_route_by_name("Zeeland")


Unnamed: 0,_id,name,s_date,e_date,r_type
0,{'$oid': '5db6dc24a9807adaac723ed5'},Biking Trail Zeeland 2011-01-16 09:34:47/2011-...,{'$date': 1295170487000},{'$date': 1295198444000},Biking


In [96]:
select_transmissions_by_route('Biesbosch')

Unnamed: 0,_id,name,s_date,e_date,r_type,transmissions
0,5db6dc20a9807adaac723be5,Hiking Trail Biesbosch 2009-09-25 18:23:13/200...,2009-09-25 18:23:13,2009-09-26 16:43:22,Hiking,"{'_id': 5db6dc20a9807adaac723be6, 'time': 2009..."
1,5db6dc20a9807adaac723be5,Hiking Trail Biesbosch 2009-09-25 18:23:13/200...,2009-09-25 18:23:13,2009-09-26 16:43:22,Hiking,"{'_id': 5db6dc20a9807adaac723be7, 'time': 2009..."
2,5db6dc20a9807adaac723be5,Hiking Trail Biesbosch 2009-09-25 18:23:13/200...,2009-09-25 18:23:13,2009-09-26 16:43:22,Hiking,"{'_id': 5db6dc20a9807adaac723be8, 'time': 2009..."
3,5db6dc20a9807adaac723be5,Hiking Trail Biesbosch 2009-09-25 18:23:13/200...,2009-09-25 18:23:13,2009-09-26 16:43:22,Hiking,"{'_id': 5db6dc20a9807adaac723be9, 'time': 2009..."
4,5db6dc20a9807adaac723be5,Hiking Trail Biesbosch 2009-09-25 18:23:13/200...,2009-09-25 18:23:13,2009-09-26 16:43:22,Hiking,"{'_id': 5db6dc20a9807adaac723bea, 'time': 2009..."
...,...,...,...,...,...,...
746,5db6dc20a9807adaac723be5,Hiking Trail Biesbosch 2009-09-25 18:23:13/200...,2009-09-25 18:23:13,2009-09-26 16:43:22,Hiking,"{'_id': 5db6dc22a9807adaac723ed0, 'time': 2009..."
747,5db6dc20a9807adaac723be5,Hiking Trail Biesbosch 2009-09-25 18:23:13/200...,2009-09-25 18:23:13,2009-09-26 16:43:22,Hiking,"{'_id': 5db6dc22a9807adaac723ed1, 'time': 2009..."
748,5db6dc20a9807adaac723be5,Hiking Trail Biesbosch 2009-09-25 18:23:13/200...,2009-09-25 18:23:13,2009-09-26 16:43:22,Hiking,"{'_id': 5db6dc22a9807adaac723ed2, 'time': 2009..."
749,5db6dc20a9807adaac723be5,Hiking Trail Biesbosch 2009-09-25 18:23:13/200...,2009-09-25 18:23:13,2009-09-26 16:43:22,Hiking,"{'_id': 5db6dc22a9807adaac723ed3, 'time': 2009..."


In [97]:
select_transmissions_by_type("Biking")


Unnamed: 0,_id,name,s_date,e_date,r_type,transmissions
0,5db6dc24a9807adaac723ed5,Biking Trail Zeeland 2011-01-16 09:34:47/2011-...,2011-01-16 09:34:47,2011-01-16 17:20:44,Biking,"{'_id': 5db6dc24a9807adaac723ed6, 'time': 2011..."
1,5db6dc24a9807adaac723ed5,Biking Trail Zeeland 2011-01-16 09:34:47/2011-...,2011-01-16 09:34:47,2011-01-16 17:20:44,Biking,"{'_id': 5db6dc24a9807adaac723ed7, 'time': 2011..."
2,5db6dc24a9807adaac723ed5,Biking Trail Zeeland 2011-01-16 09:34:47/2011-...,2011-01-16 09:34:47,2011-01-16 17:20:44,Biking,"{'_id': 5db6dc24a9807adaac723ed8, 'time': 2011..."
3,5db6dc24a9807adaac723ed5,Biking Trail Zeeland 2011-01-16 09:34:47/2011-...,2011-01-16 09:34:47,2011-01-16 17:20:44,Biking,"{'_id': 5db6dc24a9807adaac723ed9, 'time': 2011..."
4,5db6dc24a9807adaac723ed5,Biking Trail Zeeland 2011-01-16 09:34:47/2011-...,2011-01-16 09:34:47,2011-01-16 17:20:44,Biking,"{'_id': 5db6dc24a9807adaac723eda, 'time': 2011..."
...,...,...,...,...,...,...
2277,5db6dc24a9807adaac723ed5,Biking Trail Zeeland 2011-01-16 09:34:47/2011-...,2011-01-16 09:34:47,2011-01-16 17:20:44,Biking,"{'_id': 5db6dc2da9807adaac7247bb, 'time': 2011..."
2278,5db6dc24a9807adaac723ed5,Biking Trail Zeeland 2011-01-16 09:34:47/2011-...,2011-01-16 09:34:47,2011-01-16 17:20:44,Biking,"{'_id': 5db6dc2da9807adaac7247bc, 'time': 2011..."
2279,5db6dc24a9807adaac723ed5,Biking Trail Zeeland 2011-01-16 09:34:47/2011-...,2011-01-16 09:34:47,2011-01-16 17:20:44,Biking,"{'_id': 5db6dc2da9807adaac7247bd, 'time': 2011..."
2280,5db6dc24a9807adaac723ed5,Biking Trail Zeeland 2011-01-16 09:34:47/2011-...,2011-01-16 09:34:47,2011-01-16 17:20:44,Biking,"{'_id': 5db6dc2da9807adaac7247be, 'time': 2011..."


In [101]:
# Hollands polygone
select_transmissions_in_polygone([-0.4006438,55.7271101],
                                [4.4759476,47.9310663],
                                [16.3379268,55.627996],
                                [-0.4006438,55.7271101])




Unnamed: 0,_id,time,geometry,speed,route
0,{'$oid': '5db6dc2da9807adaac7246f0'},{'$date': 1295197090000},"{'coord': {'type': 'Point', 'coordinates': [4....",{'spd': 112.8095944983},{'$oid': '5db6dc24a9807adaac723ed5'}
1,{'$oid': '5db6dc26a9807adaac723ffe'},{'$date': 1295172294000},"{'coord': {'type': 'Point', 'coordinates': [4....",{'spd': 122.2662980266},{'$oid': '5db6dc24a9807adaac723ed5'}
2,{'$oid': '5db6dc26a9807adaac723ffd'},{'$date': 1295172275000},"{'coord': {'type': 'Point', 'coordinates': [4....",{'spd': 120.3974163801},{'$oid': '5db6dc24a9807adaac723ed5'}
3,{'$oid': '5db6dc2da9807adaac7246f1'},{'$date': 1295197108000},"{'coord': {'type': 'Point', 'coordinates': [4....",{'spd': 114.2301573916},{'$oid': '5db6dc24a9807adaac723ed5'}
4,{'$oid': '5db6dc26a9807adaac723ffb'},{'$date': 1295172260000},"{'coord': {'type': 'Point', 'coordinates': [4....",{'spd': 116.9490085477},{'$oid': '5db6dc24a9807adaac723ed5'}
...,...,...,...,...,...
3028,{'$oid': '5db6dc20a9807adaac723bf1'},{'$date': 1253971655000},"{'coord': {'type': 'Point', 'coordinates': [6....",{'spd': 0.0},{'$oid': '5db6dc20a9807adaac723be5'}
3029,{'$oid': '5db6dc20a9807adaac723be6'},{'$date': 1253902993000},"{'coord': {'type': 'Point', 'coordinates': [6....",{'spd': 0.0},{'$oid': '5db6dc20a9807adaac723be5'}
3030,{'$oid': '5db6dc20a9807adaac723be7'},{'$date': 1253902994000},"{'coord': {'type': 'Point', 'coordinates': [6....",{'spd': 0.0},{'$oid': '5db6dc20a9807adaac723be5'}
3031,{'$oid': '5db6dc20a9807adaac723be8'},{'$date': 1253903005000},"{'coord': {'type': 'Point', 'coordinates': [6....",{'spd': 0.0},{'$oid': '5db6dc20a9807adaac723be5'}


In [102]:
# Zeelands polygone
select_transmissions_in_polygone([2.8012454,52.5061913],
                                     [3.240578,50.4959579],
                                     [6.9803964,52.3051199],
                                     [2.8012454,52.5061913])


Unnamed: 0,_id,time,geometry,speed,route
0,{'$oid': '5db6dc2da9807adaac7246f0'},{'$date': 1295197090000},"{'coord': {'type': 'Point', 'coordinates': [4....",{'spd': 112.8095944983},{'$oid': '5db6dc24a9807adaac723ed5'}
1,{'$oid': '5db6dc26a9807adaac723ffe'},{'$date': 1295172294000},"{'coord': {'type': 'Point', 'coordinates': [4....",{'spd': 122.2662980266},{'$oid': '5db6dc24a9807adaac723ed5'}
2,{'$oid': '5db6dc26a9807adaac723ffd'},{'$date': 1295172275000},"{'coord': {'type': 'Point', 'coordinates': [4....",{'spd': 120.3974163801},{'$oid': '5db6dc24a9807adaac723ed5'}
3,{'$oid': '5db6dc2da9807adaac7246f1'},{'$date': 1295197108000},"{'coord': {'type': 'Point', 'coordinates': [4....",{'spd': 114.2301573916},{'$oid': '5db6dc24a9807adaac723ed5'}
4,{'$oid': '5db6dc26a9807adaac723ffb'},{'$date': 1295172260000},"{'coord': {'type': 'Point', 'coordinates': [4....",{'spd': 116.9490085477},{'$oid': '5db6dc24a9807adaac723ed5'}
...,...,...,...,...,...
3016,{'$oid': '5db6dc25a9807adaac723f7d'},{'$date': 1295171347000},"{'coord': {'type': 'Point', 'coordinates': [4....",{'spd': 59.0404749208},{'$oid': '5db6dc24a9807adaac723ed5'}
3017,{'$oid': '5db6dc25a9807adaac723f7e'},{'$date': 1295171349000},"{'coord': {'type': 'Point', 'coordinates': [4....",{'spd': 65.2888232001},{'$oid': '5db6dc24a9807adaac723ed5'}
3018,{'$oid': '5db6dc25a9807adaac723f28'},{'$date': 1295170857000},"{'coord': {'type': 'Point', 'coordinates': [4....",{'spd': 50.4028976675},{'$oid': '5db6dc24a9807adaac723ed5'}
3019,{'$oid': '5db6dc25a9807adaac723f27'},{'$date': 1295170849000},"{'coord': {'type': 'Point', 'coordinates': [4....",{'spd': 55.6537885735},{'$oid': '5db6dc24a9807adaac723ed5'}


In [103]:
transmissions_near_point(4.80369, 51.711555,1)

Unnamed: 0,_id,time,geometry,speed,route
0,{'$oid': '5db6dc22a9807adaac723e83'},{'$date': 1253982580000},"{'coord': {'type': 'Point', 'coordinates': [4....",{'spd': 10.3313673472},{'$oid': '5db6dc20a9807adaac723be5'}
