In [2]:
import pandas as pd # Pandas is used to read the JSON dataset.
from mongoengine import * # MongoEngine is used to model, import and index datasets.
from datetime import datetime # The Python datetime module is used to convert timestamps.

In [3]:
# Read the Frida dataset (JSON file) into a pandas dataframe.
SW_Crane_Frida = pd.read_json(path_or_buf = 'Frida_SW.json')

In [4]:
SW_Crane_Frida.head(1) # Display the first row of the dataframe.

Unnamed: 0,event-id,study-name,timestamp,visible,ground-speed,heading,location-long,location-lat,height-above-ellipsoid,individual-taxon-canonical-name,sensor-type,tag-voltage,individual-local-identifier
0,1154727247,"GPS telemetry of Common Cranes, Sweden",2013-07-21 03:06:32,True,0.0,,13.583908,57.503796,193,Grus grus,gps,4110.0,9381


In [15]:
# Open the MongoEngine connection to the database named 'Crane_Database'.
connect(db = 'Crane_Database')

MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True, read_preference=Primary())

In [6]:
# Creating the Tracker document 
class Tracker(Document):
    """MongoEngine document describing one tracked crane and its dataset.

    ``load_data`` saves exactly one Tracker per loaded dataframe, and every
    ``Transmission`` created from that dataframe references it.
    """

    # Name of the study (taken from the 'study-name' column by load_data).
    study_name = StringField()
    
    # Name of the bird, in Latin (the 'individual-taxon-canonical-name' column).
    individual_taxon_canonical_name = StringField()
    
    # Id of the crane (the 'individual-local-identifier' column).
    individual_local_identifier = IntField()
    
    # Start date of the study; load_data uses the timestamp of the first row.
    start_date = DateTimeField()
    
    # End date of the study; load_data uses the timestamp of the last row.
    end_date = DateTimeField()

    # Name of the crane, as passed to load_data by the caller.
    name = StringField()
    
    # Number of transmissions related to the tracker (row count of the dataframe).
    transmission_Count= IntField()

In [6]:
# Creating the Geometry document
class Geometry(EmbeddedDocument):
    """Embedded document holding the position of a single transmission."""
 
    # Coordinates of the transmission. load_data passes them as
    # [longitude, latitude] (longitude first).
    coord = PointField()
    
    # Altitude of the transmission; load_data fills it from the
    # 'height-above-ellipsoid' column.
    alt = FloatField()

In [6]:
# Creating the Speed document    
class Speed(EmbeddedDocument):
    """Embedded document holding the movement data of a single transmission."""
    
    # Speed of the crane (the 'ground-speed' column).
    ground_speed = FloatField()
    
    # Heading of the crane in degrees.
    # NOTE(review): load_data never sets this field, so stored documents only
    # contain ground_speed - confirm whether that is intentional.
    heading = IntField()

In [6]:
# Creating the TrackerMetadata document
class TrackerMetadata(EmbeddedDocument):
    """Embedded document holding tracker-related metadata of a transmission."""
    
    # Is the tracker still visible or not? (the 'visible' column)
    visible = BooleanField()
    
    # Type of sensor used in the tracker (the 'sensor-type' column, e.g. "gps").
    sensor_type = StringField()
    
    # Voltage level of the tracker (the 'tag-voltage' column).
    tag_voltage = FloatField()

In [17]:
# Creating the Transmission document 
class Transmission(Document):
    """MongoEngine document for a single transmission sent by a tracker.

    Position, movement and tracker metadata are stored as embedded documents;
    the owning ``Tracker`` is stored as a reference.
    """
    
    # Identifier of the transmission (the 'event-id' column).
    event_id = IntField()
    
    # Timestamp of when the transmission was sent.
    timestamp = DateTimeField()
    
    # Embedded geometry of the transmission.
    # When loading the data, an instance of a Geometry document
    # is passed as the value for the geometry field.
    geometry = EmbeddedDocumentField(Geometry)
    
    # Embedded speed-related data of the transmission.
    # When loading the data, an instance of a Speed document
    # is passed as the value for the speed field.
    speed = EmbeddedDocumentField(Speed)
    
    # Embedded metadata of the transmission.
    # When loading the data, an instance of a TrackerMetadata document
    # is passed as the value for the metadata field.
    metadata = EmbeddedDocumentField(TrackerMetadata)
    
    # Reference to the tracker the transmission belongs to.
    # When loading the data, an instance of a Tracker document
    # is passed as the value for the tracker field.
    tracker = ReferenceField(Tracker)

In [18]:
def load_data(df, name, country):
    """Load one crane dataset into MongoDB.

    Saves a single ``Tracker`` document describing the dataset, then builds
    one ``Transmission`` document per dataframe row and bulk inserts them.

    Parameters
    ----------
    df : pandas.DataFrame
        The dataset to load. The columns read are 'study-name',
        'individual-taxon-canonical-name', 'individual-local-identifier',
        'timestamp', 'event-id', 'location-long', 'location-lat',
        'height-above-ellipsoid', 'visible', 'sensor-type', 'tag-voltage'
        and 'ground-speed'. The first and last rows provide the start and
        end date of the tracker.
    name
        Name of the crane; stored on the tracker and used in progress messages.
    country
        Country of origin of the crane (e.g. "sw"). Currently unused: the
        altitude is always read from 'height-above-ellipsoid'.
        NOTE(review): datasets whose elevation column has a different name
        will need a branch on this argument - confirm the column names first.

    Raises
    ------
    ValueError
        If ``df`` contains no rows.
    """

    # Number of transmissions belonging to the tracker.
    transmission_Count = len(df.index)

    # Fail early with a clear message instead of an opaque KeyError on row 0.
    if transmission_Count == 0:
        raise ValueError('Cannot load an empty dataframe for: ' + str(name))

    # Start and end date are taken from the first and last row. Positional
    # access (.iloc) is used so that dataframes whose index does not run
    # from 0 to n-1 (e.g. after filtering) are handled as well.
    start_Date = df['timestamp'].iloc[0]
    end_Date = df['timestamp'].iloc[-1]

    # Create a new tracker, this is only done once per dataset.
    tracker = Tracker(study_name = df['study-name'].iloc[0],
                      individual_taxon_canonical_name = df['individual-taxon-canonical-name'].iloc[0],
                      individual_local_identifier = df['individual-local-identifier'].iloc[0],
                      start_date = start_Date,
                      end_date = end_Date,
                      name = name,
                      transmission_Count = transmission_Count).save()

    # List collecting the new transmissions; it is handed to the
    # MongoDB bulk insert once all rows have been converted.
    transmissions = []

    # Print when list appending process starts.
    print('Start appending transmissions to list from: ' + str(name) )

    # For each row in the dataframe the following code is executed.
    for _, row in df.iterrows():

        # Create the geometry document.
        # NOTE: To use geometry queries the longitude value has to come first.
        geometry = Geometry(coord = [row['location-long'],row['location-lat']],
                            alt = row['height-above-ellipsoid'])

        # Create the metadata document in which we pass the required values.
        metadata = TrackerMetadata(visible = row['visible'],
                                   sensor_type = row['sensor-type'],
                                   tag_voltage = row['tag-voltage'])

        # Create the speed document. Only the ground speed is stored;
        # the 'heading' column is not loaded.
        speed = Speed(ground_speed = row['ground-speed'])

        # Create transmission document and append it to the transmissions list.
        transmissions.append(Transmission(event_id = row['event-id'],
                                          timestamp = row['timestamp'],
                                          geometry = geometry,speed = speed,
                                          metadata = metadata,tracker = tracker))

    # Print when list appending is done.
    print('Bulk inserting: '+ str(transmission_Count) + ' transmissions from: ' + str(name) )

    # Bulk insert the populated transmissions list. load_bulk=False avoids
    # reading every inserted document back from the database; the result of
    # the insert is not used here.
    Transmission.objects.insert(transmissions,load_bulk=False)

    # Print if insert is successful.
    print("Done inserting "+ str(transmission_Count) + " transmissions")
    

In [19]:
# Load the Frida dataframe into the database using the load_data function.
load_data(df = SW_Crane_Frida, name = "Frida", country = "sw")

Start appending transmissions to list from: Frida
Bulk inserting: 123805 transmissions from: Frida
Done inserting 123805 transmissions


In [20]:
# Look up the tracker named 'Frida', restricted to its name and id fields,
# and show the result as JSON.
(
    Tracker.objects(name = 'Frida')
    .only('name', 'id')
    .to_json()
)

'[{"_id": {"$oid": "5e8f0e750b0a0aab0461a0e0"}, "name": "Frida"}]'

In [21]:
# Query the first 10 transmissions related to the crane "Frida".
# The tracker is looked up by name instead of by a hard-coded ObjectId:
# load_data saves a new Tracker document on every run, so an id copied
# from an earlier run no longer matches after reloading the data.
Transmission.objects(tracker = Tracker.objects(name = 'Frida').first())[:10].to_json()

'[{"_id": {"$oid": "5e8f0eb50b0a0aab0461a0e1"}, "event_id": 1154727247, "timestamp": {"$date": 1374375992000}, "geometry": {"coord": {"type": "Point", "coordinates": [13.583908, 57.503796]}, "alt": 193.0}, "speed": {"ground_speed": 0.0}, "metadata": {"visible": true, "sensor_type": "gps", "tag_voltage": 4110.0}, "tracker": {"$oid": "5e8f0e750b0a0aab0461a0e0"}}, {"_id": {"$oid": "5e8f0eb50b0a0aab0461a0e2"}, "event_id": 1154727246, "timestamp": {"$date": 1374378694000}, "geometry": {"coord": {"type": "Point", "coordinates": [13.578312, 57.504063]}, "alt": 194.0}, "speed": {"ground_speed": 0.5144000000000001}, "metadata": {"visible": true, "sensor_type": "gps", "tag_voltage": 4100.0}, "tracker": {"$oid": "5e8f0e750b0a0aab0461a0e0"}}, {"_id": {"$oid": "5e8f0eb50b0a0aab0461a0e3"}, "event_id": 1154727245, "timestamp": {"$date": 1374379629000}, "geometry": {"coord": {"type": "Point", "coordinates": [13.578205, 57.50415]}, "alt": 199.0}, "speed": {"ground_speed": 0.0}, "metadata": {"visible": 

In [22]:
# Check the execution plan and speed of the tracker query before indexing.
# The tracker is looked up by name because its ObjectId changes whenever
# load_data saves a new Tracker document.
Transmission.objects(tracker = Tracker.objects(name = 'Frida').first()).explain()

{'queryPlanner': {'plannerVersion': 1,
  'namespace': 'Crane_Databasa.transmission',
  'indexFilterSet': False,
  'parsedQuery': {'tracker': {'$eq': ObjectId('5e8f0e750b0a0aab0461a0e0')}},
  'winningPlan': {'stage': 'COLLSCAN',
   'filter': {'tracker': {'$eq': ObjectId('5e8f0e750b0a0aab0461a0e0')}},
   'direction': 'forward'},
  'rejectedPlans': []},
 'executionStats': {'executionSuccess': True,
  'nReturned': 123805,
  'executionTimeMillis': 66,
  'totalKeysExamined': 0,
  'totalDocsExamined': 123805,
  'executionStages': {'stage': 'COLLSCAN',
   'filter': {'tracker': {'$eq': ObjectId('5e8f0e750b0a0aab0461a0e0')}},
   'nReturned': 123805,
   'executionTimeMillisEstimate': 50,
   'works': 123807,
   'advanced': 123805,
   'needTime': 1,
   'needYield': 0,
   'saveState': 967,
   'restoreState': 967,
   'isEOF': 1,
   'invalidates': 0,
   'direction': 'forward',
   'docsExamined': 123805},
  'allPlansExecution': []},
 'serverInfo': {'host': 'geostack-system',
  'port': 27017,
  'version

In [23]:
# Create a 2D index on the coordinates of the embedded geometry.
Transmission.create_index(keys = [('geometry.coord.coordinates', '2d')])

'geometry.coord.coordinates_2d'

In [24]:
# Create an index on the tracker field (the key is a plain string).
Transmission.create_index('tracker')

'tracker_1'

In [25]:
# Create an index on the timestamp field (the key is a plain string).
Transmission.create_index('timestamp')

'timestamp_1'

In [26]:
# Check the execution plan and speed of the tracker query after indexing.
# The tracker is looked up by name because its ObjectId changes whenever
# load_data saves a new Tracker document.
Transmission.objects(tracker = Tracker.objects(name = 'Frida').first()).explain()

{'queryPlanner': {'plannerVersion': 1,
  'namespace': 'Crane_Databasa.transmission',
  'indexFilterSet': False,
  'parsedQuery': {'tracker': {'$eq': ObjectId('5e8f0e750b0a0aab0461a0e0')}},
  'winningPlan': {'stage': 'FETCH',
   'inputStage': {'stage': 'IXSCAN',
    'keyPattern': {'tracker': 1},
    'indexName': 'tracker_1',
    'isMultiKey': False,
    'multiKeyPaths': {'tracker': []},
    'isUnique': False,
    'isSparse': False,
    'isPartial': False,
    'indexVersion': 2,
    'direction': 'forward',
    'indexBounds': {'tracker': ["[ObjectId('5e8f0e750b0a0aab0461a0e0'), ObjectId('5e8f0e750b0a0aab0461a0e0')]"]}}},
  'rejectedPlans': []},
 'executionStats': {'executionSuccess': True,
  'nReturned': 123805,
  'executionTimeMillis': 174,
  'totalKeysExamined': 123805,
  'totalDocsExamined': 123805,
  'executionStages': {'stage': 'FETCH',
   'nReturned': 123805,
   'executionTimeMillisEstimate': 121,
   'works': 123806,
   'advanced': 123805,
   'needTime': 0,
   'needYield': 0,
   'sa