Author of original script: Philipp Meschenmoser, DBVIS, Uni Konstanz

Python wrapper with functions using Movebank's REST API to view available studies, read data and accept license terms programmatically

Acknowledgements to Anne K. Scharf and her great moveACC package, see https://gitlab.com/anneks/moveACC

In [1]:
import csv
import hashlib
import io
import json
import os
import requests
from datetime import datetime, timedelta

import keyring
import numpy as np
import pandas as pd

In [16]:
class MovebankAPI:
    def __init__(self, username, password, study_id=None):
        self.username = username
        self.password = password
        self.study_id= study_id

    def callMovebankAPI(self, params):
        """"
        params: Requests Movebank API with ((param1, value1), (param2, value2),).
        Return the API response as plain text.
        """
        response = requests.get(
            'https://www.movebank.org/movebank/service/direct-read',
            params=params,
            auth=(self.username, self.password))
        print("Request " + response.url)
        if response.status_code == 200:  # successful request
            if 'License Terms:' in str(response.content):
                # only the license terms are returned, hash and append them in a
                # subsequent request.
                # See also
                # https://github.com/movebank/movebank-api-doc/blob/master/movebank
                # api.md#read-and-accept-license-terms-using-curl
                print("Has license terms")
                hash = hashlib.md5(response.content).hexdigest()
                params = params + (('license-md5', hash), )
                # also attach previous cookie:
                response = requests.get(
                    'https://www.movebank.org/movebank/service/direct-read',
                    params=params,
                    cookies=response.cookies,
                    auth=(os.environ['mbus'], os.environ['mbpw']))
                if response.status_code == 403:  # incorrect hash
                    print("Incorrect hash")
                    return ''
            return response.content.decode('utf-8')
        print(str(response.content))
        return str(response.content)


    def getStudies(self):
        studies = self.callMovebankAPI(
            (('entity_type', 'study'), ('i_can_see_data', 'true'),
             ('there_are_data_which_i_cannot_see', 'false')))
        if len(studies) > 0:
            # parse raw text to dicts
            studies = csv.DictReader(io.StringIO(studies), delimiter=',')
            all_studies = [
                s for s in studies if s['i_can_see_data'] == 'true'
                and s['there_are_data_which_i_cannot_see'] == 'false'
            ]
            return all_studies

    @staticmethod
    def getStudiesBySensor(studies, sensorname='GPS'):
        return [s for s in studies if sensorname in s['sensor_type_ids']]

    def getIndividualsByStudy(self):
        individuals = self.callMovebankAPI(
            (('entity_type', 'individual'), ('study_id', self.study_id)))
        if len(individuals) > 0:
            return list(csv.DictReader(io.StringIO(individuals), delimiter=','))

    def getIndividualEvents(
        self, individual_id, sensor_type_id=653, transform=False
    ):
        """
        SENSORS
        ===============================================================================
        description,external_id,id,is_location_sensor,name
        "","bird-ring",397,true,"Bird Ring"
        "","gps",653,true,"GPS"
        "","radio-transmitter",673,true,"Radio Transmitter"
        "","argos-doppler-shift",82798,true,"Argos Doppler Shift"
        "","natural-mark",2365682,true,"Natural Mark"
        "","acceleration",2365683,false,"Acceleration"
        "","solar-geolocator",3886361,true,"Solar Geolocator"
        "","accessory-measurements",7842954,false,"Accessory Measurements"
        "","solar-geolocator-raw",9301403,false,"Solar Geolocator Raw"
        "","barometer",77740391,false,"Barometer"
        "","magnetometer",77740402,false,"Magnetometer"
        "","orientation",819073350,false,"Orientation"
        "","solar-geolocator-twilight",914097241,false,"Solar Geolocator Twilight"
        """
        params = (('entity_type', 'event'), ('study_id', self.study_id),
                  ('individual_id', individual_id),
                  ('sensor_type_id', sensor_type_id), ('attributes', 'all'))
        events_ = self.callMovebankAPI(params)
        if events_:
            events = list(csv.DictReader(io.StringIO(events_), delimiter=','))
            if sensor_type_id == 653 and transform:
                return self.transformRawGPS(events)
            elif sensor_type_id == 2365683 and transform:
                return self.transformRawACC(events)
            else:
                return events

    @staticmethod
    def transformRawGPS(gpsevents):
        # Returns a list of (ts, deployment_id, lat, long) tuples

        def transform(e):  # dimension reduction and data type conversion
            try:
                if len(e['location_lat']) > 0:
                    e['location_lat'] = float(e['location_lat'])
                if len(e['location_long']) > 0:
                    e['location_long'] = float(e['location_long'])
            except:
                print("Could not parse long/lat.")
            return e['timestamp'], e['deployment_id'], e['location_lat'], e[
                'location_long']

        return [transform(e) for e in gpsevents]


    @staticmethod
    def transformRawACC(accevents, unit='m/s2', sensitivity='high'):
        #  Transforms raw tri-axial acceleration from X Y Z X Y X Y Z to [(ts_interpol,
        # deployment, X', Y', Z'),...]
        #  X', Y', Z' are in m/s^2 or g. Assumes e-obs acceleration sensors.
        #  Acknowledgments to Anne K. Scharf and her great moveACC package, see
        # https://gitlab.com/anneks/moveACC

        ts_format = '%Y-%m-%d %H:%M:%S.%f'
        out = []

        if unit == 'g':
            unitfactor = 1
        else:
            unitfactor = 9.81

        tag_local_identifier = int(accevents[0]['tag_local_identifier'])
        slope = 0.001  # e-obs 1st generation, high sensitivity

        if tag_local_identifier <= 2241:
            if sensitivity == 'low':
                slope = 0.0027
        elif 2242 <= tag_local_identifier <= 4117:  # e-obs 2nd generation
            slope = 0.0022
        else:
            slope = 1 / 512

        for event in accevents:
            deploym = event['deployment_id']
            seconds = 1 / float(event['acceleration_sampling_frequency_per_axis'])
            parsedts = datetime.strptime(event['timestamp'],
                                         ts_format)  # start timestamp
            raw = list(map(int, event['accelerations_raw'].split()))

            #  derive in-between timestamps:
            ts = [
                parsedts + timedelta(seconds=seconds * x)
                for x in range(0, int(len(raw) / 3))
            ]

            #  transform XYZ list to list of (ts, deployment, x, y, z) tuples
            it = iter(raw)
            transformed = [(a.strftime(ts_format), deploym,
                            (b[0] - 2048) * slope * unitfactor,
                            (b[1] - 2048) * slope * unitfactor,
                            (b[2] - 2048) * slope * unitfactor)
                           for (a, b) in list(zip(ts, list(zip(it, it, it))))]
            out.append(transformed)
        return out

    @staticmethod
    def _pprint(list_):
        print(json.dumps(list_, indent=4))
        
    @staticmethod
    def to_pandas(list_, sensor_type=None, save_to=None, transformed=False):
        if sensor_type and sensor_type.lower() == 'acc' and transformed:
            arr = np.array(list_)
            m, n, r = arr.shape
            out_arr = np.column_stack(
                (np.repeat(np.arange(m), n), arr.reshape(m * n, -1))
            )
            df = pd.DataFrame(
                out_arr,
                columns=['idx', 'timestamp', 'deployment_id', 'AccX', 'AccY', 'AccZ'],
                
            )
            df.drop(columns=['idx'], inplace=True)
            df = df.astype(
                {
                    'timestamp': 'datetime64[ns]',
                    'deployment_id': 'int32',
                    'AccX': 'float32',
                    'AccY': 'float32',
                    'AccZ': 'float32'
                }
            )
        elif sensor_type and sensor_type.lower() == 'gps' and transformed:
            df = pd.DataFrame(
                list_,
                columns=['timestamp', 'deployment_id', 'location_lat', 'location_long']
            )
            df = df.astype(
                {
                    'timestamp': 'datetime64[ns]',
                    'deployment_id': 'int32',
                    'location_lat': 'float32',
                    'location_long': 'float32'
                }
            )
        else:
            df = pd.DataFrame(list_)
        if save_to:
            df.to_csv(save_to, index=False)
        return df

In [18]:
mb = MovebankAPI('malyetama', keyring.get_password('movebank', 'malyetama'), 1748526129)

In [None]:
# Return information about multiple studies studies
# allstudies = mb.getStudies()
# gpsstudies = mb.getStudiesBySensor(allstudies, 'GPS')
# mb.to_pandas(gpsstudies)

In [5]:
# Return a list of dictionaries of the individuals in a study
individuals = mb.getIndividualsByStudy()
mb._pprint(individuals)

Request https://www.movebank.org/movebank/service/direct-read?entity_type=individual&study_id=1748526129
[
    {
        "comments": "_id: kraken-769a9b6b",
        "death_comments": "",
        "earliest_date_born": "",
        "exact_date_of_birth": "",
        "id": "1788706997",
        "latest_date_born": "",
        "local_identifier": "CAT_001",
        "nick_name": "",
        "ring_id": "",
        "sex": "f",
        "taxon_canonical_name": "Felis catus",
        "timestamp_start": "2021-09-29 14:10:12.000",
        "timestamp_end": "2021-10-15 15:08:00.000",
        "number_of_events": "77451",
        "number_of_deployments": "2",
        "sensor_type_ids": "GPS,Acceleration",
        "taxon_detail": "domesticus"
    }
]


In [6]:
individual_id = 1788706997

In [160]:
# Get events (GPS) of an individual from a study
gpsevents = mb.getIndividualEvents(
    individual_id=individual_id,
    sensor_type_id=653,
    transform=False
)
gps_df = mb.to_pandas(gpsevents, sensor_type='gps', transformed=False)
gps_df

Request https://www.movebank.org/movebank/service/direct-read?entity_type=event&study_id=1748526129&individual_id=1788706997&sensor_type_id=653&attributes=all


Unnamed: 0,individual_id,deployment_id,tag_id,study_id,sensor_type_id,individual_local_identifier,tag_local_identifier,individual_taxon_canonical_name,comments,gps_hdop,gps_satellite_count,ground_speed,heading,height_above_ellipsoid,location_lat,location_long,tag_voltage,timestamp,event_id,visible
0,1788706997,1788737103,1788703553,1748526129,653,CAT_001,3870,Felis catus,0,0.94,9,0.0,200.0,116.0,35.919627,-78.912407,1000.0,2021-09-29 14:10:12.000,20412009544,true
1,1788706997,1788737103,1788703553,1748526129,653,CAT_001,3870,Felis catus,0,0.94,9,0.0,200.0,116.0,35.919627,-78.912408,1000.0,2021-09-29 14:10:14.000,20412009545,true
2,1788706997,1788737103,1788703553,1748526129,653,CAT_001,3870,Felis catus,0,0.94,9,0.0,200.0,116.0,35.919627,-78.912408,1000.0,2021-09-29 14:10:16.000,20412009546,true
3,1788706997,1788737103,1788703553,1748526129,653,CAT_001,3870,Felis catus,0,0.94,9,0.0,200.0,116.0,35.919628,-78.912408,1000.0,2021-09-29 14:10:18.000,20412009547,true
4,1788706997,1788737103,1788703553,1748526129,653,CAT_001,3870,Felis catus,0,0.94,9,0.0,200.0,116.0,35.919628,-78.91241,1000.0,2021-09-29 14:10:20.000,20412009548,true
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
77446,1788706997,1788873621,1788820431,1748526129,653,CAT_001,3847,Felis catus,0,1.11,8,0.0,200.0,117.0,35.919688,-78.912253,1000.0,2021-10-15 15:07:52.000,20412596166,true
77447,1788706997,1788873621,1788820431,1748526129,653,CAT_001,3847,Felis catus,0,1.11,8,0.0,200.0,117.0,35.919683,-78.91225,1000.0,2021-10-15 15:07:54.000,20412596167,true
77448,1788706997,1788873621,1788820431,1748526129,653,CAT_001,3847,Felis catus,0,1.11,8,0.0,200.0,117.0,35.919682,-78.912247,1000.0,2021-10-15 15:07:56.000,20412596168,true
77449,1788706997,1788873621,1788820431,1748526129,653,CAT_001,3847,Felis catus,0,1.1,8,0.0,200.0,117.0,35.919683,-78.912257,1000.0,2021-10-15 15:07:58.000,20412596169,true


In [118]:
gps_df.to_csv('/Users/Felis.catus/Desktop/kraken_api_res.csv', index=False)

In [162]:
# gps_df['timestamp'] = gps_df['timestamp'].astype('datetime64[ns]').dt.tz_localize('utc').dt.tz_convert('America/New_York').dt.tz_localize(None)
# data = gps_df[(gps_df['timestamp'].dt.date).astype('string') == '2021-10-10']
data.loc[(data['gps_hdop'].astype('float32') < 1.3)]
# gps_df.loc[(gps_df['time'] >= start_time) & (gps_df['time'] <= end_time)]

Unnamed: 0,individual_id,deployment_id,tag_id,study_id,sensor_type_id,individual_local_identifier,tag_local_identifier,individual_taxon_canonical_name,comments,gps_hdop,gps_satellite_count,ground_speed,heading,height_above_ellipsoid,location_lat,location_long,tag_voltage,timestamp,event_id,visible
59399,1788706997,1788866146,1788862041,1748526129,653,CAT_001,3814,Felis catus,0,0.0,4,0.0,200.0,111.0,35.919567,-78.912373,1000.0,2021-10-10 14:47:33,20412490877,true
59400,1788706997,1788866146,1788862041,1748526129,653,CAT_001,3814,Felis catus,0,0.0,4,0.0,200.0,111.0,35.919577,-78.912362,1000.0,2021-10-10 14:47:35,20412490878,true
59417,1788706997,1788866146,1788862041,1748526129,653,CAT_001,3814,Felis catus,0,0.0,4,0.0,200.0,111.0,35.919602,-78.912452,1000.0,2021-10-10 14:48:09,20412490895,true
59921,1788706997,1788866146,1788862041,1748526129,653,CAT_001,3814,Felis catus,0,0.0,3,0.0,200.0,107.0,35.919567,-78.912298,1000.0,2021-10-10 15:05:25,20412491399,true
59922,1788706997,1788866146,1788862041,1748526129,653,CAT_001,3814,Felis catus,0,0.0,3,0.0,200.0,107.0,35.919557,-78.912297,1000.0,2021-10-10 15:05:27,20412491400,true
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
64615,1788706997,1788866146,1788862041,1748526129,653,CAT_001,3814,Felis catus,0,1.17,6,0.0,200.0,91.0,35.919693,-78.912515,1000.0,2021-10-10 18:14:57,20412496093,true
64616,1788706997,1788866146,1788862041,1748526129,653,CAT_001,3814,Felis catus,0,1.17,6,0.0,200.0,91.0,35.919715,-78.912487,1000.0,2021-10-10 18:14:59,20412496094,true
64617,1788706997,1788866146,1788862041,1748526129,653,CAT_001,3814,Felis catus,0,1.17,6,0.0,200.0,91.0,35.919717,-78.912465,1000.0,2021-10-10 18:15:01,20412496095,true
64618,1788706997,1788866146,1788862041,1748526129,653,CAT_001,3814,Felis catus,0,1.17,6,0.0,200.0,91.0,35.919715,-78.91244,1000.0,2021-10-10 18:15:03,20412496096,true


In [116]:
gps_df['date'] = gps_df['timestamp'].astype('datetime64[ns]').dt.date.astype('datetime64[ns]')
gps_df['time'] = gps_df['timestamp'].astype('datetime64[ns]').dt.time
gps_df.time.unique()

array([datetime.time(10, 10, 12), datetime.time(10, 10, 14),
       datetime.time(10, 10, 16), ..., datetime.time(10, 2, 38),
       datetime.time(10, 49, 7), datetime.time(10, 49, 9)], dtype=object)

In [50]:
gps_df.to_csv('kraken_full.csv')

In [None]:
# Get events (ACC) of an individual from a study
accevents = mb.getIndividualEvents(
    individual_id=individual_id,
    sensor_type_id=2365683,
    transform=True,
)
acc_df = mb.to_pandas(accevents, sensor_type='acc')
acc_df

In [44]:
# out = mb.callMovebankAPI((('entity_type', 'event'), ('study_id', 1748526129), ('sensor_type_id', 653), ('attributes', 'all')))
# list(csv.DictReader(io.StringIO(out), delimiter=','))