In [1]:
import csv
import hashlib
import io
import json
import numpy as np
import os
import requests
from datetime import datetime, timedelta
from pathlib import Path

import dateparser
import keyring
import numpy as np
import pandas as pd


class MovebankAPI:
    """Thin client for the Movebank ``direct-read`` REST service.

    Stores the credentials and an optional study id, issues authenticated GET
    requests, and turns the CSV text that comes back into lists of dicts or
    pandas DataFrames.
    """

    def __init__(self, username, password, study_id=None):
        """Remember the credentials and the study used by the study-scoped calls.

        Args:
            username: Movebank user name used for HTTP basic auth.
            password: Matching password.
            study_id: Study id sent with individual/event requests.
        """
        self.username = username
        self.password = password
        self.study_id = study_id

    def callMovebankAPI(self, params):
        """Send one GET request to the direct-read endpoint.

        Args:
            params: Sequence of ``(name, value)`` pairs, e.g.
                ``(('entity_type', 'study'), ('study_id', 123))``.

        Returns:
            str: The response body as text. An empty string is returned when
            the license hash is rejected (HTTP 403 on the follow-up request).
            For any other non-200 status the (error) body is printed and
            returned as text.
        """
        url = 'https://www.movebank.org/movebank/service/direct-read'
        auth = (self.username, self.password)

        response = requests.get(url, params=params, auth=auth)
        print("Request " + response.url)
        if response.status_code == 200:  # successful request
            if b'License Terms:' in response.content:
                # Only the license terms were returned: hash them and repeat
                # the request with the hash appended. See also
                # https://github.com/movebank/movebank-api-doc/blob/master/movebank
                # api.md#read-and-accept-license-terms-using-curl
                print("Has license terms")
                license_md5 = hashlib.md5(response.content).hexdigest()
                params = tuple(params) + (('license-md5', license_md5), )
                # The cookie of the first response has to be sent along.
                response = requests.get(url,
                                        params=params,
                                        cookies=response.cookies,
                                        auth=auth)
                if response.status_code == 403:  # incorrect hash
                    print("Incorrect hash")
                    return ''
            return response.content.decode('utf-8')

        # Decode instead of str(bytes), which would yield the "b'...'" repr.
        error_text = response.content.decode('utf-8', errors='replace')
        print(error_text)
        return error_text

    def getStudies(self):
        """Return the studies whose data are fully visible to this account.

        Returns:
            list[dict] | None: One dict per study (CSV columns as keys), or
            ``None`` when the API returned an empty body.
        """
        studies = self.callMovebankAPI(
            (('entity_type', 'study'), ('i_can_see_data', 'true'),
             ('there_are_data_which_i_cannot_see', 'false')))
        if len(studies) > 0:
            # parse raw text to dicts
            reader = csv.DictReader(io.StringIO(studies), delimiter=',')
            # The response is filtered again on the client side.
            return [
                s for s in reader if s['i_can_see_data'] == 'true'
                and s['there_are_data_which_i_cannot_see'] == 'false'
            ]

    @staticmethod
    def getStudiesBySensor(studies, sensorname='GPS'):
        """Keep the studies whose ``sensor_type_ids`` field contains ``sensorname``."""
        return [s for s in studies if sensorname in s['sensor_type_ids']]

    def getIndividualsByStudy(self):
        """Return the individuals of ``self.study_id``.

        Returns:
            list[dict] | None: One dict per individual, or ``None`` when the
            API returned an empty body.
        """
        individuals = self.callMovebankAPI(
            (('entity_type', 'individual'), ('study_id', self.study_id)))
        if len(individuals) > 0:
            return list(csv.DictReader(io.StringIO(individuals),
                                       delimiter=','))

    def getIndividualEvents(self,
                            individual_id,
                            sensor_type_id=653,
                            transform=False):
        """Fetch all events of one individual for one sensor type.

        Args:
            individual_id: Movebank id of the individual.
            sensor_type_id: Numeric sensor type id (see the table below).
            transform: When true, GPS (653) events are passed through
                ``transformRawGPS`` and acceleration (2365683) events through
                ``transformRawACC``; other sensor types are returned raw.

        Returns:
            The parsed events (list of dicts), the transformed events, or
            ``None`` when the API returned an empty body.

        SENSORS
        ===============================================================================
        description,external_id,id,is_location_sensor,name
        "","bird-ring",397,true,"Bird Ring"
        "","gps",653,true,"GPS"
        "","radio-transmitter",673,true,"Radio Transmitter"
        "","argos-doppler-shift",82798,true,"Argos Doppler Shift"
        "","natural-mark",2365682,true,"Natural Mark"
        "","acceleration",2365683,false,"Acceleration"
        "","solar-geolocator",3886361,true,"Solar Geolocator"
        "","accessory-measurements",7842954,false,"Accessory Measurements"
        "","solar-geolocator-raw",9301403,false,"Solar Geolocator Raw"
        "","barometer",77740391,false,"Barometer"
        "","magnetometer",77740402,false,"Magnetometer"
        "","orientation",819073350,false,"Orientation"
        "","solar-geolocator-twilight",914097241,false,"Solar Geolocator Twilight"
        """
        params = (('entity_type', 'event'), ('study_id', self.study_id),
                  ('individual_id', individual_id),
                  ('sensor_type_id', sensor_type_id), ('attributes', 'all'))
        events_ = self.callMovebankAPI(params)
        if events_:
            events = list(csv.DictReader(io.StringIO(events_), delimiter=','))
            if sensor_type_id == 653 and transform:
                return self.transformRawGPS(events)
            elif sensor_type_id == 2365683 and transform:
                return self.transformRawACC(events)
            else:
                return events

    @staticmethod
    def transformRawGPS(gpsevents):
        """Reduce GPS events to ``(timestamp, deployment_id, lat, long)`` tuples.

        Non-empty latitude/longitude strings are converted to ``float`` in
        place on the event dict; empty or unparsable values are left as they
        are.
        """

        def transform(e):  # dimension reduction and data type conversion
            try:
                if len(e['location_lat']) > 0:
                    e['location_lat'] = float(e['location_lat'])
                if len(e['location_long']) > 0:
                    e['location_long'] = float(e['location_long'])
            except (TypeError, ValueError):
                # Only conversion problems are tolerated; anything else
                # (e.g. a missing column) should surface to the caller.
                print("Could not parse long/lat.")
            return e['timestamp'], e['deployment_id'], e['location_lat'], e[
                'location_long']

        return [transform(e) for e in gpsevents]

    @staticmethod
    def transformRawACC(accevents, unit='m/s2', sensitivity='high'):
        """Expand raw tri-axial acceleration bursts into per-sample tuples.

        Each event holds one burst ``X Y Z X Y Z ...`` of raw integers. The
        result contains, per event, a list of
        ``(timestamp, deployment_id, X', Y', Z')`` tuples with interpolated
        timestamps and values in m/s^2 (default) or g (``unit='g'``).
        Assumes e-obs acceleration sensors.
        Acknowledgments to Anne K. Scharf and her great moveACC package, see
        https://gitlab.com/anneks/moveACC

        Args:
            accevents: List of event dicts as returned by the API.
            unit: ``'g'`` for g; any other value yields m/s^2.
            sensitivity: ``'low'`` or ``'high'``; only consulted for tags
                with ``tag_local_identifier <= 2241``.

        Returns:
            list[list[tuple]]: One inner list per event; ``[]`` for no events.
        """
        if not accevents:
            # Nothing to transform; avoids indexing into an empty list below.
            return []

        ts_format = '%Y-%m-%d %H:%M:%S.%f'
        unitfactor = 1 if unit == 'g' else 9.81

        # The calibration slope is chosen from the tag id of the first event.
        tag_local_identifier = int(accevents[0]['tag_local_identifier'])
        slope = 0.001  # e-obs 1st generation, high sensitivity
        if tag_local_identifier <= 2241:
            if sensitivity == 'low':
                slope = 0.0027
        elif 2242 <= tag_local_identifier <= 4117:  # e-obs 2nd generation
            slope = 0.0022
        else:
            slope = 1 / 512

        out = []
        for event in accevents:
            deploym = event['deployment_id']
            # Time between two consecutive samples of one axis.
            period = 1 / float(
                event['acceleration_sampling_frequency_per_axis'])
            start = datetime.strptime(event['timestamp'],
                                      ts_format)  # start timestamp
            raw = list(map(int, event['accelerations_raw'].split()))

            burst = []
            # Trailing values that do not form a full X/Y/Z triple are ignored.
            for i in range(len(raw) // 3):
                x, y, z = raw[3 * i:3 * i + 3]
                stamp = start + timedelta(seconds=period * i)
                burst.append((stamp.strftime(ts_format), deploym,
                              (x - 2048) * slope * unitfactor,
                              (y - 2048) * slope * unitfactor,
                              (z - 2048) * slope * unitfactor))
            out.append(burst)
        return out

    @staticmethod
    def _pprint(list_):
        """Print ``list_`` as indented JSON."""
        print(json.dumps(list_, indent=4))

    @staticmethod
    def to_pandas(list_, sensor_type=None, save_to=None, transformed=False):
        """Convert API results to a DataFrame and optionally save it as CSV.

        Args:
            list_: Raw events, or the output of ``transformRawGPS`` /
                ``transformRawACC``.
            sensor_type: ``'acc'`` or ``'gps'`` (case-insensitive) to get
                typed columns for transformed data.
            save_to: Optional CSV path; written without the index.
            transformed: Whether ``list_`` is transformed output.

        Returns:
            pandas.DataFrame
        """
        kind = sensor_type.lower() if sensor_type else None
        if kind == 'acc' and transformed:
            # Flatten the per-event bursts row by row. Unlike stacking them
            # into one 3-D array, this also works when bursts differ in
            # length or when there are no bursts at all.
            rows = [sample for burst in (list_ or []) for sample in burst]
            df = pd.DataFrame(
                rows,
                columns=['timestamp', 'deployment_id', 'AccX', 'AccY', 'AccZ'],
            )
            df = df.astype({
                'timestamp': 'datetime64[ns]',
                'deployment_id': 'int32',
                'AccX': 'float32',
                'AccY': 'float32',
                'AccZ': 'float32'
            })
        elif kind == 'gps' and transformed:
            df = pd.DataFrame(list_,
                              columns=[
                                  'timestamp', 'deployment_id', 'location_lat',
                                  'location_long'
                              ])
            df = df.astype({
                'timestamp': 'datetime64[ns]',
                'deployment_id': 'int32',
                'location_lat': 'float32',
                'location_long': 'float32'
            })
        else:
            df = pd.DataFrame(list_)
        if save_to:
            df.to_csv(save_to, index=False)
        return df

In [2]:
class SimpleMovebankAPI:
    """Convenience wrapper around a ``MovebankAPI``-like client.

    All requests go through the client passed to the constructor
    (``self.mb``), never through a notebook-level global.
    """

    def __init__(self, mb):
        """Store the client used for every request."""
        self.mb = mb

    def get_cat_id(self, simple_id=None, all_=False):
        """Look up the Movebank id of an individual by its local identifier.

        Args:
            simple_id: Value of the ``local_identifier`` field to look for.
            all_: When true, return the full list of individuals instead.

        Returns:
            The ``id`` field of the first matching individual, or the list of
            all individuals when ``all_`` is true.

        Raises:
            IndexError: If no individual has the requested identifier.
        """
        individuals = self.mb.getIndividualsByStudy()
        if all_:
            return individuals
        for individual in individuals or []:
            if individual['local_identifier'] == simple_id:
                return individual['id']
        raise IndexError(
            'No individual with local_identifier {!r}'.format(simple_id))

    def get_gps(self, individual_id, save=None):
        """Fetch the raw GPS events (sensor type 653) as a DataFrame.

        Args:
            individual_id: Movebank id of the individual.
            save: Optional CSV path; written without the index.
        """
        gpsevents = self.mb.getIndividualEvents(individual_id=individual_id,
                                                sensor_type_id=653,
                                                transform=False)
        gps_df = self.mb.to_pandas(gpsevents,
                                   sensor_type='gps',
                                   transformed=False)
        if save:
            gps_df.to_csv(save, index=False)
        return gps_df

    def get_acc(self, individual_id, save=None):
        """Fetch the transformed acceleration events (sensor type 2365683).

        Args:
            individual_id: Movebank id of the individual.
            save: Optional CSV path; written without the index.
        """
        accevents = self.mb.getIndividualEvents(
            individual_id=individual_id,
            sensor_type_id=2365683,
            transform=True,
        )
        acc_df = self.mb.to_pandas(accevents,
                                   sensor_type='acc',
                                   transformed=True)
        if save:
            acc_df.to_csv(save, index=False)
        return acc_df

    @staticmethod
    def _parse_duration(duration):
        """Convert ``'HH:MM:SS'`` or ``'HH:MM:SS.fff'`` to a ``timedelta``."""
        parts = duration.strip().split(':')
        if len(parts) != 3:
            raise ValueError(
                'duration must look like HH:MM:SS[.fff], got {!r}'.format(
                    duration))
        whole_seconds, _, fraction = parts[2].partition('.')
        # The digits after the dot are a decimal fraction of a second
        # ('.5' is half a second), so pad/cut them to microseconds.
        microseconds = int(fraction.ljust(6, '0')[:6]) if fraction else 0
        return timedelta(hours=int(parts[0]),
                         minutes=int(parts[1]),
                         seconds=int(whole_seconds),
                         microseconds=microseconds)

    @staticmethod
    def acc_section(acc_df, start_timestamp, duration, save=None):
        """Cut the rows between a start time and start + duration (inclusive).

        Args:
            acc_df: DataFrame with a ``timestamp`` column.
            start_timestamp: Start of the section, e.g. ``2021-09-29 14:10:12``.
            duration: Length of the section as ``HH:MM:SS`` or ``HH:MM:SS.fff``.
            save: Optional CSV path; written without the index.

        Returns:
            The matching rows of ``acc_df``.

        Raises:
            ValueError: If ``start_timestamp`` or ``duration`` cannot be parsed.
        """
        start = dateparser.parse(start_timestamp)
        if start is None:
            raise ValueError(
                'Could not parse start_timestamp {!r}'.format(start_timestamp))
        end = start + SimpleMovebankAPI._parse_duration(duration)
        end_timestamp = end.strftime('%Y-%m-%d %H:%M:%S.%f')

        in_section = ((acc_df['timestamp'] >= start_timestamp)
                      & (acc_df['timestamp'] <= end_timestamp))
        section = acc_df.loc[in_section]
        if save:
            section.to_csv(save, index=False)
        return section

In [3]:
def add_video_time(df, video_date=None):
    """Add a ``video_time`` column: time elapsed since the first row, on ``video_date``.

    The first row is mapped to midnight of ``video_date``; every other row is
    offset by its distance (in clock time) from the first row.

    Args:
        df: DataFrame with ``timestamp``, ``AccX``, ``AccY`` and ``AccZ``
            columns.
        video_date (str): The date in which the video was recorded, formatted
            as `YYYY-mm-dd`. Defaults to the date of the first timestamp.

    Returns:
        A new DataFrame with a fresh 0..n-1 index and the columns
        ``timestamp, video_time, AccX, AccY, AccZ``.
    """
    df = df.astype({'timestamp': 'datetime64[ns]'}).reset_index(drop=True)
    if df.empty:
        df['video_time'] = pd.Series(dtype='datetime64[ns]')
    else:
        first = df['timestamp'].iloc[0]
        if video_date is None:
            origin = first.normalize()
        else:
            origin = pd.Timestamp(video_date)
        # Plain timestamp arithmetic instead of string replacement on
        # "0 days ...": also correct for offsets of one day or more.
        elapsed = df['timestamp'] - first
        df['video_time'] = (origin + elapsed).astype('datetime64[ns]')
    return df[['timestamp', 'video_time', 'AccX', 'AccY', 'AccZ']]

In [4]:
# Authenticated client for study 1748526129; the password is read from the
# system keyring entry ('movebank', 'malyetama').
mb = MovebankAPI(
    username='malyetama',
    password=keyring.get_password('movebank', 'malyetama'),
    study_id=1748526129,
)
smb = SimpleMovebankAPI(mb=mb)

In [5]:
# Resolve the Movebank id of the individual whose local identifier is 'CAT_001'.
cat_id = smb.get_cat_id(simple_id='CAT_001')

Request https://www.movebank.org/movebank/service/direct-read?entity_type=individual&study_id=1748526129


In [6]:
# Download the raw GPS events (sensor type 653) of this individual and show them.
gps_df = smb.get_gps(individual_id=cat_id)
gps_df

Request https://www.movebank.org/movebank/service/direct-read?entity_type=event&study_id=1748526129&individual_id=1788706997&sensor_type_id=653&attributes=all


Unnamed: 0,individual_id,deployment_id,tag_id,study_id,sensor_type_id,individual_local_identifier,tag_local_identifier,individual_taxon_canonical_name,comments,gps_hdop,gps_satellite_count,ground_speed,heading,height_above_ellipsoid,location_lat,location_long,tag_voltage,timestamp,event_id,visible
0,1788706997,1788737103,1788703553,1748526129,653,CAT_001,3870,Felis catus,0,0.94,9,0.0,200.0,116.0,35.919627,-78.912407,1000.0,2021-09-29 14:10:12.000,20412009544,true
1,1788706997,1788737103,1788703553,1748526129,653,CAT_001,3870,Felis catus,0,0.94,9,0.0,200.0,116.0,35.919627,-78.912408,1000.0,2021-09-29 14:10:14.000,20412009545,true
2,1788706997,1788737103,1788703553,1748526129,653,CAT_001,3870,Felis catus,0,0.94,9,0.0,200.0,116.0,35.919627,-78.912408,1000.0,2021-09-29 14:10:16.000,20412009546,true
3,1788706997,1788737103,1788703553,1748526129,653,CAT_001,3870,Felis catus,0,0.94,9,0.0,200.0,116.0,35.919628,-78.912408,1000.0,2021-09-29 14:10:18.000,20412009547,true
4,1788706997,1788737103,1788703553,1748526129,653,CAT_001,3870,Felis catus,0,0.94,9,0.0,200.0,116.0,35.919628,-78.91241,1000.0,2021-09-29 14:10:20.000,20412009548,true
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
77446,1788706997,1788873621,1788820431,1748526129,653,CAT_001,3847,Felis catus,0,1.11,8,0.0,200.0,117.0,35.919688,-78.912253,1000.0,2021-10-15 15:07:52.000,20412596166,true
77447,1788706997,1788873621,1788820431,1748526129,653,CAT_001,3847,Felis catus,0,1.11,8,0.0,200.0,117.0,35.919683,-78.91225,1000.0,2021-10-15 15:07:54.000,20412596167,true
77448,1788706997,1788873621,1788820431,1748526129,653,CAT_001,3847,Felis catus,0,1.11,8,0.0,200.0,117.0,35.919682,-78.912247,1000.0,2021-10-15 15:07:56.000,20412596168,true
77449,1788706997,1788873621,1788820431,1748526129,653,CAT_001,3847,Felis catus,0,1.1,8,0.0,200.0,117.0,35.919683,-78.912257,1000.0,2021-10-15 15:07:58.000,20412596169,true


In [7]:
# Download the acceleration events (sensor type 2365683), transformed to one
# row per sample.
acc_df = smb.get_acc(individual_id=cat_id)

Request https://www.movebank.org/movebank/service/direct-read?entity_type=event&study_id=1748526129&individual_id=1788706997&sensor_type_id=2365683&attributes=all


In [8]:
# First video: cut the matching acceleration rows and attach the video clock.
section_df_1 = add_video_time(
    df=smb.acc_section(
        acc_df,
        start_timestamp='2021-10-10 15:51:33',
        duration='00:47:26.365',
    ),
    video_date='2021-10-10',
)
section_df_1

Unnamed: 0,timestamp,video_time,AccX,AccY,AccZ
0,2021-10-10 15:51:33.000000,2021-10-10 00:00:00.000000,-43.228745,-44.005699,-44.307846
1,2021-10-10 15:51:33.062500,2021-10-10 00:00:00.062500,-43.595638,-42.171227,-43.185581
2,2021-10-10 15:51:33.125000,2021-10-10 00:00:00.125000,-43.876205,-41.674843,-43.185581
3,2021-10-10 15:51:33.187500,2021-10-10 00:00:00.187500,-43.077671,-43.293491,-44.199936
4,2021-10-10 15:51:33.250000,2021-10-10 00:00:00.250000,-44.329430,-44.005699,-44.199936
...,...,...,...,...,...
45393,2021-10-10 16:38:59.062500,2021-10-10 00:47:26.062500,-43.185581,-43.315075,-44.005699
45394,2021-10-10 16:38:59.125000,2021-10-10 00:47:26.125000,-43.185581,-43.315075,-44.005699
45395,2021-10-10 16:38:59.187500,2021-10-10 00:47:26.187500,-43.185581,-43.315075,-44.005699
45396,2021-10-10 16:38:59.250000,2021-10-10 00:47:26.250000,-43.185581,-43.315075,-44.005699


In [20]:
# Second video: its video clock continues where the first section stopped.

# Spacing between two consecutive samples, taken from the first section.
sample_step = (section_df_1['video_time'].iloc[1]
               - section_df_1['video_time'].iloc[0])
# Video time of the first row of section 2: one sample after the last row of
# section 1. Using the full timestamp keeps the hour component as well.
video_offset = section_df_1['video_time'].iloc[-1] + sample_step

section_df_2 = smb.acc_section(
    acc_df,
    start_timestamp='2021-10-10 16:48:41',
    duration='00:32:32.602').reset_index(drop=True)

# Offset of every row from the first row of this section.
elapsed = section_df_2['timestamp'] - section_df_2['timestamp'].iloc[0]
section_df_2['video_time'] = video_offset + elapsed

section_df_2 = section_df_2[['timestamp', 'video_time', 'AccX', 'AccY', 'AccZ']]
section_df_2

Unnamed: 0,timestamp,video_time,AccX,AccY,AccZ
0,2021-10-10 16:48:41.000000,2021-10-10 00:47:26.375000,-44.156773,-42.883434,-44.005699
1,2021-10-10 16:48:41.062500,2021-10-10 00:47:26.437500,-44.156773,-42.797108,-43.940952
2,2021-10-10 16:48:41.125000,2021-10-10 00:47:26.500000,-44.092026,-42.861851,-44.005699
3,2021-10-10 16:48:41.187500,2021-10-10 00:47:26.562500,-44.135189,-42.969761,-43.984116
4,2021-10-10 16:48:41.250000,2021-10-10 00:47:26.625000,-44.199936,-42.948181,-43.897789
...,...,...,...,...,...
31221,2021-10-10 17:21:13.312500,2021-10-10 01:19:58.687500,-43.077671,-43.444565,-44.005699
31222,2021-10-10 17:21:13.375000,2021-10-10 01:19:58.750000,-43.077671,-43.444565,-44.005699
31223,2021-10-10 17:21:13.437500,2021-10-10 01:19:58.812500,-43.077671,-43.444565,-44.005699
31224,2021-10-10 17:21:13.500000,2021-10-10 01:19:58.875000,-43.077671,-43.444565,-44.005699


In [128]:
# Inspect the first 16 video_time values of the first section.
section_df_1['video_time'][:16]

0    2021-10-10 00:00:00.000000
1    2021-10-10 00:00:00.062500
2    2021-10-10 00:00:00.125000
3    2021-10-10 00:00:00.187500
4    2021-10-10 00:00:00.250000
5    2021-10-10 00:00:00.312500
6    2021-10-10 00:00:00.375000
7    2021-10-10 00:00:00.437500
8    2021-10-10 00:00:00.500000
9    2021-10-10 00:00:00.562500
10   2021-10-10 00:00:00.625000
11   2021-10-10 00:00:00.687500
12   2021-10-10 00:00:00.750000
13   2021-10-10 00:00:00.812500
14   2021-10-10 00:00:00.875000
15   2021-10-10 00:00:00.937500
Name: video_time, dtype: datetime64[ns]

In [21]:
# Stack both sections into one frame; each part keeps its own row labels,
# so index values repeat across the two sections.
df_ = pd.concat([section_df_1, section_df_2])
df_

Unnamed: 0,timestamp,video_time,AccX,AccY,AccZ
0,2021-10-10 15:51:33.000000,2021-10-10 00:00:00.000000,-43.228745,-44.005699,-44.307846
1,2021-10-10 15:51:33.062500,2021-10-10 00:00:00.062500,-43.595638,-42.171227,-43.185581
2,2021-10-10 15:51:33.125000,2021-10-10 00:00:00.125000,-43.876205,-41.674843,-43.185581
3,2021-10-10 15:51:33.187500,2021-10-10 00:00:00.187500,-43.077671,-43.293491,-44.199936
4,2021-10-10 15:51:33.250000,2021-10-10 00:00:00.250000,-44.329430,-44.005699,-44.199936
...,...,...,...,...,...
31221,2021-10-10 17:21:13.312500,2021-10-10 01:19:58.687500,-43.077671,-43.444565,-44.005699
31222,2021-10-10 17:21:13.375000,2021-10-10 01:19:58.750000,-43.077671,-43.444565,-44.005699
31223,2021-10-10 17:21:13.437500,2021-10-10 01:19:58.812500,-43.077671,-43.444565,-44.005699
31224,2021-10-10 17:21:13.500000,2021-10-10 01:19:58.875000,-43.077671,-43.444565,-44.005699


In [22]:
# Write the combined sections to CSV without the index column.
df_.to_csv(
    '10-10-2021_kraken-769a9b6b_ACC_LS_unlabeled.csv',
    index=False
)