# DIMS.FARM REST API example

In [1]:
import json
import os
import random

import pandas as pd
from requests import HTTPError

from watobs.datafarm import DatafarmRepository

# Read the API key from the environment so it is never hard-coded in the notebook.
api_key = os.getenv("DATAFARM_API_KEY")
# Fail early with an actionable message when the key is unset or empty.
assert api_key, "Set the DATAFARM_API_KEY environment variable before running this notebook"


### Connect to DIMS.FARM REST API

In [2]:
# Create the repository client with the API key, then call connect() on it.
dfr = DatafarmRepository(api_key)
dfr.connect()

### List available time series

In [6]:
# Fetch the time series listing and show its last rows.
time_series_list = dfr.list_time_series()
time_series_list.tail()

Unnamed: 0_level_0,ID,EntityID,Touched,IDName,IDDescription,LocationID,TimeSeriesParameterID,TimeSeriesMediaID,TimeSeriesUnitID,TimeSeriesDataTypeID,TimeSeriesDatasourceID,TimeSeriesTypeID,TimeSeriesStatusID,TimeSeriesDataProviderID,TimeSeriesDataProviderArgument,DataExpectedCount,DataArchiveAfter,DataDeleteAfter
GUID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
{35D46EDD-C352-11ED-B2F7-1831BF2DC749},2087,TNWB_wind_RVO-FUGRO_unfiltered_WS-120,2023-03-20 18:25:13,TNWB_wind_RVO-FUGRO_unfiltered_WS-120,Wind speed,TNWB,WS,wind,m/s,Unknown,,RVO-FUGRO,unfiltered,,,,,
{35D46EDF-C352-11ED-B2F7-1831BF2DC749},2088,TNWB_wind_RVO-FUGRO_unfiltered_WS-130,2023-03-20 18:25:14,TNWB_wind_RVO-FUGRO_unfiltered_WS-130,Wind speed,TNWB,WS,wind,m/s,Unknown,,RVO-FUGRO,unfiltered,,,,,
{35D46EE1-C352-11ED-B2F7-1831BF2DC749},2089,WHI_waves_CMEMS_unfiltered_Hm0,2023-03-20 18:25:14,WHI_waves_CMEMS_unfiltered_Hm0,Spectral significant wave height,WHI,Hm0,waves,m,Unknown,,CMEMS,unfiltered,,,,,
{3C93FFDB-C352-11ED-B2F7-1831BF2DC749},2090,WHI_waves_CMEMS_unfiltered_Tz,2023-03-20 18:25:15,WHI_waves_CMEMS_unfiltered_Tz,Average zero crossing wave period (Tz),WHI,Tz,waves,s,Unknown,,CMEMS,unfiltered,,,,,
{7B4D3518-EE55-11ED-B2FB-1831BF2DC749},2091,testapi.insert,2023-05-25 12:21:41,testapi.insert,,,,,,Unknown,,,,,,,,


In [7]:
# Request statistics for the last ten EntityID values of the listing.
last_entity_ids = time_series_list["EntityID"].tail(10)
dfr.get_statistics(last_entity_ids)

Unnamed: 0_level_0,First,Last,DailyCount
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
TNWB_wind_RVO-FUGRO_unfiltered_WS-70,2019-06-19,2021-02-16,144
TNWB_wind_RVO-FUGRO_unfiltered_WS-80,2019-06-19,2021-02-16,144
TNWB_wind_RVO-FUGRO_unfiltered_WS-90,2019-06-19,2021-02-16,144
TNWB_wind_RVO-FUGRO_unfiltered_WS-100,2019-06-19,2021-02-16,144
TNWB_wind_RVO-FUGRO_unfiltered_WS-110,2019-06-19,2021-02-16,144
TNWB_wind_RVO-FUGRO_unfiltered_WS-120,2019-06-19,2021-02-16,144
TNWB_wind_RVO-FUGRO_unfiltered_WS-130,2019-06-19,2021-02-16,144
WHI_waves_CMEMS_unfiltered_Hm0,2011-05-19,2022-10-01,48
WHI_waves_CMEMS_unfiltered_Tz,2011-05-19,2022-10-01,48
testapi.insert,2012-08-23,2023-05-13,144


### Get time series data

In [11]:
# Query the test series with limit=5 and ascending=True, then display the result.
data = dfr.get_data(time_series_id="testapi.insert", limit=5, ascending=True)
data

Unnamed: 0_level_0,Data,QualityTxt
RefDateTimeRef,Unnamed: 1_level_1,Unnamed: 2_level_1
2022-09-28 23:26:48,637.0,ok
2022-09-28 23:26:48,637.0,ok
2022-09-28 23:56:40,637.0,ok
2022-09-28 23:56:50,637.0,ok
2022-09-28 23:56:50,637.0,ok


### Delete data in a given range:

In [10]:
# Use the first and third index values of `data` as the range bounds.
start = data.index[0]
end = data.index[2]
print(f"Deleting data in range [{start}, {end})")
res = dfr.delete_data("testapi.insert", start=start, end=end)
res.json()

Deleting data in range [2012-08-15 09:23:56, 2014-11-02 09:23:56)


{'result': 'delete range successful'}

In [12]:
# Take the first three index values of `data` and pass them as the timestamps to delete.
timestamps = data.index[:3]
print(f"Deleting data for timestamps {timestamps}")
res = dfr.delete_data("testapi.insert", timestamps=timestamps)
# Show the JSON body returned by the delete call.
res.json()

Deleting data for timestamps DatetimeIndex(['2022-09-28 23:26:48', '2022-09-28 23:26:48',
               '2022-09-28 23:56:40'],
              dtype='datetime64[ns]', name='RefDateTimeRef', freq=None)


{'result': '3 items deleted'}

In [13]:
# Show the JSON body of the response currently stored in `res` again.
res.json()

{'result': '3 items deleted'}

Check that the data has been deleted:

In [14]:
# Query the test series again, this time with limit=10, to inspect what remains.
data = dfr.get_data(time_series_id="testapi.insert", limit=10, ascending=True)
data

Unnamed: 0_level_0,Data,QualityTxt
RefDateTimeRef,Unnamed: 1_level_1,Unnamed: 2_level_1
2022-09-28 23:26:48,637.0,ok
2022-09-28 23:26:48,637.0,ok
2022-09-28 23:56:40,637.0,ok
2022-09-28 23:56:50,637.0,ok
2022-09-28 23:56:50,637.0,ok
2022-09-29 00:26:43,637.0,ok
2022-09-29 00:26:48,637.0,ok
2022-09-29 00:26:48,637.0,ok
2022-09-29 00:56:40,637.0,ok
2022-09-29 00:56:50,637.0,ok


### Insert time series data

Here we prepare some data to insert, in the form of a DataFrame.

In [31]:
def random_date(now=None):
    """Return a timestamp between 3001 and 4000 days before ``now``.

    Parameters
    ----------
    now : pd.Timestamp, optional
        Reference time; defaults to ``pd.Timestamp.now()``.

    Returns
    -------
    pd.Timestamp
        ``now`` shifted back by ``3000 + random.randint(1, 1000)`` days.
    """
    if now is None:
        now = pd.Timestamp.now()
    return now - pd.Timedelta(days=3000 + random.randint(1, 1000))


rows = 5
new_data = pd.DataFrame({
    "TimeStamp": [random_date() for _ in range(rows)],
    "Data": [random.random() for _ in range(rows)],
    "Quality": ["ok"] * rows,
})
# Blank out the first value so the frame contains one missing (NaN) entry.
new_data.loc[0, "Data"] = None
new_data


Unnamed: 0,TimeStamp,Data,Quality
0,2013-04-09 12:08:34.569015,,ok
1,2013-01-10 12:08:34.569137,0.739488,ok
2,2013-11-30 12:08:34.569167,0.339588,ok
3,2013-04-08 12:08:34.569188,0.414195,ok
4,2012-09-03 12:08:34.569207,0.719339,ok


To insert the data we use the `insert_data` method:

In [32]:
# Insert the prepared frame. If the request fails, report the error and re-raise:
# `res` already holds the response from the earlier delete call, so continuing
# would let the following cells display that stale response as if it were the
# result of this insert.
try:
    res = dfr.insert_data("testapi.insert", new_data, bulk_insert=True)
except HTTPError as e:
    print(e)
    raise

In [33]:
# Display the JSON body of the response stored in `res`.
res.json()

{'ItemsWritten': 5}

We can peek at the JSON body of the request that was sent to the API. The NaN value is represented as `{"N": 1, "V": 0}`.

In [34]:
# Decode the JSON request body attached to the response stored in `res`.
json.loads(res.request.body)

{'BulkInsert': True,
 'TimeSeriesName': 'testapi.insert',
 'TimeStamp': ['2013-04-09T12:08:34.569015Z',
  '2013-01-10T12:08:34.569137Z',
  '2013-11-30T12:08:34.569167Z',
  '2013-04-08T12:08:34.569188Z',
  '2012-09-03T12:08:34.569207Z'],
 'Data': [{'N': 1, 'V': 0.0},
  {'N': 0, 'V': 0.739487639991458},
  {'N': 0, 'V': 0.3395877725181353},
  {'N': 0, 'V': 0.414195228373305},
  {'N': 0, 'V': 0.7193391994039372}],
 'QualityLevel': [0, 0, 0, 0, 0]}

### Close connection

In [11]:
dfr.close()

# After close(), the next request is expected to be rejected; print the error.
try:
    dfr.list_time_series()
except HTTPError as err:
    print(err)

401 Client Error: Unauthorized for url: https://apidevtest.datafarm.work/api/List/TimeSeries/


In [12]:
# Reconnect and try again. The preview is stored first and shown as the cell's
# last expression; a bare `.head()` in the middle of a cell is never displayed.
dfr.connect()
try:
    time_series_preview = dfr.list_time_series().head()
finally:
    # Close the connection even if the listing request fails.
    dfr.close()
time_series_preview

### Using the context manager

In [13]:

# Use the repository as a context manager (presumably connects on entry and
# closes on exit; confirm against DatafarmRepository).
with DatafarmRepository(api_key) as dfr:
    data = dfr.get_data(time_series_id="testapi.insert", limit=2)
data

Unnamed: 0_level_0,Data,QualityTxt
RefDateTimeRef,Unnamed: 1_level_1,Unnamed: 2_level_1
2012-08-14 09:58:24,0.727511,ok
2014-12-15 09:58:24,0.024472,ok
