In [1]:
import datetime
import tiledb
import numpy as np
import pandas as pd
import logging
from typing import Union
from edid import find_station_edid
import straintiledbarray 

logger = logging.getLogger(__name__)


In [31]:
# Smoke test: query one station's level-2 strain array directly from S3.
import importlib

# Query parameters.
series_name = 'microstrain'
data_types = ['2Ene']  # other candidates previously tried here: 'Eee+Enn', 'Eee-Enn'
attrs = ['data', 'quality', 'level', 'version']
start = "2022-01-01 00:00:00"
end = "2022-02-01 00:00:00"

# Re-execute the module so edits to straintiledbarray are picked up
# without restarting the kernel.
importlib.reload(straintiledbarray)

# Resolve the station to its EDID and build the array URI from it.
edid = find_station_edid('PB', 'B005')
uri = f"s3://tiledb-strain/{edid}_level2.tdb"
print(uri)

# Open the array, then read the requested window into a DataFrame.
array = straintiledbarray.StrainTiledbArray(uri=uri, location='s3', period=300)
reader = straintiledbarray.Reader(array)
df = reader.to_df(
    data_types=data_types,
    timeseries=series_name,
    attrs=attrs,
    start=start,
    end=end,
    reindex=False,
)

s3://tiledb-strain/01GQH5RTERZN57RCDY4MVG0JF9_level2.tdb


2023-02-09 14:28:46 INFO: Query complete, expected 8928 and returned 8929


In [32]:
# Inspect the frame returned by reader.to_df; it is indexed by
# (data_type, timeseries, time) with one column per requested attribute.
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,data,quality,level,version
data_type,timeseries,time,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2Ene,microstrain,2022-01-01 00:00:00,17.18466,g,2a,2022365042600
2Ene,microstrain,2022-01-01 00:05:00,17.18376,g,2a,2022365042600
2Ene,microstrain,2022-01-01 00:10:00,17.18300,g,2a,2022365042600
2Ene,microstrain,2022-01-01 00:15:00,17.18235,g,2a,2022365042600
2Ene,microstrain,2022-01-01 00:20:00,17.18149,g,2a,2022365042600
2Ene,microstrain,...,...,...,...,...
2Ene,microstrain,2022-01-31 23:40:00,17.35319,g,2a,2022365042600
2Ene,microstrain,2022-01-31 23:45:00,17.35282,g,2a,2022365042600
2Ene,microstrain,2022-01-31 23:50:00,17.35251,g,2a,2022365042600
2Ene,microstrain,2022-01-31 23:55:00,17.35207,g,2a,2022365042600


In [27]:
def reindex_data_types(df: pd.DataFrame,
                       attr: str) -> Union[pd.Series, pd.DataFrame]:
    """Flatten the multi-index so each data_type becomes a column of ``attr``.

    For every distinct value in the ``data_type`` index level, the ``attr``
    column is extracted, the leading remaining index level is dropped, and
    the resulting series is named after the data_type. The series are then
    joined side by side.

    Args:
        df: Frame whose index has a level named ``data_type`` followed by
            two further levels (in this notebook: ``timeseries`` and
            ``time``).
        attr: Name of the column to extract (e.g. ``'data'``, ``'level'``).

    Returns:
        - An empty ``DataFrame`` when ``df`` has no rows.
        - A ``Series`` named after the data_type when there is exactly one
          data_type (kept for backward compatibility).
        - Otherwise a ``DataFrame`` with one column per data_type, in order
          of first appearance.

    Raises:
        KeyError: If ``attr`` is not a column of ``df`` or the index has no
            ``data_type`` level.
    """
    # Use the level name both here and in xs() so the two always agree.
    data_types = list(df.index.get_level_values('data_type').unique())
    if not data_types:
        # Nothing to pivot; previously this path hit an unbound local.
        return pd.DataFrame()

    columns = [
        df.xs(data_type, level='data_type')[attr]
        .droplevel(level=0)
        .rename(data_type)
        for data_type in data_types
    ]
    if len(columns) == 1:
        return columns[0]
    # Single concat instead of growing the frame inside the loop.
    return pd.concat(columns, axis=1)

# Extract the 'level' attribute keyed by time, one column per data_type.
# NOTE(review): the saved output for this cell shows December 2022 timestamps,
# while the read cell queries January 2022 -- presumably it was produced from
# an earlier `df`; re-run after the read cell to refresh it.
reindex_data_types(df,'level')

time
2022-12-01 00:00:00    2a
2022-12-01 00:05:00    2a
2022-12-01 00:10:00    2a
2022-12-01 00:15:00    2a
2022-12-01 00:20:00    2a
                       ..
2022-12-30 23:30:00    2a
2022-12-30 23:35:00    2a
2022-12-30 23:40:00    2a
2022-12-30 23:45:00    2a
2022-12-30 23:50:00    2a
Name: 2Ene, Length: 8639, dtype: object

In [29]:
# Print the schema of the opened array (dimensions, attributes and filters).
array.print_schema()

ArraySchema(
  domain=Domain(*[
    Dim(name='data_type', domain=('', ''), tile=None, dtype='|S0', var=True, filters=FilterList([ZstdFilter(level=7), ])),
    Dim(name='timeseries', domain=('', ''), tile=None, dtype='|S0', var=True, filters=FilterList([ZstdFilter(level=7), ])),
    Dim(name='time', domain=(0, 4102444800000), tile=86400000, dtype='int64', filters=FilterList([DoubleDeltaFilter(), ZstdFilter(level=7), ])),
  ]),
  attrs=[
    Attr(name='data', dtype='float64', var=False, nullable=False, filters=FilterList([ZstdFilter(level=7), ])),
    Attr(name='quality', dtype='ascii', var=True, nullable=False, filters=FilterList([ZstdFilter(level=7), ])),
    Attr(name='level', dtype='ascii', var=True, nullable=False, filters=FilterList([ZstdFilter(level=7), ])),
    Attr(name='version', dtype='int64', var=False, nullable=False, filters=FilterList([ZstdFilter(level=7), ])),
  ],
  cell_order='row-major',
  tile_order='row-major',
  capacity=100000,
  sparse=True,
  allows_duplicates=Fa

In [33]:
# Display the queried frame (index: data_type, timeseries, time).
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,data,quality,level,version
data_type,timeseries,time,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2Ene,microstrain,2022-01-01 00:00:00,17.18466,g,2a,2022365042600
2Ene,microstrain,2022-01-01 00:05:00,17.18376,g,2a,2022365042600
2Ene,microstrain,2022-01-01 00:10:00,17.18300,g,2a,2022365042600
2Ene,microstrain,2022-01-01 00:15:00,17.18235,g,2a,2022365042600
2Ene,microstrain,2022-01-01 00:20:00,17.18149,g,2a,2022365042600
2Ene,microstrain,...,...,...,...,...
2Ene,microstrain,2022-01-31 23:40:00,17.35319,g,2a,2022365042600
2Ene,microstrain,2022-01-31 23:45:00,17.35282,g,2a,2022365042600
2Ene,microstrain,2022-01-31 23:50:00,17.35251,g,2a,2022365042600
2Ene,microstrain,2022-01-31 23:55:00,17.35207,g,2a,2022365042600
