In [37]:
from pymongo import MongoClient
from pymongo.collection import Collection
from pymongo.errors import CollectionInvalid
from bson.codec_options import CodecOptions
import pandas as pd

In [15]:

from data_io_nbdev.config.db import DBConfig
from data_io_nbdev.data.core import (
    DataFrameDoc,
    ObservationMeta,
    PoolQuery,
    veos_lifetime_start_date,
    veos_lifetime_end_date
)
from data_io_nbdev.data.external.pandas_utils import eos_df_to_nested_dict

In [9]:

mongo_url = "mongodb://" + "localhost" + ":" + "27017"
client = MongoClient(
    mongo_url,
    username="",
    password="",
)  # mongo_url = 'mongodb://host.docker.internal:27017/'
version = client.server_info()["version"]
print(f"MongoDB version: {version}")

MongoDB version: 7.0.4


In [10]:

databases = client.list_database_names()
print(databases)

['admin', 'config', 'db', 'ddpg_db', 'eos', 'eos_dill', 'local', 'mongo_bank', 'mongo_book', 'mydb', 'test', 'test_episode_db']


In [13]:
db = client['eos_dill']

try:
    db.create_collection(
                    "coll_episodes",
                    timeseries={
                        "timeField": "timestamp",  # timestamp as timeField
                        "metaField": "meta",  # plot as meta field
                        "granularity": "seconds",
                    },
                    codec_options=CodecOptions(tz_aware=True),
    				expireAfterSeconds=60*60*24*7*365*3
    )
except CollectionInvalid:
    print(f"coll_episodes exists.")


coll_episodes exists.


In [17]:
collection: Collection[DataFrameDoc] = db.get_collection("coll_episodes")

In [26]:
query = PoolQuery(
	vehicle="VB7_FIELD",
	driver="wang-kai",
	episodestart_start=veos_lifetime_start_date,
	episodestart_end=veos_lifetime_end_date,
	timestamp_start=veos_lifetime_start_date,
	timestamp_end=veos_lifetime_end_date
)

doc_query = {
                "$and": [
                    {"meta.vehicle": query.vehicle},
                    {"meta.driver": query.driver},
                    {"meta.episodestart": {"$gt": query.episodestart_start}},
                    {"meta.episodestart": {"$lt": query.episodestart_end}},
                    {"meta.timestamp": {"$gt": query.timestamp_start}},
                    {"meta.timestamp": {"$lt": query.timestamp_end}},
                ]
            }

doc_query


{'$and': [{'meta.vehicle': 'VB7_FIELD'},
  {'meta.driver': 'wang-kai'},
  {'meta.episodestart': {'$gt': Timestamp('2021-01-01 00:00:00+0800', tz='Asia/Shanghai')}},
  {'meta.episodestart': {'$lt': Timestamp('2031-12-31 00:00:00+0800', tz='Asia/Shanghai')}},
  {'meta.timestamp': {'$gt': Timestamp('2021-01-01 00:00:00+0800', tz='Asia/Shanghai')}},
  {'meta.timestamp': {'$lt': Timestamp('2031-12-31 00:00:00+0800', tz='Asia/Shanghai')}}]}

In [31]:
cnt = collection.count_documents(doc_query)
cnt

3785

In [44]:
cursor = collection.aggregate([])
batch = list(cursor)

In [53]:
from data_io_nbdev.data.time import timezones
for doc in collection.find():
	doc['meta']['site'] = {'abbr': 'at', 'name':'Anting', 'cname': '安亭', 'tz': timezones["at"].key}

anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting
anting

In [45]:
df  = pd.DataFrame(batch).drop('_id', axis=1)
df

Unnamed: 0,timestamp,meta,observation
0,2023-08-28 15:27:46.985,"{'action_specs': {'action_column_number': 17, ...","{'action': {'r0': {'0': 0.010279752314090729, ..."
1,2023-08-28 15:27:51.548,"{'action_specs': {'action_column_number': 17, ...","{'action': {'r0': {'0': -0.05427807196974754, ..."
2,2023-08-28 15:27:54.590,"{'action_specs': {'action_column_number': 17, ...","{'action': {'r0': {'0': -0.0393977016210556, '..."
3,2023-08-28 15:27:43.941,"{'action_specs': {'action_column_number': 17, ...","{'action': {'r0': {'0': 0.08541138470172882, '..."
4,2023-08-28 15:27:37.856,"{'action_specs': {'action_column_number': 17, ...","{'action': {'r0': {'0': 0.059804562479257584, ..."
...,...,...,...
3780,2023-11-21 09:34:18.000,"{'action_specs': {'action_column_number': 17, ...","{'action': {'r0': {'0': 1.044358730316162, '1'..."
3781,2023-11-21 09:34:22.000,"{'action_specs': {'action_column_number': 17, ...","{'action': {'r0': {'0': 1.0344882011413574, '1..."
3782,2023-11-21 09:34:25.000,"{'action_specs': {'action_column_number': 17, ...","{'action': {'r0': {'0': 1.047924280166626, '1'..."
3783,2023-11-21 09:34:29.000,"{'action_specs': {'action_column_number': 17, ...","{'action': {'r0': {'0': 1.030140995979309, '1'..."


In [46]:
db_new = client['eos_dill_new']

try:
    db_new.create_collection(
                    "coll_episodes",
                    timeseries={
                        "timeField": "timestamp",  # timestamp as timeField
                        "metaField": "meta",  # plot as meta field
                        "granularity": "seconds",
                    },
                    codec_options=CodecOptions(tz_aware=True),
    				expireAfterSeconds=60*60*24*7*365*3
    )
except CollectionInvalid:
    print(f"coll_episodes exists.")


coll_episodes exists.


In [47]:

collection_new: Collection[DataFrameDoc] = db_new.get_collection("coll_episodes")

In [48]:

# generate indices info (vehicle, driver, episodestart, timestamp') from DataFrame MultiIndex for meta info
indices_dict = [
    {df.index.names[i]: level for i, level in enumerate(levels)}
    for levels in df.index
]
docs = [
    DataFrameDoc(
        timestamp=idx[
            "timestamp"
        ]  # redundant, same as in meta['timestamp'] and 'observation'
        .to_pydatetime()
        .replace(
            microsecond=0  # mongodb timestamp is in BSON Date format, doesn't support microsecond,
        ),  # but only for timestamp, not necessary for timestamps as timestep data
        meta={
            **idx,
            **(
                self.meta.model_dump()
            ),  # site will dump tz as IANA string as defined in Eoslocation class
        },  # merge two dicts into meta: df.index + ObservationMeta
        observation=dict_nested[key],
    )
    for (idx, key) in zip(indices_dict, dict_nested)
]  # list of records, each record is a dict of timestamp, meta, observation (quadruple with timestamp)
# each row in rows will be a document in MongoDB
# docs = [{'timestamp': idx["timestamp"].to_pydatetime(),  # redundant, same as in meta['timestamp']
#         'meta': {**idx, **(self.meta.model_dump())},  # merge two dicts into meta: df.index + ObservationMeta
#         'observation': dict_nested[key]}
#         for (idx, key) in zip(indices_dict, dict_nested)]
# list of records, each record is a dict of timestamp, meta, observation (quadruple with timestamp)

# use typed collection for type checking
try:
    result = self.collection.insert_many(docs)
except Exception as e:
    raise e
assert result.acknowledged is True, "Record not stored in MongoDB!"
rec_inserted = self.collection.find({"_id": {"$in": result.inserted_ids}})
# assert (
#         set(rec_inserted).symmetric_difference(set(docs))
#         == set()  # result has to be an empty set if all records are inserted
# ), "Record stored is not the same as the one inputted!"
inserted_cnt = len(list(rec_inserted))
self.cnt = self.cnt + inserted_cnt
self.logger.info(
    f"'header': 'deposit item number', " f"'inserted item': '{inserted_cnt}'",
    extra=self.dict_logger,
)

TypeError: 'int' object is not iterable