In [None]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Pool

> Abstract base class (ABC) that defines the storage pool interface

In [None]:
#| default_exp storage.pool.pool

In [None]:
#| export
from __future__ import annotations

In [None]:
#| export
import abc
import weakref
from dataclasses import dataclass
from typing import Any, ClassVar, Generic, Optional, Union, get_args, get_origin
import pandas as pd

In [None]:
#| export
from data_io_nbdev.data.core import ItemT, PoolQuery

In [None]:
#| export
@dataclass
class Pool(abc.ABC, Generic[ItemT]):
    """
    Abstract storage interface for pooling the real-time data from the cloud.

    Concrete pools subclass `Pool[SomeItemType]` and implement:

        - load(): initialize the pool interface
        - close(): release the pool; registered as the finalizer callback
        - store(item): deposit an item (record) into the pool
        - delete(idx): remove the item with the given id or name
        - find(query): get data from the pool
        - sample(size, query=None): sample a batch of data from the pool
        - _count(query=None): count the number of records in the pool
        - __iter__(): iterate over the pool

    On top of these, `pool[query]` delegates to `find(query)`, while
    `len(pool)` and `repr(pool)` report the cached record count `cnt`.

    Attributes:
        - _type_T: name of the item type the concrete subclass is parameterized with
        - _cnt: cached number of records in the pool, exposed through `cnt`
    """

    # Filled in by __init_subclass__ with the name of the concrete item type.
    _type_T: ClassVar[str] = ""
    # Cached number of records in the pool. Counting is described as expensive,
    # so the value is stored here; presumably subclasses refresh it from
    # _count() -- TODO confirm against the concrete pools.
    _cnt: int = 0

    def __init_subclass__(cls, **kwargs: Any) -> None:
        """Record the name of the concrete item type of a derived class.

        For `class RecordPool(Pool[Record])` this sets `_type_T` to "Record".
        When no parameterized Pool base is found (e.g. a plain `class Sub(Pool)`),
        the inherited `_type_T` is left untouched.
        """
        # Chain up so typing.Generic (and any cooperating mixin) can do its own
        # subclass bookkeeping, e.g. computing __parameters__.
        super().__init_subclass__(**kwargs)

        for base in getattr(cls, "__orig_bases__", ()):
            origin = get_origin(base)
            # Only a parameterized Pool (or Pool subclass) carries the item type;
            # skip mixins and other generic bases regardless of their position.
            if not (isinstance(origin, type) and issubclass(origin, Pool)):
                continue
            type_args = get_args(base)
            if type_args:
                item_type = type_args[0]
                # Forward references and similar objects have no __name__.
                cls._type_T = getattr(item_type, "__name__", repr(item_type))
                break

    def __post_init__(self) -> None:
        """Register `close` as a weakref finalizer callback for this pool."""
        # NOTE(review): `self.close` is a bound method, so the finalizer keeps a
        # strong reference to `self`. Per the weakref.finalize documentation the
        # object is then never garbage collected; `close` only runs at interpreter
        # exit or when `self._finalizer()` is called explicitly. Fixing this needs
        # a callback that does not reference the instance.
        self._finalizer = weakref.finalize(self, self.close)

    @abc.abstractmethod
    def load(self):
        """Initialize the pool interface

        This function should:
            - connect to db
            - init
        """

    @abc.abstractmethod
    def close(self):
        """Close the pool; used as the finalizer callback."""

    @abc.abstractmethod
    def store(self, item: ItemT):
        """Deposit an item (record) into the pool"""

    @abc.abstractmethod
    def delete(self, idx):
        """Delete an item by id or name."""

    @abc.abstractmethod
    def _count(self, query: Optional[PoolQuery] = None) -> int:
        """
        Count the number of records in the db.

        Args:
            query: optional filter for the records to count, e.g. with the fields

                vehicle_id: str = "VB7",
                driver_id: str = "longfei-zheng",
                dt_start: datetime = None,
                dt_end: datetime = None,

        Returns:
            the number of matching records
        """

    @property
    def cnt(self) -> int:
        """Number of records in the pool (cached value, no query is issued)."""
        return self._cnt

    @cnt.setter
    def cnt(self, value: int):
        self._cnt = value

    @abc.abstractmethod
    def find(self, query: PoolQuery) -> Any:
        """Find an item by id or name."""

    @abc.abstractmethod
    def sample(
        self,
        size: int,  # number of required samples
        *,
        query: Optional[PoolQuery] = None,  # query to filter the records
    ) -> Optional[
        Union[pd.DataFrame, list[ItemT]]
    ]:  # return either a Pandas Dataframe or a list of records
        """Sample a size of records from the pool.

        Args:

            size: desired size of the samples
            query: optional query to filter the records, e.g. with the fields

                vehicle_id: str = "VB7",
                driver_id: str = "longfei-zheng",
                dt_start: datetime = None,
                dt_end: datetime = None,

        Returns:
            either a Pandas DataFrame or a list of records, or None
        """

    @abc.abstractmethod
    def __iter__(self) -> Any:
        """
        Iterate over the pool.
        """

    def __getitem__(self, query: PoolQuery) -> Any:
        """
        Get an item by id or name; shorthand for `find(query)`.
        """
        return self.find(query)

    def __len__(self) -> int:
        """Return the cached number of records in the pool."""
        # `cnt` is a property, not a method: it must be read, not called.
        return self.cnt

    def __repr__(self) -> str:
        """Return a short description containing the cached record count."""
        return f"Pool(length: {self.cnt})"

In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
show_doc(Pool.load)

---

[source](https://github.com/Binjian/data-io-nbdev/tree/main/blob/main/data_io_nbdev/storage/pool/pool.py#L58){target="_blank" style="float:right; font-size:smaller"}

### Pool.load

>      Pool.load ()

Initialize the pool interface

This function should:
    - connect to db
    - init

In [None]:
show_doc(Pool.close)

---

[source](https://github.com/Binjian/data-io-nbdev/tree/main/blob/main/data_io_nbdev/storage/pool/pool.py#L67){target="_blank" style="float:right; font-size:smaller"}

### Pool.close

>      Pool.close ()

close the pool, for destructor

In [None]:
show_doc(Pool.store)

---

[source](https://github.com/Binjian/data-io-nbdev/tree/main/blob/main/data_io_nbdev/storage/pool/pool.py#L71){target="_blank" style="float:right; font-size:smaller"}

### Pool.store

>      Pool.store (item:~ItemT)

Deposit an item (record) into the pool

In [None]:
show_doc(Pool.delete)   

---

[source](https://github.com/Binjian/data-io-nbdev/tree/main/blob/main/data_io_nbdev/storage/pool/pool.py#L75){target="_blank" style="float:right; font-size:smaller"}

### Pool.delete

>      Pool.delete (idx)

delete an item by id or name.

In [None]:
show_doc(Pool._count)

---

[source](https://github.com/Binjian/data-io-nbdev/tree/main/blob/main/data_io_nbdev/storage/pool/pool.py#L79){target="_blank" style="float:right; font-size:smaller"}

### Pool._count

>      Pool._count (query:Optional[data_io_nbdev.data.core.PoolQuery]=None)

Count the number of records in the db.

query = {
    vehicle_id: str = "VB7",
    driver_id: str = "longfei-zheng",
    dt_start: datetime = None,
    dt_end: datetime = None,
    }

In [None]:
show_doc(Pool.find)

---

[source](https://github.com/Binjian/data-io-nbdev/tree/main/blob/main/data_io_nbdev/storage/pool/pool.py#L101){target="_blank" style="float:right; font-size:smaller"}

### Pool.find

>      Pool.find (query:data_io_nbdev.data.core.PoolQuery)

Find an item by id or name.

In [None]:
show_doc(Pool.sample)

---

[source](https://github.com/Binjian/data-io-nbdev/tree/main/blob/main/data_io_nbdev/storage/pool/pool.py#L105){target="_blank" style="float:right; font-size:smaller"}

### Pool.sample

>      Pool.sample (size:int,
>                   query:Optional[data_io_nbdev.data.core.PoolQuery]=None)

Sample a size of records from the pool.

Args:

    size: desired size of the samples
    rule: an optional dictionary specifying a rule or a pipeline in mongodb
    query: query to filter the records 
    vehicle_id: str = "VB7",
    driver_id: str = "longfei-zheng",
    dt_start: datetime = None,
    dt_end: datetime = None,
    }

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| size | int |  | number of required samples |
| query | Optional[PoolQuery] | None |  |
| **Returns** | **Optional[Union[pd.DataFrame, list[ItemT]]]** |  | **return either a Pandas Dataframe or a list of records** |

In [None]:
show_doc(Pool.__iter__)

---

[source](https://github.com/Binjian/data-io-nbdev/tree/main/blob/main/data_io_nbdev/storage/pool/pool.py#L128){target="_blank" style="float:right; font-size:smaller"}

### Pool.__iter__

>      Pool.__iter__ ()

Iterate over the pool.

In [None]:
show_doc(Pool.__getitem__)

---

[source](https://github.com/Binjian/data-io-nbdev/tree/main/blob/main/data_io_nbdev/storage/pool/pool.py#L133){target="_blank" style="float:right; font-size:smaller"}

### Pool.__getitem__

>      Pool.__getitem__ (query:data_io_nbdev.data.core.PoolQuery)

Get an item by id or name.

In [None]:
show_doc(Pool.__len__)

---

[source](https://github.com/Binjian/data-io-nbdev/tree/main/blob/main/data_io_nbdev/storage/pool/pool.py#L139){target="_blank" style="float:right; font-size:smaller"}

### Pool.__len__

>      Pool.__len__ ()

In [None]:
show_doc(Pool.__repr__)

---

[source](https://github.com/Binjian/data-io-nbdev/tree/main/blob/main/data_io_nbdev/storage/pool/pool.py#L142){target="_blank" style="float:right; font-size:smaller"}

### Pool.__repr__

>      Pool.__repr__ ()

Return repr(self).

In [None]:
#| hide
import nbdev; nbdev.nbdev_export()