In [1]:
# Install python package for cassandra:
!pip install cassandra-driver

[33mDEPRECATION: Loading egg at /opt/bitnami/python/lib/python3.11/site-packages/pip-23.3.1-py3.11.egg is deprecated. pip 23.3 will enforce this behaviour change. A possible replacement is to use pip for package installation..[0m[33m
[0mCollecting cassandra-driver
  Obtaining dependency information for cassandra-driver from https://files.pythonhosted.org/packages/08/c1/d4a5e4a7328c8d81aba19116ed9d213fb229893146a79152685e6cc18c0b/cassandra_driver-3.29.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata
  Downloading cassandra_driver-3.29.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.9 kB)
Collecting geomet<0.3,>=0.1 (from cassandra-driver)
  Obtaining dependency information for geomet<0.3,>=0.1 from https://files.pythonhosted.org/packages/c9/81/156ca48f950f833ddc392f8e3677ca50a18cb9d5db38ccb4ecea55a9303f/geomet-0.2.1.post1-py3-none-any.whl.metadata
  Downloading geomet-0.2.1.post1-py3-none-any.whl.metadata (1.0 kB)
Collecting click (from 

In [27]:
from cassandra.cqlengine import columns
from cassandra.cqlengine.models import Model


class GlobalMetrics(Model):
    """
    cqlengine mapping of the ``global_metrics`` table.

    Intended for read-only use: it exists to fetch the global metrics
    computed for the OpenSea data.

    Key layout:
        Primary key (composite): metric, timestamp
        Partition key: metric
        Clustering key: timestamp (DESC)
    """

    __table_name__ = "global_metrics"

    # --- key columns -------------------------------------------------------
    metric = columns.Text(primary_key=True)
    # Exposed as ``timestamp_at`` on the model, stored as ``timestamp``.
    timestamp_at = columns.DateTime(
        primary_key=True,
        clustering_order="DESC",
        db_field="timestamp",
    )

    # --- regular columns ---------------------------------------------------
    value = columns.Double()
    collection = columns.Text()


class CollectionMetrics(Model):
    """
    cqlengine mapping of the ``collections_metrics`` table.

    Intended for read-only use: it exists to fetch the per-collection
    metrics computed for the OpenSea data.

    Key layout:
        Primary key (composite): (collection, metric), timestamp
        Partition key: collection, metric
        Clustering key: timestamp (DESC)

    NOTE(review): both ``collection`` and ``metric`` are declared with
    ``primary_key=True`` only. If cqlengine needs ``partition_key=True`` on
    each column to form a composite partition key, this declaration does not
    match the layout above -- confirm against the real table schema.
    """

    __table_name__ = "collections_metrics"

    # --- key columns -------------------------------------------------------
    collection = columns.Text(primary_key=True)
    metric = columns.Text(primary_key=True)
    # Exposed as ``timestamp_at`` on the model, stored as ``timestamp``.
    timestamp_at = columns.DateTime(
        primary_key=True,
        clustering_order="DESC",
        db_field="timestamp",
    )

    # --- regular columns ---------------------------------------------------
    value = columns.Double()
    asset_name = columns.Text()
    asset_url = columns.Text()
    image_url = columns.Text()

In [36]:
from typing import TYPE_CHECKING, Optional

from cassandra.auth import PlainTextAuthProvider
from cassandra.cluster import Cluster, dict_factory
from cassandra.cqlengine import connection

from opensea_monitoring.utils.configs import settings

if TYPE_CHECKING:
    from datetime import datetime

    from cassandra.cluster import Session


class OpenSeaDataMonitoringClient:
    """
    Client used to monitor the OpenSea data.

    It opens a session against the Cassandra database populated by the data
    pipeline and exposes query helpers that return the stored metrics as
    model objects.
    """

    def __init__(
        self,
        cassandra_host: Optional[str | list[str]] = None,
        cassandra_username: Optional[str] = None,
        cassandra_password: Optional[str] = None,
        cassandra_port: Optional[int] = None,
        default_keyspace: Optional[str] = None,
    ):
        """
        Connects to Cassandra and registers the session with cqlengine.

        Host, username, password and port fall back to the corresponding
        ``settings`` values when they are not provided.

        @param cassandra_host: A single contact point or a list of them.
        @param cassandra_username: The username for plain-text authentication.
        @param cassandra_password: The password for plain-text authentication.
        @param cassandra_port: The port to connect to.
        @param default_keyspace: The keyspace passed to ``Cluster.connect``.
        """
        self._cluster = self._get_cassandra_cluster(
            cassandra_host, cassandra_username, cassandra_password, cassandra_port
        )
        self._session = self._cluster.connect(default_keyspace)
        self._session.row_factory = dict_factory
        # The cqlengine models query through the session registered here.
        connection.set_session(self._session)

    def __str__(self):
        return f"{self.__class__.__name__}({self._session.hosts})"

    def __repr__(self):
        return f"<{self.__str__()}>"

    @property
    def session(self) -> "Session":
        """The underlying Cassandra session."""
        return self._session

    def get_global_metrics(
        self,
        metric: Optional[str] = None,
        grain: Optional[str] = None,
        from_ts: Optional["datetime"] = None,
        to_ts: Optional["datetime"] = None,
        limit: Optional[int] = None,
        order_ascending: Optional[bool] = None,
    ) -> list[GlobalMetrics]:
        """
        Retrieves the global metrics from the Cassandra database.
        You can filter the results by metric, grain, and timestamp range.

        metric and grain must be provided together or not at all. When both
        are given, rows are matched on the stored name ``{metric}__{grain}``.

        @param metric: The metric to filter by.
        @param grain: The grain to filter by.
        @param from_ts: The start timestamp to filter by (inclusive).
        @param to_ts: The end timestamp to filter by (inclusive).
        @param limit: The maximum number of results to return. A falsy value
            (None or 0) leaves the query's limit untouched.
        @param order_ascending: Use this to specify the order of the results.
            If True, the results will be sorted in ascending order by timestamp.
            If False, the results will be sorted in descending order by timestamp.
            Default is None, which means no explicit ordering is requested and
            rows come back in whatever order the database returns them.
        @return: A list of GlobalMetrics objects.
        @raise ValueError: If only one of metric and grain is provided.
        """
        # Validate before building the query so bad input fails fast.
        has_metric_filter = self._require_all_or_none(
            "Both metric and grain must be provided or neither.", metric, grain
        )
        global_metrics = GlobalMetrics.objects.all()
        if has_metric_filter:
            global_metrics = global_metrics.filter(metric=f"{metric}__{grain}")
        global_metrics = self._apply_common_clauses(
            global_metrics, from_ts, to_ts, limit, order_ascending
        )
        return list(global_metrics)

    def get_collection_metrics(
        self,
        collection: Optional[str] = None,
        metric: Optional[str] = None,
        grain: Optional[str] = None,
        from_ts: Optional["datetime"] = None,
        to_ts: Optional["datetime"] = None,
        limit: Optional[int] = None,
        order_ascending: Optional[bool] = None,
    ) -> list[CollectionMetrics]:
        """
        Retrieves specific collection metrics from the Cassandra database.
        You can filter the results by collection, metric, grain, and timestamp range.

        collection, metric and grain must be provided together or not at all.
        When all are given, rows are matched on the collection and on the
        stored metric name ``{metric}__{grain}``.

        @param collection: The collection to filter by.
        @param metric: The metric to filter by.
        @param grain: The grain to filter by.
        @param from_ts: The start timestamp to filter by (inclusive).
        @param to_ts: The end timestamp to filter by (inclusive).
        @param limit: The maximum number of results to return. A falsy value
            (None or 0) leaves the query's limit untouched.
        @param order_ascending: If True, the results will be sorted in ascending
            order by timestamp; if False, in descending order. Default is None,
            which means no explicit ordering is requested.
        @return: A list of CollectionMetrics objects.
        @raise ValueError: If only some of collection, metric and grain
            are provided.
        """
        # A three-way XOR is a parity test, not an "all or none" test: it
        # rejects the valid case where all three are given and accepts the
        # invalid case where exactly two are. Check any/all instead.
        has_key_filter = self._require_all_or_none(
            "collection, metric, and grain must be provided or neither.",
            collection,
            metric,
            grain,
        )
        collection_metrics = CollectionMetrics.objects.all()
        if has_key_filter:
            collection_metrics = collection_metrics.filter(
                collection=collection, metric=f"{metric}__{grain}"
            )
        collection_metrics = self._apply_common_clauses(
            collection_metrics, from_ts, to_ts, limit, order_ascending
        )
        return list(collection_metrics)

    @staticmethod
    def _require_all_or_none(message: str, *values: object) -> bool:
        """
        Checks that the given values are either all truthy or all falsy.

        @param message: The ValueError message used when the values are mixed.
        @param values: The values that must be provided together.
        @return: True when every value is truthy, False when none is.
        @raise ValueError: If some values are truthy and others are not.
        """
        provided = [bool(value) for value in values]
        if any(provided) and not all(provided):
            raise ValueError(message)
        return all(provided)

    @staticmethod
    def _apply_common_clauses(
        queryset,
        from_ts: Optional["datetime"],
        to_ts: Optional["datetime"],
        limit: Optional[int],
        order_ascending: Optional[bool],
    ):
        """
        Applies the timestamp range, limit and ordering shared by all getters.

        Each clause is added only when its argument is set; the queryset is
        returned unchanged otherwise.
        """
        if from_ts:
            queryset = queryset.filter(timestamp_at__gte=from_ts)
        if to_ts:
            queryset = queryset.filter(timestamp_at__lte=to_ts)
        if limit:
            queryset = queryset.limit(limit)
        if order_ascending is not None:
            # A leading "-" requests descending order.
            queryset = queryset.order_by(
                "timestamp_at" if order_ascending else "-timestamp_at"
            )
        return queryset

    def _get_cassandra_cluster(
        self,
        cassandra_host: Optional[str | list[str]] = None,
        cassandra_username: Optional[str] = None,
        cassandra_password: Optional[str] = None,
        cassandra_port: Optional[int] = None,
    ) -> Cluster:
        """
        Retrieves a Cassandra cluster object with the provided configuration.

        Any argument left unset (or falsy) is replaced by the matching
        ``settings`` value.
        """
        auth_provider = PlainTextAuthProvider(
            username=cassandra_username or settings.cassandra_username,
            password=cassandra_password or settings.cassandra_password,
        )
        host = cassandra_host or settings.cassandra_host
        # Cluster is given a list of contact points; wrap a lone hostname.
        if isinstance(host, str):
            host = [host]
        port = cassandra_port or settings.cassandra_port
        return Cluster(
            host,
            port=port,
            auth_provider=auth_provider,
        )

In [37]:
# Connection details can be overridden through environment variables so real
# credentials never have to be written into the notebook. The fallbacks are
# the values this notebook was originally run with.
import os

client = OpenSeaDataMonitoringClient(
    cassandra_host=os.environ.get("CASSANDRA_HOST", "cassandra"),
    cassandra_username=os.environ.get("CASSANDRA_USERNAME", "cassandra"),
    cassandra_password=os.environ.get("CASSANDRA_PASSWORD", "cassandra"),
    cassandra_port=int(os.environ.get("CASSANDRA_PORT", "9042")),
    default_keyspace=os.environ.get("CASSANDRA_KEYSPACE", "opensea"),
)
client

<OpenSeaDataMonitoringClient([<Host: 172.18.0.5:9042 datacenter1>])>

In [47]:
# `metric` is passed without `grain`, which get_global_metrics rejects with a
# ValueError. Catch it and show the message so the notebook still runs from
# top to bottom instead of stopping at this cell.
try:
    client.get_global_metrics(
        metric="total_usd_volume",
        order_ascending=False,
    )
except ValueError as error:
    print(f"ValueError: {error}")

ValueError: Both metric and grain must be provided or neither.

In [39]:
# No collection/metric/grain is passed, so no key filter is applied; the
# query is only capped at 10 rows.
client.get_collection_metrics(limit=10)

[]