# First-class Caching in Hamilton

In [None]:
import logging

# Surface DEBUG records from the fingerprinting logger in the notebook output.
logger = logging.getLogger("hamilton.caching.fingerprinting")
logger.setLevel(logging.DEBUG)
# Loggers are process-wide singletons: attach the stream handler only once so
# that re-running this cell does not emit every log line multiple times.
if not any(type(handler) is logging.StreamHandler for handler in logger.handlers):
    logger.addHandler(logging.StreamHandler())

%load_ext hamilton.plugins.jupyter_magic

# Fingerprinting
Fingerprinting derives an identifier from a data object.

In [2]:
import pandas as pd
from hamilton import driver
from hamilton.caching.fingerprinting import FingerprintingAdapter

In [3]:
%%cell_to_module primitives
import pandas as pd

def A() -> int:
    """Root node with no inputs: emits the constant integer 7."""
    constant = 7
    return constant

def B(A: int) -> float:
    """Convert the upstream value `A` to a float."""
    as_float = float(A)
    return as_float

def C(B: float, D: bool = True) -> bool:
    """Report whether `B` compares unequal to `D` (`D` defaults to True)."""
    is_different = B != D
    return is_different

def E(C: bool) -> str:
    """Return a fixed greeting string; the value of `C` is not read."""
    greeting = "hello-world"
    return greeting

def F(E: str) -> dict:
    """Build a single-entry dict mapping `E` to `E` repeated three times."""
    tripled = E * 3
    mapping = {E: tripled}
    return mapping

def G() -> pd.DataFrame:
    """Build a two-row DataFrame with an integer column `a` and a string column `b`."""
    columns = {
        "a": [323, 3235],
        "b": ["hello", "vorld"],
    }
    return pd.DataFrame(columns)

In [7]:
# Build a driver over the `primitives` module with the fingerprinting adapter attached.
fingerprint_adapter = FingerprintingAdapter()
dr = driver.Builder().with_modules(primitives).with_adapters(fingerprint_adapter).build()

# Run the dataflow for the requested nodes; the returned results are not displayed.
dr.execute(["A", "B", "C", "D", "E", "F", "G"])

# Last expression of the cell: show the fingerprints recorded by the adapter.
fingerprint_adapter.run_fingerprints

{'A': 'jxTkX87qFnpaNt7dS-olQw==',
 'B': 'Ad7r-oe5OMieyz3LTv_L4g==',
 'C': '-CfPRi9ihI3zfF4elKTadA==',
 'E': 'IJUxIYl1PeatR9_iDL6X7A==',
 'F': 'eOXd-WuG4ZFy8yz1O9NH5uzyw1wzWbsypyuNNw==',
 'G': 'TCtsLwovEJ2EIAYt60Q4_w36aBdTnSXVNPQpsA=='}

# Caching

# API Design

```python

dr = (
    driver.Builder()
    .with_cache()
    .build()
)
```


## Interface
```python
# ... Builder
def with_cache(
    self,
    fingerprint: Optional[Callable] = None,
    fingerprint_repo: Optional["Repository"] = None,
    data_repo: Optional["Repository"] = None,
    # ... other behavior kwargs like "recompute", "skip", etc.
):
    """Register a cache adapter on the builder.

    :param fingerprint: callable used to fingerprint values; when omitted,
        `fingerprinting.hash_value` is used.
    :param fingerprint_repo: repository for fingerprints (cache metadata); when
        omitted, `repository.dbmRepository()` is used.
    :param data_repo: repository for the cached data itself; when omitted,
        `repository.ShelveRepository()` is used.
    :return: the builder itself, so the call can be chained
        (e.g. `.with_cache().build()`).
    """
    from hamilton.caching import caching
    from hamilton.caching import fingerprinting
    from hamilton.caching import repository

    # Compare against None rather than relying on truthiness: a caller-supplied
    # object that happens to be falsy (e.g. an empty repository) must not be
    # silently replaced by the default.
    if fingerprint is None:
        fingerprint = fingerprinting.hash_value

    # "fingerprint repository" could also be named "the repository" i.e., metadata
    # used to manage data storage / "the cache"
    # dbm uses {str | bytes: str | bytes} on disk
    if fingerprint_repo is None:
        fingerprint_repo = repository.dbmRepository()

    # "data repository" could also be named "the cache" i.e., where data is stored
    # shelve uses {str | bytes: pickle} on disk
    if data_repo is None:
        data_repo = repository.ShelveRepository()

    # NOTE(review): `adapters` is not imported in this sketch while `caching` is
    # imported but unused -- presumably one of them is the intended home of
    # `CacheAdapter`; confirm before implementing.
    adapter = adapters.CacheAdapter(
        fingerprint=fingerprint,
        fingerprint_repo=fingerprint_repo,
        data_repo=data_repo,
        # ... forward the other behavior kwargs here
    )
    self.adapters.append(adapter)
    # Return the builder so that `.with_cache().build()` works.
    return self

```