WIP: Add user guide

requests-cache · Apr 9, 2021 · 9c01f0d · 9c01f0d
1 parent 3883126
commit 9c01f0d
Show file tree

Hide file tree

Showing 7 changed files with 394 additions and 53 deletions.
diff --git a/aiohttp_client_cache/backends/base.py b/aiohttp_client_cache/backends/base.py
@@ -27,51 +27,6 @@ class CacheBackend:
     To extend this with your own custom backend, implement one or more subclasses of
     :py:class:`.BaseCache` to use as :py:attr:`CacheBackend.responses` and
     :py:attr:`CacheBackend.response_aliases`.
-
-    **Cache Name:**
-
-    The ``cache_name`` parameter will be used as follows depending on the backend:
-
-    * ``sqlite``: Cache filename prefix, e.g ``my_cache.sqlite``
-    * ``mongodb``: Database name
-    * ``redis``: Namespace, meaning all keys will be prefixed with ``'cache_name:'``
-
-    **Cache Keys:**
-
-    The cache key is a hash created from request information, and is used as an index for cached
-    responses. There are a couple ways you can customize how the cache key is created:
-
-    * Use ``include_get_headers`` if you want headers to be included in the cache key. In other
-      words, this will create separate cache items for responses with different headers.
-    * Use ``ignored_parameters`` to exclude specific request params from the cache key. This is
-      useful, for example, if you request the same resource with different credentials or access
-      tokens.
-
-    **URL Patterns:**
-
-    The ``urls_expire_after`` parameter can be used to set different expiration times for different
-    requests, based on glob patterns. This allows you to customize caching based on what you
-    know about what you're requesting. For example, you might request one resource that gets updated
-    frequently, another that changes infrequently, and another that never changes.
-
-    Example::
-
-        urls_expire_after = {
-            '*.site_1.com': timedelta(days=1),
-            'site_2.com/resource_1': timedelta(hours=12),
-            'site_2.com/resource_2': 60,
-            'site_2.com/static': -1,
-        }
-
-    Notes:
-
-    * ``urls_expire_after`` should be a dict in the format ``{'pattern': expiration_time}``
-    * ``expiration_time`` may be either a number (in seconds) or a ``timedelta``
-      (same as ``expire_after``)
-    * Patterns will match request **base URLs**, so the pattern ``site.com/base`` is equivalent to
-      ``https://site.com/base/**``
-    * If there is more than one match, the first match (in the order they are defined) will be used
-    * If no patterns match a request, ``expire_after`` will be used as a default.
     """
 
     def __init__(
@@ -317,15 +272,15 @@ async def clear(self):
 
     @abstractmethod
     async def delete(self, key: str):
-        """Delete a single item from the cache. Does not raise an error if the item is missing."""
+        """Delete an item from the cache. Does not raise an error if the item is missing."""
 
     @abstractmethod
     def keys(self) -> AsyncIterable[str]:
         """Get all keys stored in the cache"""
 
     @abstractmethod
     async def read(self, key: str) -> ResponseOrKey:
-        """Read a single item from the cache. Returns ``None`` if the item is missing."""
+        """Read an item from the cache. Returns ``None`` if the item is missing."""
 
     @abstractmethod
     async def size(self) -> int:

diff --git a/aiohttp_client_cache/session.py b/aiohttp_client_cache/session.py
@@ -46,16 +46,16 @@ async def _request(
             return new_response
 
     @asynccontextmanager
-    async def disable_cache(self):
+    async def disabled(self):
         """Temporarily disable the cache
 
         Example:
 
-            >>> session = CachedSession()
-            >>> await session.get('http://httpbin.org/ip')
-            >>> async with session.disable_cache():
-            >>>     # Will return a new response, not a cached one
+            >>> async with CachedSession() as session:
             >>>     await session.get('http://httpbin.org/ip')
+            >>>     async with session.disabled():
+            >>>         # Will return a new response, not a cached one
+            >>>         await session.get('http://httpbin.org/ip')
         """
         self.cache.disabled = True
         yield

diff --git a/docs/advanced_usage.rst b/docs/advanced_usage.rst
@@ -0,0 +1,123 @@
+.. _advanced_usage:
+
+Advanced Usage
+==============
+This section covers some more advanced and use-case-specific features.
+
+.. contents::
+    :local:
+
+Custom Response Filtering
+-------------------------
+If you need more advanced behavior for determining what to cache, you can provide a custom filtering
+function via the ``filter_fn`` param. This can be any function that takes a :py:class:`requests.Response`
+object and returns a boolean indicating whether or not that response should be cached. It will be applied
+to both new responses (on write) and previously cached responses (on read). Example:
+
+    >>> from sys import getsizeof
+    >>> from aiohttp_client_cache import CachedSession, SQLiteCache
+    >>>
+    >>> def filter_by_size(response):
+    >>>     """Don't cache responses with a body over 1 MB"""
+    >>>     return getsizeof(response.content) <= 1024 * 1024
+    >>>
+    >>> cache = SQLiteCache(filter_fn=filter_by_size)
+
+Custom Backends
+---------------
+If the built-in :py:mod:`Cache Backends <aiohttp_client_cache.backends>` don't suit your needs, you can
+create your own by making subclasses of :py:class:`.CacheBackend` and :py:class:`.BaseCache`:
+
+    >>> from aiohttp_client_cache import CachedSession
+    >>> from aiohttp_client_cache.backends import BaseCache, BaseStorage
+    >>>
+    >>> class CustomCache(BaseCache):
+    ...     """Wrapper for higher-level cache operations. In most cases, the only thing you need
+    ...     to specify here is which storage class(es) to use.
+    ...     """
+    ...     def __init__(self, **kwargs):
+    ...         super().__init__(**kwargs)
+    ...         self.redirects = CustomStorage(**kwargs)
+    ...         self.responses = CustomStorage(**kwargs)
+    >>>
+    >>> class CustomStorage(BaseStorage):
+    ...     """interface for lower-level backend storage operations"""
+    ...     def __init__(self, **kwargs):
+    ...         super().__init__(**kwargs)
+    ...
+    ...     async def contains(self, key: str) -> bool:
+    ...         """Check if a key is stored in the cache"""
+    ...
+    ...     async def clear(self):
+    ...         """Delete all items from the cache"""
+    ...
+    ...     async def delete(self, key: str):
+    ...         """Delete an item from the cache"""
+    ...
+    ...     async def keys(self) -> AsyncIterable[str]:
+    ...         """Get all keys stored in the cache"""
+    ...
+    ...     async def read(self, key: str) -> ResponseOrKey:
+    ...         """Read an item from the cache"""
+    ...
+    ...     async def size(self) -> int:
+    ...         """Get the number of items in the cache"""
+    ...
+    ...     def values(self) -> AsyncIterable[ResponseOrKey]:
+    ...         """Get all values stored in the cache"""
+    ...
+    ...     async def write(self, key: str, item: ResponseOrKey):
+    ...         """Write an item to the cache"""
+
+You can then use your custom backend in a :py:class:`.CachedSession` with the ``cache`` parameter:
+
+    >>> session = CachedSession(cache=CustomCache())
+
+Cache Inspection
+----------------
+Here are some ways to get additional information out of the cache session, backend, and responses:
+
+Response Attributes
+~~~~~~~~~~~~~~~~~~~
+The following attributes are available on responses:
+* ``from_cache``: indicates if the response came from the cache
+* ``created_at``: :py:class:`~datetime.datetime` of when the cached response was created or last updated
+* ``expires``: :py:class:`~datetime.datetime` after which the cached response will expire
+* ``is_expired``: indicates if the cached response is expired (if an old response was returned due to a request error)
+
+Examples:
+
+    >>> from aiohttp_client_cache import CachedSession
+    >>> session = CachedSession(expire_after=timedelta(days=1))
+
+    >>> # Placeholders are added for non-cached responses
+    >>> r = session.get('http://httpbin.org/get')
+    >>> print(r.from_cache, r.created_at, r.expires, r.is_expired)
+    False None None None
+
+    >>> # Values will be populated for cached responses
+    >>> r = session.get('http://httpbin.org/get')
+    >>> print(r.from_cache, r.created_at, r.expires, r.is_expired)
+    True 2021-01-01 18:00:00 2021-01-02 18:00:00 False
+
+Cache Contents
+~~~~~~~~~~~~~~
+You can use :py:meth:`.CachedSession.cache.urls` to see all URLs currently in the cache:
+
+    >>> session = CachedSession()
+    >>> print(session.urls)
+    ['https://httpbin.org/get', 'https://httpbin.org/stream/100']
+
+If needed, you can get more details on cached responses via ``CachedSession.cache.responses``, which
+is a dict-like interface to the cache backend. See :py:class:`.CachedResponse` for a full list of
+attributes available.
+
+For example, if you wanted to see all URLs requested with a specific method:
+
+    >>> post_urls = [
+    >>>     response.url for response in session.cache.responses.values()
+    >>>     if response.request.method == 'POST'
+    >>> ]
+
+You can also inspect ``CachedSession.cache.redirects``, which maps redirect URLs to keys of the
+responses they redirect to.
diff --git a/docs/conf.py b/docs/conf.py
@@ -30,6 +30,7 @@
 # Sphinx extension modules
 extensions = [
     'sphinx.ext.autodoc',
+    'sphinx.ext.autosectionlabel',
     'sphinx.ext.intersphinx',
     'sphinx.ext.napoleon',
     # 'sphinx.ext.viewcode',
@@ -81,7 +82,7 @@
 html_theme_options = {
     'color_primary': 'blue',
     'color_accent': 'light-blue',
-    'globaltoc_depth': 1,
+    'globaltoc_depth': 3,
     'globaltoc_includehidden': False,
     'logo_icon': '&#xe1af',
     'repo_url': 'https://github.com/JWCook/aiohttp-client-cache',

diff --git a/docs/index.rst b/docs/index.rst
@@ -14,6 +14,9 @@ Contents
 .. toctree::
     :maxdepth: 2
 
+    user_guide
+    advanced_usage
+    security
     reference
     history
     contributing

diff --git a/docs/security.rst b/docs/security.rst
@@ -0,0 +1,57 @@
+.. _security:
+
+Security
+========
+
+Pickle Vulnerabilities
+----------------------
+.. warning:: The Python ``pickle`` module has `known security vulnerabilities <https://docs.python.org/3/library/pickle.html>`_,
+    potentially leading to code execution when deserializing data.
+
+This means it should only be used to deserialize data that you trust hasn't been tampered with.
+Since this isn't always possible, aiohttp-client-cache can optionally use
+`itsdangerous <https://itsdangerous.palletsprojects.com>`_ to add a layer of security around these operations.
+It works by signing serialized data with a secret key that you control. Then, if the data is tampered
+with, the signature check fails and raises an error.
+
+Creating and Storing a Secret Key
+---------------------------------
+To enable this behavior, first create a secret key, which can be any ``str`` or ``bytes`` object.
+
+One common pattern for handling this is to store it wherever you store the rest of your credentials
+(`Linux keyring <https://itsfoss.com/ubuntu-keyring>`_,
+`macOS keychain <https://support.apple.com/guide/mac-help/use-keychains-to-store-passwords-mchlf375f392/mac>`_,
+`password database <https://keepassxc.org>`_, etc.),
+set it in an environment variable, and then read it in your application:
+
+    >>> import os
+    >>> secret_key = os.environ['SECRET_KEY']
+
+Alternatively, you can use the `keyring <https://keyring.readthedocs.io>`_ package to read the key
+directly:
+
+    >>> import keyring
+    >>> secret_key = keyring.get_password('aiohttp-client-cache-example', 'secret_key')
+
+Signing Cached Responses
+------------------------
+Once you have your key, just pass it to :py:class:`.CachedSession` or :py:func:`.install_cache` to start using it:
+
+    >>> from aiohttp_client_cache import CachedSession, RedisBackend
+    >>>
+    >>> cache = RedisBackend(secret_key=secret_key)
+    >>> async with CachedSession(cache=cache) as session:
+    >>>     await session.get('https://httpbin.org/get')
+
+You can verify that it's working by modifying the cached item (*without* your key):
+
+    >>> cache_2 = RedisBackend(secret_key='a different key')
+    >>> async with CachedSession(cache=cache_2) as session_2:
+    >>>     cache_key = list(await session_2.cache.responses.keys())[0]
+    >>>     await session_2.cache.responses.write(cache_key, 'exploit!')
+
+Then, if you try to get that cached response again (*with* your key), you will get an error:
+
+    >>> async with CachedSession(cache=cache) as session:
+    >>>     await session.get('https://httpbin.org/get')
+    BadSignature: Signature b'iFNmzdUOSw5vqrR9Cb_wfI1EoZ8' does not match