# Caching embeddings on disk

In [1]:
import logging
from pathlib import Path

from embed import cached

In [2]:
# Emit INFO-level records so the `embed.cached` "loaded"/"saved" messages
# show up under each cell.
logging.basicConfig(level=logging.INFO)

## Already saved: cache hit

In [3]:
# Cache hit: an entry for 'hello' already exists under ../data, so the log
# below reports "loaded".
cached.embed_one('hello', data_dir='../data')

INFO:embed.cached:embed_one: loaded: ../data/c5919eb25e32df3ac400757942250b6a9776c7b1ac1e8e465ec6ca0de8e4cb3f.json


array([-0.02500221, -0.01938596, -0.02780373, ..., -0.01629372,
       -0.00507444, -0.00610849], dtype=float32)

In [4]:
# Cache hit for a batch: the log reports a single "loaded" file for the list.
# NOTE(review): that file name differs from the one logged for 'hello' alone,
# so the batch appears to be stored as its own entry - confirm in embed.cached.
cached.embed_many(['hello', 'goodbye'], data_dir='../data')

INFO:embed.cached:embed_many: loaded: ../data/b87efb3ff4d612e1376e4474b5efaa69f550cd479ae9a8e07ecb4fb20d0e7b87.json


array([[-0.02504645, -0.01940274, -0.0277824 , ..., -0.01633637,
        -0.00506216, -0.00611953],
       [ 0.00578627, -0.00664984, -0.00819586, ..., -0.01518854,
         0.00575716, -0.00608059]], dtype=float32)

## Not yet saved: cache miss, then hit

In [5]:
# First call for 'hola': the log below reports "saved", i.e. a new file is
# written under ../data.
cached.embed_one('hola', data_dir='../data')

INFO:embed.cached:embed_one: saved: ../data/b58e4a60c963f8b3c43d83cc9245020ce71d8311fa2f48cfd36deed6f472a71b.json


array([-0.03501248, -0.00323063, -0.02819939, ...,  0.00106455,
        0.00356109,  0.01164014], dtype=float32)

In [6]:
# Same call again: the log now reports "loaded" for the file that the
# previous cell saved.
cached.embed_one('hola', data_dir='../data')

INFO:embed.cached:embed_one: loaded: ../data/b58e4a60c963f8b3c43d83cc9245020ce71d8311fa2f48cfd36deed6f472a71b.json


array([-0.03501248, -0.00323063, -0.02819939, ...,  0.00106455,
        0.00356109,  0.01164014], dtype=float32)

In [7]:
# Remove the 'hola' entry saved above so that re-running this section starts
# from a cache miss again. pathlib is used instead of `!rm` so the cleanup does
# not depend on a POSIX shell; missing_ok=True keeps the cell re-runnable when
# the file is already gone.
Path('../data/b58e4a60c963f8b3c43d83cc9245020ce71d8311fa2f48cfd36deed6f472a71b.json').unlink(missing_ok=True)

In [8]:
# First call for this batch: the log below reports "saved" to a new file.
# NOTE(review): 'hello' already has a single-text entry under ../data, yet the
# batch is still saved as a whole - presumably batches are keyed on the full
# list; confirm in embed.cached.
cached.embed_many(['hola', 'hello'], data_dir='../data')

INFO:embed.cached:embed_many: saved: ../data/4a77f419587b08963e94105b8b9272531e53ade9621b613fda175aa0a96cd839.json


array([[-0.03501248, -0.00323063, -0.02819939, ...,  0.00106455,
         0.00356109,  0.01164014],
       [-0.02499537, -0.01935126, -0.02775795, ..., -0.01632432,
        -0.00503278, -0.00612988]], dtype=float32)

In [9]:
# Same batch again: the log now reports "loaded" for the file that the
# previous embed_many call saved.
cached.embed_many(['hola', 'hello'], data_dir='../data')

INFO:embed.cached:embed_many: loaded: ../data/4a77f419587b08963e94105b8b9272531e53ade9621b613fda175aa0a96cd839.json


array([[-0.03501248, -0.00323063, -0.02819939, ...,  0.00106455,
         0.00356109,  0.01164014],
       [-0.02499537, -0.01935126, -0.02775795, ..., -0.01632432,
        -0.00503278, -0.00612988]], dtype=float32)

In [10]:
# Remove the ['hola', 'hello'] batch entry saved above so that re-running this
# section starts from a cache miss again. pathlib is used instead of `!rm` so
# the cleanup does not depend on a POSIX shell; missing_ok=True keeps the cell
# re-runnable when the file is already gone.
Path('../data/4a77f419587b08963e94105b8b9272531e53ade9621b613fda175aa0a96cd839.json').unlink(missing_ok=True)

## Generating data for tests

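To regenerate this test data, delete the `.json` files in `../tests_data/` (the `data_dir` passed by the cells below) and re-run the notebook; each call should then log `saved` rather than `loaded`.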

In [11]:
# Test fixture: a generic placeholder sentence, written to ../tests_data
# (the log below reports "saved").
cached.embed_one('Your text string goes here', data_dir='../tests_data')

INFO:embed.cached:embed_one: saved: ../tests_data/c6569ae39bd3ce1a82988cf5a0c219288e2df4918c9e9a09134f596f74509094.json


array([-0.00700073, -0.0053043 ,  0.01185825, ..., -0.01703825,
       -0.00011485, -0.02399868], dtype=float32)

In [12]:
# Test fixture: English sentence; the next cell embeds its Spanish counterpart.
cached.embed_one('The cat runs.', data_dir='../tests_data')

INFO:embed.cached:embed_one: saved: ../tests_data/84cccf9b16c60b6ae531f97eafc6aabd536fbeb5fd62f3108500642fd34562bf.json


array([-0.00906457, -0.00409244, -0.01109804, ..., -0.02477139,
        0.00554583, -0.02427418], dtype=float32)

In [13]:
# Test fixture: Spanish counterpart of 'The cat runs.'
cached.embed_one('El gato corre.', data_dir='../tests_data')

INFO:embed.cached:embed_one: saved: ../tests_data/94aac07b5ec049de3cd871a0958697b02683e371b46658395302ecceab419b36.json


array([-0.01420416, -0.00294338,  0.00040551, ..., -0.00811486,
        0.00492778, -0.00681724], dtype=float32)

In [14]:
# Test fixture: English sentence; the next cell embeds its Spanish counterpart.
cached.embed_one('The dog walks.', data_dir='../tests_data')

INFO:embed.cached:embed_one: saved: ../tests_data/116c3040d924d3f415ba72b32a55bf3b05ba62bebbcbdca9ae2e5101d5a0fbdf.json


array([ 0.00955974, -0.00636049, -0.00593287, ..., -0.0123092 ,
       -0.00036863, -0.01713025], dtype=float32)

In [15]:
# Test fixture: Spanish counterpart of 'The dog walks.'
cached.embed_one('El perro camina.', data_dir='../tests_data')

INFO:embed.cached:embed_one: saved: ../tests_data/c19b4807aa7da53ef4f0bb7c5e2c97145607b058a22a0b0dbe696402200f0812.json


array([ 3.3449344e-03, -8.1113884e-03,  1.7457254e-03, ...,
        9.9317353e-05, -9.5703155e-03, -5.5582649e-03], dtype=float32)

In [16]:
# Test fixture: English sentence; the next cell embeds its Spanish counterpart.
# This pair is embedded individually only - it is not in the five-sentence
# embed_many batch further down.
cached.embed_one('The lion sleeps.', data_dir='../tests_data')

INFO:embed.cached:embed_one: saved: ../tests_data/7f688454101c9a4f6d0adf7b9c8470f7815da6689e64e9125e4f9400027853a9.json


array([ 0.01427713,  0.00727954, -0.00717061, ..., -0.00162684,
        0.02899001, -0.02586288], dtype=float32)

In [17]:
# Test fixture: Spanish counterpart of 'The lion sleeps.'
cached.embed_one('El león duerme.', data_dir='../tests_data')

INFO:embed.cached:embed_one: saved: ../tests_data/ce38d8bb2f403e60cd74c161595830fdf1a10b63133e577ecc051407ed6da3ff.json


array([ 0.01224608,  0.00345483,  0.0032566 , ...,  0.01358018,
        0.01453671, -0.00654467], dtype=float32)

In [18]:
# Batch test fixture: the first five sentences embedded individually above
# (the lion pair is left out). The log below reports one "saved" file for the
# whole list.
cached.embed_many([
    'Your text string goes here',
    'The cat runs.',
    'El gato corre.',
    'The dog walks.',
    'El perro camina.',
], data_dir='../tests_data')

INFO:embed.cached:embed_many: saved: ../tests_data/bd1c3556111ed8b208c08f5d417be020965c48957144eb0e2a19869a10302413.json


array([[-6.9292835e-03, -5.3364220e-03,  1.1875892e-02, ...,
        -1.7017407e-02, -4.5471323e-05, -2.4047505e-02],
       [-9.1028195e-03, -4.1309856e-03, -1.1052059e-02, ...,
        -2.4805024e-02,  5.5642496e-03, -2.4155278e-02],
       [-1.4176358e-02, -2.9412776e-03,  3.8862362e-04, ...,
        -8.0564748e-03,  4.8509724e-03, -6.8856594e-03],
       [ 9.5758662e-03, -6.3913045e-03, -5.9445151e-03, ...,
        -1.2389688e-02, -3.8638557e-04, -1.7123753e-02],
       [ 3.4598345e-03, -7.5935414e-03,  1.4249589e-03, ...,
         4.5509770e-04, -8.7353801e-03, -6.1272453e-03]], dtype=float32)