# Caching embeddings on disk

In [1]:
import logging
from pathlib import Path

from embed import cached

In [2]:
# Emit INFO-level records so the "loaded"/"saved" messages logged by
# embed.cached are shown under each call in this notebook.
logging.basicConfig(level=logging.INFO)

## Already saved: cache hit

In [3]:
# Cache hit for a single text: the log reports "loaded" together with the
# path of a JSON file under ../data.
cached.embed_one('hello', data_dir='../data')

INFO:embed.cached:embed_one: loaded: ../data/c5919eb25e32df3ac400757942250b6a9776c7b1ac1e8e465ec6ca0de8e4cb3f.json


array([-0.02499537, -0.01935126, -0.02775795, ..., -0.01632432,
       -0.00503278, -0.00612988], dtype=float32)

In [4]:
# Cache hit for a batch: one call for several texts, and the log reports a
# single JSON file being loaded for the whole batch. The result has one row
# per input text.
cached.embed_many(['hello', 'goodbye'], data_dir='../data')

INFO:embed.cached:embed_many: loaded: ../data/c3a31ea6546aa7f9fa1e78d22a23001d98fa4a7abb43350e0ea56395d3abc61c.json


array([[-0.02499537, -0.01935126, -0.02775795, ..., -0.01632432,
        -0.00503278, -0.00612988],
       [ 0.00577373, -0.00664383, -0.00824171, ..., -0.01513784,
         0.0057058 , -0.00603896]], dtype=float32)

## Not yet saved: cache miss, then hit

In [5]:
# Cache miss: on a run where no file exists yet for 'hola', the log reports
# "saved" with the path of the newly written JSON file.
cached.embed_one('hola', data_dir='../data')

INFO:embed.cached:embed_one: saved: ../data/b58e4a60c963f8b3c43d83cc9245020ce71d8311fa2f48cfd36deed6f472a71b.json


array([-0.03501248, -0.00323063, -0.02819939, ...,  0.00106455,
        0.00356109,  0.01164014], dtype=float32)

In [6]:
# Same call repeated: the log now reports "loaded" for the same file name,
# and the returned array is the same as for the first call.
cached.embed_one('hola', data_dir='../data')

INFO:embed.cached:embed_one: loaded: ../data/b58e4a60c963f8b3c43d83cc9245020ce71d8311fa2f48cfd36deed6f472a71b.json


array([-0.03501248, -0.00323063, -0.02819939, ...,  0.00106455,
        0.00356109,  0.01164014], dtype=float32)

In [7]:
# Delete the cache file written for 'hola' so that the miss-then-hit
# demonstration starts from a miss again on the next full run.
# Pure-Python deletion keeps this cell independent of the shell/OS;
# missing_ok=True makes re-running the cell on its own harmless.
Path('../data/b58e4a60c963f8b3c43d83cc9245020ce71d8311fa2f48cfd36deed6f472a71b.json').unlink(missing_ok=True)

In [8]:
# Cache miss for a batch: the log reports "saved" under a file name that
# differs from the per-text files used by the embed_one calls above.
cached.embed_many(['hola', 'hello'], data_dir='../data')

INFO:embed.cached:embed_many: saved: ../data/2e41e52e67421c1d106bb8a5b9225ad1143761240862ed61e5be5ed20f39f2fd.json


array([[-0.03501248, -0.00323063, -0.02819939, ...,  0.00106455,
         0.00356109,  0.01164014],
       [-0.02499537, -0.01935126, -0.02775795, ..., -0.01632432,
        -0.00503278, -0.00612988]], dtype=float32)

In [9]:
# Same batch repeated: the log now reports "loaded" for the same file name.
cached.embed_many(['hola', 'hello'], data_dir='../data')

INFO:embed.cached:embed_many: loaded: ../data/2e41e52e67421c1d106bb8a5b9225ad1143761240862ed61e5be5ed20f39f2fd.json


array([[-0.03501248, -0.00323063, -0.02819939, ...,  0.00106455,
         0.00356109,  0.01164014],
       [-0.02499537, -0.01935126, -0.02775795, ..., -0.01632432,
        -0.00503278, -0.00612988]], dtype=float32)

In [10]:
# Delete the cache file written for the ['hola', 'hello'] batch so that the
# batch miss-then-hit demonstration starts from a miss on the next full run.
# Pure-Python deletion keeps this cell independent of the shell/OS;
# missing_ok=True makes re-running the cell on its own harmless.
Path('../data/2e41e52e67421c1d106bb8a5b9225ad1143761240862ed61e5be5ed20f39f2fd.json').unlink(missing_ok=True)

## Generating data for tests

To regenerate this test data, delete the `.json` files in `tests_data/` and re-run the notebook.

In [11]:
# Test fixture: cache the embedding of this sentence under ../tests_data
# (a different directory from the ../data used in the demo cells above).
cached.embed_one('Your text string goes here', data_dir='../tests_data')

INFO:embed.cached:embed_one: saved: ../tests_data/c6569ae39bd3ce1a82988cf5a0c219288e2df4918c9e9a09134f596f74509094.json


array([-6.9754184e-03, -5.3491648e-03,  1.1907940e-02, ...,
       -1.7028622e-02, -8.8358240e-05, -2.4030920e-02], dtype=float32)

In [12]:
# Test fixture: English sentence; its Spanish counterpart is embedded in the
# next cell.
cached.embed_one('The cat runs.', data_dir='../tests_data')

INFO:embed.cached:embed_one: saved: ../tests_data/84cccf9b16c60b6ae531f97eafc6aabd536fbeb5fd62f3108500642fd34562bf.json


array([-0.0090431 , -0.00407863, -0.01109516, ..., -0.02476496,
        0.00559219, -0.0241914 ], dtype=float32)

In [13]:
# Test fixture: Spanish counterpart of 'The cat runs.'
cached.embed_one('El gato corre.', data_dir='../tests_data')

INFO:embed.cached:embed_one: saved: ../tests_data/94aac07b5ec049de3cd871a0958697b02683e371b46658395302ecceab419b36.json


array([-0.0142647 , -0.00291274,  0.00040622, ..., -0.00806266,
        0.00487303, -0.00684123], dtype=float32)

In [14]:
# Test fixture: English sentence with a different subject and verb; its
# Spanish counterpart is embedded in the next cell.
cached.embed_one('The dog walks.', data_dir='../tests_data')

INFO:embed.cached:embed_one: saved: ../tests_data/116c3040d924d3f415ba72b32a55bf3b05ba62bebbcbdca9ae2e5101d5a0fbdf.json


array([ 0.00955871, -0.00640098, -0.00589739, ..., -0.01238388,
       -0.00030762, -0.01710306], dtype=float32)

In [15]:
# Test fixture: Spanish counterpart of 'The dog walks.'
cached.embed_one('El perro camina.', data_dir='../tests_data')

INFO:embed.cached:embed_one: saved: ../tests_data/c19b4807aa7da53ef4f0bb7c5e2c97145607b058a22a0b0dbe696402200f0812.json


array([ 0.00340059, -0.00801055,  0.00170341, ...,  0.00011718,
       -0.00956902, -0.00559491], dtype=float32)

In [16]:
# Test fixture: third English sentence. Note that it is only cached
# individually; it is not part of the five-text embed_many batch below.
cached.embed_one('The lion sleeps.', data_dir='../tests_data')

INFO:embed.cached:embed_one: saved: ../tests_data/7f688454101c9a4f6d0adf7b9c8470f7815da6689e64e9125e4f9400027853a9.json


array([ 0.01427713,  0.00727954, -0.00717061, ..., -0.00162684,
        0.02899001, -0.02586288], dtype=float32)

In [17]:
# Test fixture: Spanish counterpart of 'The lion sleeps.'; the text contains
# a non-ASCII character (ó).
cached.embed_one('El león duerme.', data_dir='../tests_data')

INFO:embed.cached:embed_one: saved: ../tests_data/ac9c4b85f0dead134e3de01ecc5bbbad4411198db04b56ed1658a48d27ab42bf.json


array([ 0.01215479,  0.00411979,  0.00353439, ...,  0.01378508,
        0.01454042, -0.00649598], dtype=float32)

In [18]:
# Test fixture: batch of five of the sentences embedded individually above,
# cached as a single file under ../tests_data.
# NOTE(review): in the recorded outputs the batch rows are close to, but not
# identical with, the corresponding embed_one results (e.g. first component
# -6.9911e-03 here vs -6.9754e-03 above), so tests comparing the two
# presumably need a tolerance rather than exact equality — confirm.
cached.embed_many([
    'Your text string goes here',
    'The cat runs.',
    'El gato corre.',
    'The dog walks.',
    'El perro camina.',
], data_dir='../tests_data')

INFO:embed.cached:embed_many: saved: ../tests_data/bc4f1fc1fa46eb8657d9793adaad43d1f09e0e5a21c9139505e96463b3c321d9.json


array([[-6.99112890e-03, -5.31056896e-03,  1.19118076e-02, ...,
        -1.70207098e-02, -7.68856116e-05, -2.40656175e-02],
       [-9.09975544e-03, -4.11655614e-03, -1.11006815e-02, ...,
        -2.47758050e-02,  5.55989938e-03, -2.42022909e-02],
       [-1.42147215e-02, -2.92078988e-03,  3.90018802e-04, ...,
        -8.08201265e-03,  4.93654609e-03, -6.90483581e-03],
       [ 9.59877204e-03, -6.36116648e-03, -5.89548331e-03, ...,
        -1.23801986e-02, -3.47282301e-04, -1.70813799e-02],
       [ 3.45983449e-03, -7.59354141e-03,  1.42495893e-03, ...,
         4.55097703e-04, -8.73538014e-03, -6.12724526e-03]], dtype=float32)