In [2]:
!pip install sqlite-vec

Collecting sqlite-vec
  Downloading sqlite_vec-0.1.6-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux1_x86_64.whl.metadata (198 bytes)
Downloading sqlite_vec-0.1.6-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux1_x86_64.whl (151 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/151.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━[0m [32m92.2/151.6 kB[0m [31m2.5 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m151.6/151.6 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: sqlite-vec
Successfully installed sqlite-vec-0.1.6


In [3]:
import sqlite3
import sqlite_vec

from typing import List
import struct

In [5]:
# Open a throwaway in-memory database for the demo.
db = sqlite3.connect(":memory:")

# Extension loading is switched on only for the sqlite_vec.load() call and
# switched off again immediately afterwards.
db.enable_load_extension(True)
sqlite_vec.load(db)
db.enable_load_extension(False)

# Sanity check: ask SQLite for its own version and the loaded extension's version.
sqlite_version, vec_version = db.execute("select sqlite_version(), vec_version()").fetchone()
print(f"sqlite_version={sqlite_version}, vec_version={vec_version}")

sqlite_version=3.37.2, vec_version=v0.1.6


In [7]:
# Create a vec0 virtual table called vec_items holding one 4-dimensional
# float embedding per row. IF NOT EXISTS keeps the cell re-runnable against
# the same connection instead of failing with "table already exists".
db.execute(
    "CREATE VIRTUAL TABLE IF NOT EXISTS vec_items USING vec0(embedding float[4])"
)

<sqlite3.Cursor at 0x7c3f43dea4c0>

In [4]:
def serialize_f32(vector: List[float]) -> bytes:
    """Pack a list of floats into a contiguous blob of 32-bit floats.

    Args:
        vector: The values to encode; each one is stored as a 4-byte float.

    Returns:
        ``4 * len(vector)`` bytes in the platform's native byte order
        (an empty list yields ``b""``).
    """
    fmt = f"{len(vector)}f"
    return struct.pack(fmt, *vector)

In [8]:

# Toy dataset of (rowid, embedding) pairs: rowid n carries the 4-dimensional
# vector whose components all equal n / 10 (0.1 for row 1 ... 0.5 for row 5).
items = [
    (rowid, [component] * 4)
    for rowid, component in enumerate((0.1, 0.2, 0.3, 0.4, 0.5), start=1)
]

In [9]:
# Store every (rowid, embedding) pair in vec_items. Using the connection as a
# context manager commits all inserts together, or rolls back if one fails.
with db:
    for rowid, embedding in items:
        blob = serialize_f32(embedding)
        db.execute(
            "INSERT INTO vec_items(rowid, embedding) VALUES (?, ?)",
            (rowid, blob),
        )

In [14]:
# Read back every stored row; embeddings are returned as raw float32 blobs.
list(db.execute(
    """
      SELECT *
      FROM vec_items
    """))

[(1, b'\xcd\xcc\xcc=\xcd\xcc\xcc=\xcd\xcc\xcc=\xcd\xcc\xcc='),
 (2, b'\xcd\xccL>\xcd\xccL>\xcd\xccL>\xcd\xccL>'),
 (3, b'\x9a\x99\x99>\x9a\x99\x99>\x9a\x99\x99>\x9a\x99\x99>'),
 (4, b'\xcd\xcc\xcc>\xcd\xcc\xcc>\xcd\xcc\xcc>\xcd\xcc\xcc>'),
 (5, b'\x00\x00\x00?\x00\x00\x00?\x00\x00\x00?\x00\x00\x00?')]

In [15]:
# Point lookup: fetch only the embedding blob stored under rowid 1.
list(db.execute(
    """
      SELECT embedding
      FROM vec_items
      WHERE rowid = 1
    """))

[(b'\xcd\xcc\xcc=\xcd\xcc\xcc=\xcd\xcc\xcc=\xcd\xcc\xcc=',)]

In [19]:
# K-nearest-neighbour search for the vectors closest to `query`.
# NOTE: vec_items was declared without a distance_metric option, so `distance`
# is the L2 (Euclidean) distance, not cosine. With this data every vector
# points in the same direction, so a cosine distance would be 0 for all rows,
# whereas L2 gives 0.0 for [0.3]*4 and ~0.2 for [0.4]*4.
query = [0.3, 0.3, 0.3, 0.3]

rows = db.execute(
    """
      SELECT
        rowid,
        distance
      FROM vec_items
      WHERE embedding MATCH ?
        AND k = ?
      ORDER BY distance
    """,
    # Parameters: the query vector as a float32 blob, and k (how many
    # neighbours to return).
    [serialize_f32(query), 2]
).fetchall()

rows

[(3, 0.0), (4, 0.19999998807907104)]