# Prepare Connection

In [None]:
%pip install pymysql sentence-transformers

In [None]:
import pymysql
def get_connection(host="127.0.0.1", port=4002, user="root", database="public"):
    """Open a new MySQL-protocol connection to the database.

    The defaults reproduce the settings that used to be hard-coded, so
    calling ``get_connection()`` with no arguments behaves exactly as before.

    Args:
        host: Server host name or IP address.
        port: TCP port of the MySQL-protocol endpoint.
        user: User name to connect as (no password is supplied).
        database: Database to select once connected.

    Returns:
        The connection object returned by ``pymysql.connect``.
    """
    return pymysql.connect(
        host=host,
        port=port,
        user=user,
        database=database,
    )
# Open one shared connection and cursor; the cells below all reuse `cursor`.
c = get_connection()
cursor = c.cursor()

# Prepare Model

Note that loading the model may take tens of seconds.

In [None]:
from sentence_transformers import SentenceTransformer

# Load the sentence-embedding model once; text_to_embedding() reuses it.
# NOTE(review): trust_remote_code=True presumably lets the library run code
# shipped with the model repository -- confirm it is really needed here.
embed_model = SentenceTransformer("sentence-transformers/msmarco-MiniLM-L12-cos-v5", trust_remote_code=True)
# Embedding size reported by the model; used below to size the VECTOR column.
embed_model_dims = embed_model.get_sentence_embedding_dimension()

def text_to_embedding(text):
    """Encode ``text`` with the shared model and return the result as a list.

    The value returned by ``embed_model.encode`` is converted with
    ``.tolist()`` so callers get plain Python values.
    """
    return embed_model.encode(text).tolist()

# Create a Vector Table

In [None]:
# Create the table if it does not exist yet:
#   ts        - timestamp time index, defaulting to the current time
#   document  - the raw text, used as the primary key
#   embedding - vector column sized to the model's embedding dimension
cursor.execute(f"""
CREATE TABLE IF NOT EXISTS embedded_documents(
    ts TIMESTAMP TIME INDEX DEFAULT CURRENT_TIMESTAMP,
    document TEXT PRIMARY KEY,
    embedding VECTOR({embed_model_dims}));
"""
)

# Store the Vector Embeddings

In [None]:
# Sample documents to embed and store.
documents = ["dog", "fish", "tree"]

def embedding_s(embedding):
    """Serialize an iterable of numbers as a bracketed, comma-separated string.

    For example ``[1, 2, 3]`` becomes ``"[1,2,3]"`` (no spaces). This is the
    textual form the notebook passes to SQL statements as a vector value.
    """
    components = [str(value) for value in embedding]
    return "[" + ",".join(components) + "]"

def insert_doc(doc):
    """Embed ``doc`` and insert it into the ``embedded_documents`` table.

    The timestamp column is left to its DEFAULT. Values are passed as bound
    parameters rather than spliced into the SQL text, so documents that
    contain quotes or other SQL metacharacters are stored verbatim instead of
    breaking (or altering) the statement.

    Args:
        doc: The document text to embed and store.
    """
    embedding = embedding_s(text_to_embedding(doc))
    cursor.execute(
        "INSERT INTO embedded_documents VALUES (DEFAULT, %s, %s);",
        (doc, embedding),
    )

# Embed and store every sample document, one INSERT per document.
for doc in documents:
    insert_doc(doc)

# Inspect the Vector Table

In [None]:
# Select every stored row; the rows are read back from the cursor in the next cell.
# NOTE(review): the trailing `;` presumably suppresses the notebook's echo of
# execute()'s return value -- keep it if that is the intent.
cursor.execute("""
SELECT * FROM embedded_documents;
""");

In [None]:
# Iterate the cursor to print each row returned by the preceding SELECT.
for row in cursor:
    print(row)

# Search

The search term is "a swimming animal". Its vector embedding (shown schematically as `[1,2,3]`) is computed with the same model and compared against the stored embeddings.

In [None]:
def search(query, k):
    """Return the ``k`` stored documents closest to ``query``.

    The query text is embedded with the same model as the stored documents
    and rows are ordered by ``vec_cos_distance`` to that embedding, smallest
    distance first. The vector literal and the limit are passed as bound
    parameters instead of being interpolated into the SQL text.

    Args:
        query: Free-text search term.
        k: Maximum number of rows to return; coerced with ``int()`` so that
            only a number can ever reach the LIMIT clause.

    Returns:
        The result of ``cursor.fetchall()``: one row per match, holding the
        document and its distance.
    """
    query_embedding = embedding_s(text_to_embedding(query))
    cursor.execute(
        "SELECT document, vec_cos_distance(embedding, %s) AS distance\n"
        "FROM embedded_documents\n"
        "ORDER BY distance\n"
        "LIMIT %s;",
        (query_embedding, int(k)),
    )
    return cursor.fetchall()

# Run the search for up to 3 matches and print each result row
# (the document followed by its distance to the query).
query = "a swimming animal"
res = search(query, 3)
for doc in res:
    print(doc)