In [None]:
%pip install numpy redis

In [None]:
import numpy as np
from redis import Redis
from redis.commands.search.field import VectorField, TagField, NumericField, TextField
from redis.commands.search.aggregation import AggregateResult, AggregateRequest

In [None]:
# Address of the Redis server that every later cell talks to.
host, port = "localhost", 6379

# Shared client handle; the cells below issue all their commands through it.
redis_conn = Redis(host=host, port=port)

In [None]:
# Index fields and configurations

# Dataset shape: how many documents are loaded and the length of each vector.
n_vec, dim = 10000, 128

# NOTE(review): presumably HNSW graph parameters; no cell visible in this
# notebook section references them -- confirm whether the index definition
# is meant to receive them.
M, EF = 40, 200

# Hash field names shared by the loader, the index schema and the queries.
vector_field_name = "vector"
title_field_name = "title"
genre_field_name = "genre"
rating_field_name = "rating"

# Number of results a query should finally return.
k = 10

In [None]:
def load_docs(client : Redis, n, d):
    """Write ``n`` synthetic documents to Redis as hashes keyed ``1..n``.

    Each hash carries a random float16 vector of length ``d`` (serialized to
    raw bytes), a rating, a genre string and a title. Field names come from
    the module-level ``*_field_name`` settings.

    Args:
        client: Redis client used to issue one ``hset`` per document.
        n: Number of documents to create.
        d: Dimensionality of each random vector.
    """
    for doc_id in range(1, n + 1):
        embedding = np.random.rand(1, d).astype(np.float16)

        # Every fifth document is the "spiderman" variant with two genres;
        # all the others are single-genre "matrix" documents.
        if doc_id % 5 == 0:
            genre, title = "action, drama", "spiderman"
        else:
            genre, title = "action", "matrix"

        fields = {
            vector_field_name: embedding.tobytes(),
            # Rating grows linearly with the doc id, from 10/n up to 10.
            rating_field_name: 10*(doc_id/n),
            genre_field_name: genre,
            title_field_name: title,
        }
        client.hset(doc_id, mapping=fields)

def delete_data(client: Redis):
    """Remove all data from the Redis server behind ``client``.

    Issues FLUSHALL through ``client.flushall()``. Per the Redis command
    documentation this clears every database on the server, not only the
    keys written by this notebook, so point it at a disposable instance.
    """
    client.flushall()

def print_results(res):
    """Print the ids and distances of the documents in a search result.

    Args:
        res: Result object exposing a ``docs`` iterable. Each document must
            have an ``id`` convertible to ``int`` and may have a ``dist``
            attribute convertible to ``float``.

    Documents without a ``dist`` attribute are shown with ``'-'`` in the
    distance list.
    """
    def _distance_or_dash(doc):
        # Not every query yields a distance field, hence the placeholder.
        if hasattr(doc, 'dist'):
            return float(doc.dist)
        return '-'

    doc_ids = list(map(lambda doc: int(doc.id), res.docs))
    distances = list(map(_distance_or_dash, res.docs))

    print(f"got {len(doc_ids)} doc ids: ", doc_ids)
    print("\ndistances: ", distances)


In [None]:
# build HNSW index
# Start from an empty server so the index holds only this notebook's documents.
delete_data(redis_conn)

# FLOAT16 storage matches the np.float16 byte blobs that load_docs writes.
vector_attrs = {"TYPE": "FLOAT16", "DIM": dim, "DISTANCE_METRIC": "COSINE"}
schema = (
    VectorField(vector_field_name, "HNSW", vector_attrs),
    NumericField(rating_field_name),
    TagField(genre_field_name),
    TextField(title_field_name),
)

search_client = redis_conn.ft()
search_client.create_index(schema)
search_client.config_set("default_dialect", 2)

# load vectors with meta-data
np.random.seed(42)  # fixed seed keeps the generated vectors reproducible
load_docs(redis_conn, n_vec, dim)

print("index size: ", search_client.info()['num_docs'])

# Drawn after loading, so it continues the same seeded random stream.
query_vector = np.random.rand(1, dim).astype(np.float16)

In [None]:
# Hybrid re-ranking: among documents whose text matches 'matrix' or
# 'spiderman', fetch the k*multiplier nearest vectors, then keep the top k by
# a weighted blend of vector similarity and full-text relevance.
filter_query = 'matrix|spiderman'
multiplier = 100       # over-fetch factor: KNN returns k*multiplier candidates to re-rank
vector_weight = 0.7
text_weight = 1 - vector_weight

q = [f'({filter_query})=>[KNN {k*multiplier} @{vector_field_name} $vec_param AS dist]']
# ADDSCORES exposes the full-text score to the aggregation pipeline under the
# name @__score; any other name (e.g. @tfidf) is not a loaded property.
q += ['ADDSCORES']
# The index uses COSINE distance, so (2 - @dist) turns it into a
# higher-is-better similarity before blending with the text score.
q += ['APPLY', f'(2 - @dist) * {vector_weight} + @__score * {text_weight}', 'AS', 'score']
q += ['SORTBY', 2, '@score', 'DESC', 'MAX', k]
q += ['PARAMS', 2, 'vec_param', query_vector.tobytes()]

# 'idx' is the index name redis-py's ft() uses by default, which is the name
# the index was created under.
res = redis_conn.execute_command('FT.AGGREGATE', 'idx', *q)

print(res)