# Vector Similarity for RediSearch - Hybrid queries


## Python examples

### Packages

In [147]:
!pip install git+https://github.com/redis/redis-py.git@master
!pip install numpy

Collecting git+https://github.com/redis/redis-py.git@master
  Cloning https://github.com/redis/redis-py.git (to revision master) to /tmp/pip-req-build-uo2rjptj
  Running command git clone --filter=blob:none --quiet https://github.com/redis/redis-py.git /tmp/pip-req-build-uo2rjptj
  Resolved https://github.com/redis/redis-py.git to commit e6ccc9ce52047c455d16abb87dd0ba8004347b75
  Preparing metadata (setup.py) ... done


In [157]:
import numpy as np
from redis import Redis
from redis.commands.search.query import Query

### Create Redis client

In [177]:
# Address of the Redis server used by every cell below.
host, port = "localhost", 6379

# Single shared connection; all index and data commands go through it.
redis_conn = Redis(host=host, port=port)

In [178]:
# Index fields and configurations

# --- data set ---------------------------------------------------------------
n_vec, dim = 10000, 128   # number of documents to load / vector dimension

# --- HNSW parameters --------------------------------------------------------
M = 40     # HNSW "M" setting (NOTE(review): not referenced by the cells visible here)
EF = 200   # HNSW EF value

# --- hash field names -------------------------------------------------------
vector_field_name, title_field_name = "vector", "title"
genre_field_name, rating_field_name = "genre", "rating"

# --- query settings ---------------------------------------------------------
k = 10     # presumably the number of nearest neighbours to request -- confirm

In [179]:
def load_vectors(client : Redis, n, d):
    """Store ``n`` hashes, keyed 1..n, each holding a random vector plus metadata.

    Every hash gets:
      * the vector field: ``d`` random float32 values serialised with ``tobytes()``
      * the rating field: ``10 * (id / n)``, so the rating grows with the id
      * the genre/title fields: ``"action, drama"`` / ``"spiderman"`` when the id
        is a multiple of 5, otherwise ``"action"`` / ``"matrix"``

    Args:
        client: Redis connection used to issue the HSET commands.
        n: number of hashes to create.
        d: number of components in each vector.
    """
    for doc_id in range(1, n + 1):
        embedding = np.random.rand(1, d).astype(np.float32)

        # Every fifth document is a "spiderman" review; the rest are "matrix".
        if doc_id % 5 == 0:
            genre, title = "action, drama", "spiderman"
        else:
            genre, title = "action", "matrix"

        fields = {
            vector_field_name: embedding.tobytes(),
            rating_field_name: 10 * (doc_id / n),
            genre_field_name: genre,
            title_field_name: title,
        }
        client.hset(doc_id, mapping=fields)
        
def delete_data(client: Redis):
    """Wipe the server by issuing FLUSHALL through ``client``.

    This is destructive: it is not limited to the keys written by this
    notebook, so only run it against a disposable Redis instance.
    """
    client.flushall()
        

### Create HNSW index with metadata

In [180]:
# Build the index.
# redis-py ships the RediSearch commands itself: `Redis.ft(<index name>)` returns
# the search client, so the (never imported) `redisearch` package is not needed.
hnsw_index = redis_conn.ft("my_hnsw_index")

# Start from an empty server so that re-running this cell is repeatable.
delete_data(redis_conn)

# Schema: one HNSW vector field plus NUMERIC / TEXT / TAG metadata fields.
# The '8' after HNSW is the number of vector arguments that follow
# (TYPE, DIM, DISTANCE_METRIC, EF_RUNTIME and their values).
redis_conn.execute_command('FT.CREATE', "my_hnsw_index", 'SCHEMA',
                           vector_field_name, 'VECTOR', 'HNSW', '8', 'TYPE', 'FLOAT32', 'DIM', dim, 'DISTANCE_METRIC', 'L2', 'EF_RUNTIME', EF,
                           rating_field_name, 'NUMERIC', title_field_name, 'TEXT', genre_field_name, 'TAG')

# Load the vectors and their metadata.
load_vectors(redis_conn, n_vec, dim)

print("index size: ", hnsw_index.info()['num_docs'])

index size:  10000


## Hybrid query examples

In [170]:
# Give me the top 10 reviews on action movies similar to mine.
# The query vector is random and is reused by the query cells below.
query_vector = np.random.rand(1, dim).astype(np.float32)

# Name of the score field the results are sorted by; derived from the vector
# field name so it stays in sync with the sort key.
score_field = f'__{vector_field_name}_score'

# The filter in parentheses selects the candidates (genre tag "action");
# $vec_param is bound to the raw vector bytes through `query_params`.
q = Query(f'(@{genre_field_name}:{{action}})=>[TOP_K 10 @{vector_field_name} $vec_param]').sort_by(score_field)
res = hnsw_index.search(q, query_params = {'vec_param': query_vector.tobytes()})

docs = [int(doc.id) for doc in res.docs]
rs_dists = [float(getattr(doc, score_field)) for doc in res.docs]
print(docs)
print(rs_dists)

[8944, 2779, 7440, 1843, 4103, 7873, 7799, 3008, 6813, 4109]
[13.7681503296, 13.9446144104, 14.7563505173, 14.8739566803, 14.9672689438, 14.97905159, 15.1214151382, 15.1551733017, 15.1591110229, 15.3667402267]


In [153]:
# Give me the top 10 reviews on action movies similar to mine that got ratings between 5 and 7.
# (rating = 10 * id / n_vec, so these are ids 5000-7000)

# Name of the score field the results are sorted by; derived from the vector
# field name so it stays in sync with the sort key.
score_field = f'__{vector_field_name}_score'

# Two filters side by side inside the parentheses: genre tag and numeric rating range.
q = Query(f'(@{genre_field_name}:{{action}} @{rating_field_name}:[5 7])=>[TOP_K 10 @{vector_field_name} $vec_param]').sort_by(score_field)
res = hnsw_index.search(q, query_params = {'vec_param': query_vector.tobytes()})

docs = [int(doc.id) for doc in res.docs]
rs_dists = [float(getattr(doc, score_field)) for doc in res.docs]
print(docs)
print(rs_dists)

[6054, 6517, 5649, 5665, 5583, 6673, 5538, 5164, 6324, 6036]
[14.0194711685, 14.2707452774, 14.9074678421, 15.2483787537, 15.7517023087, 15.8709020615, 15.950252533, 16.0580673218, 16.0584373474, 16.0967102051]


In [154]:
# Give me the top 10 reviews on the "spiderman" movie that are similar to mine and got ratings between 5 and 7.
# (ids in 5000-7000 that are divisible by 5)

# Name of the score field the results are sorted by; derived from the vector
# field name so it stays in sync with the sort key.
score_field = f'__{vector_field_name}_score'

# Text match on the title combined with a numeric range on the rating.
q = Query(f'(@{title_field_name}:spiderman @{rating_field_name}:[5 7])=>[TOP_K 10 @{vector_field_name} $vec_param]').sort_by(score_field)
res = hnsw_index.search(q, query_params = {'vec_param': query_vector.tobytes()})

docs = [int(doc.id) for doc in res.docs]
rs_dists = [float(getattr(doc, score_field)) for doc in res.docs]
print(docs)
print(rs_dists)

[5665, 6720, 5645, 5720, 5340, 6100, 5420, 5235, 5920, 6070]
[15.2483787537, 15.606221199, 16.6438274384, 16.8816299438, 16.8886756897, 16.9691009521, 17.2650661469, 17.3032188416, 17.3862438202, 17.4056377411]


In [None]:
# Give me the top 10 reviews on movies which aren't "spiderman" that are similar to mine.
# (all ids which are not divisible by 5)

# Name of the score field the results are sorted by; derived from the vector
# field name so it stays in sync with the sort key.
score_field = f'__{vector_field_name}_score'

# `-@title:spider*` excludes every document whose title starts with "spider".
q = Query(f'(@{genre_field_name}:{{action}} -@{title_field_name}:spider*)=>[TOP_K 10 @{vector_field_name} $vec_param]').sort_by(score_field)
res = hnsw_index.search(q, query_params = {'vec_param': query_vector.tobytes()})

docs = [int(doc.id) for doc in res.docs]
rs_dists = [float(getattr(doc, score_field)) for doc in res.docs]
print(docs)
print(rs_dists)

In [156]:
# Give me the top 10 reviews which are on the "spiderman" movie, or on movies with a rating of at least 9.
# (ids which are divisible by 5, or ids from 9000 upwards)

# Name of the score field the results are sorted by; derived from the vector
# field name so it stays in sync with the sort key.
score_field = f'__{vector_field_name}_score'

# The union of the two filters must be wrapped in its own balanced pair of
# parentheses before `=>`; the original string left the outer one unclosed.
q = Query(f'((@{title_field_name}:spiderman) | (@{rating_field_name}:[9 inf]))=>[TOP_K 10 @{vector_field_name} $vec_param]').sort_by(score_field)
res = hnsw_index.search(q, query_params = {'vec_param': query_vector.tobytes()})

docs = [int(doc.id) for doc in res.docs]
rs_dists = [float(getattr(doc, score_field)) for doc in res.docs]
print(docs)
print(rs_dists)

[3210, 9683, 5665, 9711, 9188, 8355, 945, 3235, 4035, 9777]
[14.840921402, 14.9987468719, 15.2483787537, 15.3038034439, 15.4180278778, 15.675069809, 15.932050705, 16.0958003998, 16.2555809021, 16.2901725769]
