# Vector Similarity for RediSearch - Hybrid queries


## Python examples

### Packages

In [48]:
!pip install git+https://github.com/redis/redis-py.git@master
!pip install numpy

Collecting git+https://github.com/redis/redis-py.git@master
  Cloning https://github.com/redis/redis-py.git (to revision master) to /tmp/pip-req-build-ngxzk85f
  Running command git clone --filter=blob:none --quiet https://github.com/redis/redis-py.git /tmp/pip-req-build-ngxzk85f
  Resolved https://github.com/redis/redis-py.git to commit 9a02bfd569c632e3563ad4c4af1f363174ec1af8
  Preparing metadata (setup.py) ... [?25ldone


In [49]:
import numpy as np
from redis import Redis
import redisearch

### Create Redis client

In [52]:
# Address of the Redis server that every cell below talks to.
host, port = "localhost", 6379

# Single shared connection; reused by the index client and the data loader.
redis_conn = Redis(host=host, port=port)

In [53]:
# Dataset size (number of hashes to load) and vector dimensionality.
n_vec, dim = 10000, 128

# HNSW tuning values. EF is passed as EF_RUNTIME when the index is created;
# M is not referenced by the index-creation cell in this notebook.
M, EF = 40, 200

# Hash field names shared by the loader, the index schema and the queries.
vector_field_name, title_field_name = "vector", "title"
genre_field_name, rating_field_name = "genre", "rating"

# Number of nearest neighbours to ask for in the top-K queries.
k = 10

In [54]:
def load_vectors(client : Redis, n, d,  field_name):
    """Store ``n`` hashes in Redis, each holding a vector plus metadata fields.

    Hash ``i`` (for ``i`` in ``1..n``) is written under key ``i`` with:

    * ``field_name``: a ``d``-dimensional float32 vector whose every component
      equals ``i``, serialised with ``ndarray.tobytes()``;
    * the rating field: ``10 * (i / n)``;
    * the genre and title fields: ``"action, drama"`` / ``"spiderman"`` when
      ``i`` is divisible by 10, otherwise ``"action"`` / ``"matrix"``.

    Args:
        client: Redis client used to issue one ``HSET`` per hash.
        n: Number of hashes to create.
        d: Dimension of each stored vector.
        field_name: Name of the hash field that receives the vector bytes.

    The rating, genre and title field names are read from the notebook-level
    ``rating_field_name``, ``genre_field_name`` and ``title_field_name``.
    """
    for i in range(1, n + 1):
        # Use the function's own arguments (d, field_name) rather than the
        # notebook globals, so the loader honours what the caller passes in.
        vector = np.full(d, i, dtype=np.float32)
        every_tenth = i % 10 == 0
        client.hset(i, mapping={
            field_name: vector.tobytes(),
            rating_field_name: 10 * (i / n),
            genre_field_name: "action, drama" if every_tenth else "action",
            title_field_name: "spiderman" if every_tenth else "matrix",
        })
        
def delete_data(client: Redis):
    """Wipe the server's contents by issuing FLUSHALL through ``client``.

    WARNING: per the Redis documentation FLUSHALL removes every key in every
    database on the server, not only the keys created by this notebook.
    Only run this against a disposable instance.
    """
    # The reply from the server is intentionally discarded.
    client.flushall()
        

### Create HNSW index with meta-data

In [55]:
# build index
index_name = "my_hnsw_index"
hnsw_index = redisearch.Client(index_name, conn=redis_conn)

# Clear existing data before (re)creating the index.
# NOTE: delete_data() calls flushall() on the connection - use a disposable server.
delete_data(redis_conn)

# HNSW attributes as flat name/value pairs. FT.CREATE expects the number of
# attribute tokens right after the algorithm name, so derive it from the list
# instead of hand-maintaining a literal count.
# NOTE(review): M from the config cell is not passed here; presumably the server default applies - confirm.
hnsw_attributes = ['TYPE', 'FLOAT32', 'DIM', dim, 'DISTANCE_METRIC', 'L2', 'EF_RUNTIME', EF]
hnsw_index.redis.execute_command('FT.CREATE', index_name, 'SCHEMA',
                                 vector_field_name, 'VECTOR', 'HNSW', len(hnsw_attributes), *hnsw_attributes,
                                 rating_field_name, 'NUMERIC', title_field_name, 'TEXT', genre_field_name, 'TAG')

#load vectors
load_vectors(hnsw_index.redis, n_vec, dim, vector_field_name)

print("index size: ", hnsw_index.info()['num_docs'])

index size:  10000


## Hybrid query examples

In [57]:
# Give me the top k reviews on action movies similar to mine
# The query vector has every component equal to n_vec, i.e. it matches the last loaded vector.
query_vector = np.full(dim, n_vec, dtype=np.float32)

# Name of the per-document distance field; derived from the vector field name so the
# sort key and the attribute read below cannot drift apart.
score_field = f'__{vector_field_name}_score'
hybrid_filter = f'@{genre_field_name}:{{action}}'

# Use the configured k for both TOP_K and the page size so all k hits are returned.
q = (redisearch.Query(f'({hybrid_filter})=>[TOP_K {k} @{vector_field_name} $vec_param]')
     .sort_by(score_field)
     .paging(0, k))
res = hnsw_index.search(q, query_params = {'vec_param': query_vector.tobytes()})

docs = [int(doc.id) for doc in res.docs]
rs_dists = [float(getattr(doc, score_field)) for doc in res.docs]
print(docs)
print(rs_dists)

[10000, 9999, 9998, 9997, 9996, 9995, 9994, 9993, 9992, 9991]
[0.0, 128.0, 512.0, 1152.0, 2048.0, 3200.0, 4608.0, 6272.0, 8192.0, 10368.0]


In [58]:
# Give me the top k reviews on action movies similar to mine that got ratings between 5-7.
# (ids 5000-7000)

# Name of the per-document distance field; derived from the vector field name so the
# sort key and the attribute read below cannot drift apart.
score_field = f'__{vector_field_name}_score'
hybrid_filter = f'@{genre_field_name}:{{action}} @{rating_field_name}:[5 7]'

# Use the configured k for both TOP_K and the page size so all k hits are returned.
q = (redisearch.Query(f'({hybrid_filter})=>[TOP_K {k} @{vector_field_name} $vec_param]')
     .sort_by(score_field)
     .paging(0, k))
res = hnsw_index.search(q, query_params = {'vec_param': query_vector.tobytes()})

docs = [int(doc.id) for doc in res.docs]
rs_dists = [float(getattr(doc, score_field)) for doc in res.docs]
print(docs)
print(rs_dists)

[7000, 6999, 6998, 6997, 6996, 6995, 6994, 6993, 6992, 6991]
[1152000000.0, 1152768128.0, 1153536128.0, 1154305152.0, 1155074048.0, 1155843200.0, 1156612224.0, 1157382272.0, 1158152192.0, 1158922368.0]


In [73]:
# Give me the top k reviews on the spiderman movie that are similar to mine and got ratings between 5-7.
# (ids in 5000-7000 that are divisible by 10)

# Name of the per-document distance field; derived from the vector field name so the
# sort key and the attribute read below cannot drift apart.
score_field = f'__{vector_field_name}_score'
hybrid_filter = f'@{title_field_name}:spiderman @{rating_field_name}:[5 7]'

# Use the configured k for both TOP_K and the page size so all k hits are returned.
q = (redisearch.Query(f'({hybrid_filter})=>[TOP_K {k} @{vector_field_name} $vec_param]')
     .sort_by(score_field)
     .paging(0, k))
res = hnsw_index.search(q, query_params = {'vec_param': query_vector.tobytes()})

docs = [int(doc.id) for doc in res.docs]
rs_dists = [float(getattr(doc, score_field)) for doc in res.docs]
print(docs)
print(rs_dists)

[7000, 6990, 6980, 6970, 6960, 6950, 6940, 6930, 6920, 6910]
[1152000000.0, 1159692416.0, 1167411200.0, 1175154816.0, 1182924800.0, 1190719616.0, 1198540800.0, 1206386816.0, 1214259200.0, 1222156416.0]


In [74]:
# Give me the top k reviews on action movies which aren't spiderman that are similar to mine.

# Name of the per-document distance field; derived from the vector field name so the
# sort key and the attribute read below cannot drift apart.
score_field = f'__{vector_field_name}_score'
# The leading '-' negates the title clause; 'spider*' is a prefix match.
hybrid_filter = f'@{genre_field_name}:{{action}} -@{title_field_name}:spider*'

# Use the configured k for both TOP_K and the page size so all k hits are returned.
q = (redisearch.Query(f'({hybrid_filter})=>[TOP_K {k} @{vector_field_name} $vec_param]')
     .sort_by(score_field)
     .paging(0, k))
res = hnsw_index.search(q, query_params = {'vec_param': query_vector.tobytes()})

docs = [int(doc.id) for doc in res.docs]
rs_dists = [float(getattr(doc, score_field)) for doc in res.docs]
print(docs)
print(rs_dists)

[9999, 9998, 9997, 9996, 9995, 9994, 9993, 9992, 9991, 9989]
[128.0, 512.0, 1152.0, 2048.0, 3200.0, 4608.0, 6272.0, 8192.0, 10368.0, 15488.0]
