# Vector Similarity for RediSearch - Hybrid queries


## Python examples

### Packages

In [96]:
!pip install git+https://github.com/redis/redis-py.git@master
!pip install numpy

Collecting git+https://github.com/redis/redis-py.git@master
  Cloning https://github.com/redis/redis-py.git (to revision master) to /tmp/pip-req-build-mdsmy453
  Running command git clone --filter=blob:none --quiet https://github.com/redis/redis-py.git /tmp/pip-req-build-mdsmy453
  Resolved https://github.com/redis/redis-py.git to commit afc83e11146a9cb4f7fdeedc3b7fec7e1fe2953c
  Preparing metadata (setup.py) ... [?25ldone


In [2]:
import numpy as np
from redis import Redis
import redisearch

### Create Redis client

In [9]:
# Address of the Redis server this notebook talks to.
host, port = "localhost", 6379

# Shared connection object used by the cells below.
redis_conn = Redis(host=host, port=port)

In [10]:
# --- Dataset shape ---
n_vec = 10_000  # number of documents handed to load_vectors
dim = 4         # vector dimensionality (DIM of the index and length of each random vector)

# --- Index parameters ---
M = 20    # NOTE(review): presumably the HNSW M parameter; not passed to FT.CREATE in the visible cells
EF = 100  # passed as EF_RUNTIME when the index is created

# --- Hash / schema field names ---
vector_field_name = "vector"
title_field_name = "title"
genre_field_name = "genre"
rating_field_name = "rating"

k = 10  # presumably the number of nearest neighbours to return per query -- TODO confirm

In [11]:
def load_vectors(client : Redis, n, d,  field_name):
    """Store ``n`` hashes in Redis, each with a random vector and movie metadata.

    Hashes are keyed by the integers ``1..n``. Every hash gets:

    * ``field_name``: the raw bytes of a random float32 vector of ``d`` values,
    * ``rating_field_name``: ``10 * (i / n)``, so ratings grow linearly up to 10,
    * ``genre_field_name`` / ``title_field_name``: ``"action, drama"`` /
      ``"spiderman"`` for every 5th key, ``"action"`` / ``"matrix"`` otherwise.

    The metadata field names are read from the module-level ``*_field_name``
    variables. Vectors come from NumPy's global random generator; seed it once
    *before* calling this function if reproducible data is required.

    Args:
        client: object exposing ``hset(key, mapping=...)`` (a Redis connection).
        n: number of hashes to create.
        d: number of float32 components per vector.
        field_name: name of the hash field that receives the vector bytes.
    """
    for i in range(1, n+1):
        # Draw a fresh vector for every document. Re-seeding the generator
        # inside this loop would give every document the exact same vector.
        np_vector = np.float32(np.random.rand(1, d))
        is_spiderman = i % 5 == 0
        client.hset(i, mapping = {field_name: np_vector.tobytes(),
                                  rating_field_name: 10*(i/n),
                                  genre_field_name: "action, drama" if is_spiderman else "action",
                                  title_field_name: "spiderman" if is_spiderman else "matrix"})
        
def delete_data(client: Redis):
    """Wipe the server by calling ``client.flushall()``.

    Note that this is not limited to the keys written by this notebook.
    """
    client.flushall()
        

### Create HNSW index with metadata

In [12]:
# build index
hnsw_index = redisearch.Client("my_hnsw_index", conn=redis_conn)

# Wipe the server before (re)creating the index, then fix the NumPy seed
# so the random data generated afterwards is repeatable.
delete_data(redis_conn)
np.random.seed(10)

# Attributes of the HNSW vector field. FT.CREATE expects the number of
# attribute tokens right after the algorithm name, so derive it from the list.
hnsw_attributes = ['TYPE', 'FLOAT32', 'DIM', dim, 'DISTANCE_METRIC', 'L2', 'EF_RUNTIME', EF]
hnsw_index.redis.execute_command(
    'FT.CREATE', "my_hnsw_index", 'SCHEMA',
    vector_field_name, 'VECTOR', 'HNSW', str(len(hnsw_attributes)), *hnsw_attributes,
    rating_field_name, 'NUMERIC',
    title_field_name, 'TEXT',
    genre_field_name, 'TAG',
)

# Alternative kept for reference: the same schema with a FLAT vector index.
# hnsw_index.redis.execute_command('FT.CREATE', "my_hnsw_index", 'SCHEMA',
#                                  vector_field_name, 'VECTOR', 'FLAT', '6', 'TYPE', 'FLOAT32', 'DIM', dim, 'DISTANCE_METRIC', 'L2',
#                                  rating_field_name, 'NUMERIC',title_field_name, 'TEXT', genre_field_name, 'TAG')

# load vectors
load_vectors(hnsw_index.redis, n_vec, dim, vector_field_name)

print("index size: ", hnsw_index.info()['num_docs'])

index size:  10000


## Hybrid query examples

In [14]:
import time

# Give me the top k reviews on spiderman movie that are similar to mine and got ratings between 5-7.
# load_vectors sets rating = 10 * id / n_vec and titles every 5th id "spiderman",
# so with n_vec = 10000 the filter matches the multiples of 5 among ids 5000-7000.

# Name of the field holding the vector distance; derived from the vector field
# name so the sort key and the attribute read below cannot drift apart.
score_field = f'__{vector_field_name}_score'

query_vector = np.float32(np.random.rand(dim))
q = redisearch.Query(f'(@{title_field_name}:spiderman @{rating_field_name}:[5 7])=>[TOP_K {k} @{vector_field_name} $vec_param]').sort_by(score_field)

# perf_counter is a monotonic, high-resolution clock meant for measuring intervals.
start = time.perf_counter()
res = hnsw_index.search(q, query_params = {'vec_param': query_vector.tobytes()})
print ("time: ", time.perf_counter()-start)

docs = [int(doc.id) for doc in res.docs]
rs_dists = [float(getattr(doc, score_field)) for doc in res.docs]
print(docs)
#print(rs_dists)

time:  0.0014214515686035156
[]


In [None]:
# Give me the top k reviews on action movies which aren't spiderman that are similar to mine.
# Reuses query_vector from the previous query cell.

# Name of the field holding the vector distance; derived from the vector field
# name so the sort key and the attribute read below cannot drift apart.
score_field = f'__{vector_field_name}_score'

q = redisearch.Query(f'(@{genre_field_name}:{{action}} -@{title_field_name}:spider*)=>[TOP_K {k} @{vector_field_name} $vec_param]').sort_by(score_field)
res = hnsw_index.search(q, query_params = {'vec_param': query_vector.tobytes()})

docs = [int(doc.id) for doc in res.docs]
rs_dists = [float(getattr(doc, score_field)) for doc in res.docs]
print(docs)
print(rs_dists)