# Vector Similarity for RediSearch - Hybrid queries


## Python examples

### Packages

In [7]:
import numpy as np
from redis import Redis
from redis.commands.search.field import VectorField, TagField, NumericField, TextField
from redis.commands.search.query import Query

### Create redis client

In [3]:
# Address of the Redis server the examples connect to.
host = "localhost"
port = 6379

# Shared client handle; every later cell talks to Redis through it.
redis_conn = Redis(host=host, port=port)

In [4]:
# Index fields and configurations

# Names of the hash fields written for every document and declared in the index schema.
vector_field_name = "vector"
title_field_name = "title"
genre_field_name = "genre"
rating_field_name = "rating"

# Number of documents to load and the dimensionality of each vector.
n_vec = 10000
dim = 128

# NOTE(review): M and EF look like HNSW tuning parameters and k like a KNN
# result count, but nothing in the visible code reads them - confirm whether
# they are used elsewhere before relying on them.
M = 40
EF = 200
k = 10

In [11]:
def load_docs(client: Redis, n, d):
    """Populate Redis with ``n`` synthetic hash documents keyed 1..n.

    Every hash stores a random ``d``-dimensional float64 vector (as raw
    bytes), a rating equal to ``10 * id / n``, a genre tag string and a title.
    Documents whose id is divisible by 5 are "spiderman" titles tagged
    "action, drama"; all others are "matrix" titles tagged "action".

    Writes are queued on a non-transactional pipeline and flushed in batches,
    so loading costs a handful of network round trips instead of one per
    document.

    Args:
        client: Redis connection to write to.
        n: Number of documents to create.
        d: Dimensionality of each generated vector.
    """
    batch_size = 1000  # flush periodically so the client-side buffer stays bounded

    with client.pipeline(transaction=False) as pipe:
        for i in range(1, n + 1):
            # Exactly one draw per document, in id order, so a seeded run
            # yields the same vectors as issuing the writes one by one.
            np_vector = np.random.rand(1, d).astype(np.float64)

            if i % 5 != 0:
                genre, title = "action", "matrix"
            else:
                genre, title = "action, drama", "spiderman"

            pipe.hset(i, mapping={
                vector_field_name: np_vector.tobytes(),
                # rating grows linearly with the doc id, reaching 10 at id == n
                rating_field_name: 10 * (i / n),
                genre_field_name: genre,
                title_field_name: title,
            })

            if i % batch_size == 0:
                pipe.execute()

        # Send whatever is left in the final, partial batch.
        pipe.execute()
        
def delete_data(client: Redis):
    """Issue FLUSHALL through ``client`` to clear the data held by the server."""
    client.flushall()
    
def print_results(res):
    """Print the ids and distances of the documents in a search result.

    Args:
        res: Result object exposing a ``docs`` sequence. Each document needs
            an ``id`` convertible to ``int``; a ``dist`` attribute is
            optional and is shown as ``'-'`` when absent.
    """
    # Convert every id first, then the distances, in two separate passes.
    doc_ids = list(map(int, (hit.id for hit in res.docs)))

    distances = []
    for hit in res.docs:
        if hasattr(hit, 'dist'):
            distances.append(float(hit.dist))
        else:
            distances.append('-')

    print(f"got {len(doc_ids)} doc ids: ", doc_ids)
    print("\ndistances: ", distances)
        

### Create HNSW index with meta-data

#### Every document in the index represents a movie review. The vector field is a text embedding of the review, while the other fields hold some of the review's metadata.

In [12]:
# build HNSW index
# Clear the server first so the index only sees the documents loaded below.
delete_data(redis_conn)

schema = (
    VectorField(
        vector_field_name,
        "HNSW",
        {"TYPE": "FLOAT64", "DIM": dim, "DISTANCE_METRIC": "L2"},
    ),
    NumericField(rating_field_name),
    TagField(genre_field_name),
    TextField(title_field_name),
)
redis_conn.ft().create_index(schema)
# The KNN query syntax used in the examples needs query dialect 2, so make it the default.
redis_conn.ft().config_set("default_dialect", 2)

# load vectors with meta-data
# Fixed seed: the generated documents (and the query vector drawn after them) are reproducible.
np.random.seed(42)
load_docs(redis_conn, n_vec, dim)

print("index size: ", redis_conn.ft().info()['num_docs'])

# Drawn after the documents, so it continues the same seeded random stream.
query_vector = np.random.rand(1, dim).astype(np.float64)

index size:  10000


## Hybrid queries examples

In [15]:
# Give me the top 10 reviews on action movies similar to mine.

# Filter clause and KNN clause are adjacent literals that join into one query string.
q = Query(
    f'(@{genre_field_name}:{{action}})'
    f'=>[KNN 10 @{vector_field_name} $vec_param AS dist]'
).sort_by('dist')
res = redis_conn.ft().search(
    q,
    query_params={'vec_param': query_vector.tobytes()},
)

print_results(res)

got 10 doc ids:  [8770, 9386, 83, 5126, 9572, 3492, 6268, 3949, 4437, 1057]

distances:  [13.346961015, 14.5484484676, 14.7082952384, 14.7560760961, 14.8371418493, 14.9124708649, 15.2173650941, 15.3307324484, 15.3791827069, 15.488778035]


In [17]:
# Give me the top 10 reviews on action movies similar to mine that got ratings between 5 and 7.
# (ids 5000-7000)

# Filter clause and KNN clause are adjacent literals that join into one query string.
q = Query(
    f'(@{genre_field_name}:{{action}} @{rating_field_name}:[5 7])'
    f'=>[KNN 10 @{vector_field_name} $vec_param AS dist]'
).sort_by('dist')
res = redis_conn.ft().search(
    q,
    query_params={'vec_param': query_vector.tobytes()},
)

print_results(res)

got 10 doc ids:  [5126, 6268, 5390, 5085, 6741, 6251, 5239, 5487, 5194, 5595]

distances:  [14.7560760961, 15.2173650941, 15.7935849617, 15.8196561477, 15.8495724098, 15.8533288875, 16.0165456443, 16.0417755318, 16.0750138339, 16.2356399735]


In [19]:
# Give me the top 10 reviews on a Spiderman movie that are similar to mine and got ratings between 5 and 7.
# (ids in 5000-7000 that are divisible by 5)

# Filter clause and KNN clause are adjacent literals that join into one query string.
q = Query(
    f'(@{title_field_name}:spiderman @{rating_field_name}:[5 7])'
    f'=>[KNN 10 @{vector_field_name} $vec_param AS dist]'
).sort_by('dist')
res = redis_conn.ft().search(
    q,
    query_params={'vec_param': query_vector.tobytes()},
)

print_results(res)

got 10 doc ids:  [5390, 5085, 5595, 6695, 6285, 5765, 6595, 5795, 5790, 5550]

distances:  [15.7935849617, 15.8196561477, 16.2356399735, 16.4198694829, 16.4199152798, 16.4874357724, 16.59035834, 16.657459116, 16.6816978817, 16.786226798]


In [21]:
# Give me the top 10 reviews on movies that aren't Spiderman that are similar to mine.
# (all ids that are not divisible by 5)

# Filter clause and KNN clause are adjacent literals that join into one query string.
q = Query(
    f'(@{genre_field_name}:{{action}} -@{title_field_name}:spider*)'
    f'=>[KNN 10 @{vector_field_name} $vec_param AS dist]'
).sort_by('dist')
res = redis_conn.ft().search(
    q,
    query_params={'vec_param': query_vector.tobytes()},
)

print_results(res)

got 10 doc ids:  [9386, 83, 5126, 9572, 3492, 6268, 3949, 4437, 1057, 557]

distances:  [14.5484484676, 14.7082952384, 14.7560760961, 14.8371418493, 14.9124708649, 15.2173650941, 15.3307324484, 15.3791827069, 15.488778035, 15.4977867241]


In [23]:
# Give me the top 10 reviews on a "spiderman" movie or movies with at least a 9 rating.
# (ids that are divisible by 5, or 9000 and above)

# Filter clause and KNN clause are adjacent literals that join into one query string.
q = Query(
    f'((@{title_field_name}:spiderman) | (@{rating_field_name}:[9 inf]))'
    f'=>[KNN 10 @{vector_field_name} $vec_param AS dist]'
).sort_by('dist')
res = redis_conn.ft().search(
    q,
    query_params={'vec_param': query_vector.tobytes()},
)

print_results(res)

got 10 doc ids:  [8770, 9386, 9572, 8400, 9396, 3655, 9526, 9353, 5390, 5085]

distances:  [13.346961015, 14.5484484676, 14.8371418493, 15.4953163405, 15.6169647311, 15.6970910686, 15.722931204, 15.7777110313, 15.7935849617, 15.8196561477]
