# Vector Similarity for RediSearch - Range Queries


## Python examples

### Packages

In [2]:
import numpy as np
from redis import Redis
from redis.commands.search.field import VectorField, TagField, NumericField
from redis.commands.search.query import Query

### Create redis client

In [3]:
# Address of the Redis server that every cell below talks to.
host = "localhost"
port = 6379

# Single shared client connection used by every cell in this notebook.
redis_conn = Redis(host=host, port=port)

In [4]:
# Index fields and configurations

# Names of the hash fields referenced by the index schema and the queries.
vector_field_name = "vector"
rating_field_name = "rating"

# Data-set shape: number of vectors to load and number of components per vector.
n_vec = 10_000
dim = 128

# Number of nearest neighbours (top-k) for KNN-style queries.
k = 10

In [18]:
def load_data(client : Redis, n, d):
    """Store ``n`` hashes, each holding a random vector and a rating.

    Keys are the integers ``1..n``. Every hash gets two fields:

    * ``vector_field_name`` - the raw bytes of a random float32 vector with
      ``d`` components, drawn from ``np.random.rand`` (so the result depends
      on the current NumPy random state).
    * ``rating_field_name`` - ``10 * (i / n)``, i.e. ratings grow linearly
      with the id from ``10 / n`` up to ``10``.

    Args:
        client: Redis connection the hashes are written through.
        n: Number of hashes to create.
        d: Number of components in each vector.
    """
    # Queue the HSETs on a non-transactional pipeline and flush them in
    # batches: this avoids one blocking network round trip per document
    # while keeping the amount of buffered data bounded.
    batch_size = 1000
    pipe = client.pipeline(transaction=False)
    for i in range(1, n + 1):
        np_vector = np.random.rand(1, d).astype(np.float32)
        pipe.hset(i, mapping={vector_field_name: np_vector.tobytes(),
                              rating_field_name: 10 * (i / n)})
        if i % batch_size == 0:
            pipe.execute()
    # Flush whatever is left from the last, possibly partial, batch.
    pipe.execute()
        
def delete_data(client: Redis) -> None:
    """Reset the server by issuing FLUSHALL through ``client``.

    NOTE: FLUSHALL removes the keys of every database on the server, not
    only the ones written by this notebook - run it against a scratch
    instance only.
    """
    client.flushall()
    
def print_results(res):
    """Print the ids and the distances of the documents in a search result.

    ``res.docs`` is iterated twice: once to collect every ``id`` as an int,
    and once to collect every ``dist`` as a float. A document that carries
    no ``dist`` attribute is shown as ``'-'`` in the distances list.
    """
    found = res.docs
    doc_ids = list(map(int, (entry.id for entry in found)))

    distances = []
    for entry in found:
        if hasattr(entry, 'dist'):
            distances.append(float(entry.dist))
        else:
            distances.append('-')

    print(f"got {len(doc_ids)} doc ids: ", doc_ids)
    print("\ndistances: ", distances)
        

### Create VECTOR index with meta-data

In [19]:
# Build index - use FLAT vector index
delete_data(redis_conn)  # start from an empty server

# One vector field (FLAT index, float32, cosine distance) plus a numeric rating field.
schema = (
    VectorField(
        vector_field_name,
        "FLAT",
        {"TYPE": "FLOAT32", "DIM": dim, "DISTANCE_METRIC": "COSINE"},
    ),
    NumericField(rating_field_name),
)
redis_conn.ft().create_index(schema)
# Make dialect 2 the default query dialect for the searches issued below.
redis_conn.ft().config_set("default_dialect", 2)

# load vectors with meta-data
np.random.seed(42)  # fixed seed: the generated vectors are the same on every run
load_data(redis_conn, n_vec, dim)

print("Index size: ", redis_conn.ft().info()['num_docs'])

# This is my query vector (drawn after the data, from the same seeded generator)
query_vector = np.random.rand(1, dim).astype(np.float32)

Index size:  10000


### Basic query

In [9]:
# Give me the top k docs (k = 10 here) with vectors similar to mine

# The neighbour count comes from the shared `k` setting instead of a repeated
# literal. Paging is set to k as well, because a Query returns only the first
# 10 hits by default and would silently truncate a larger k.
q = Query(f'*=>[KNN {k} @{vector_field_name} $vec_param]=>{{$yield_distance_as: dist}}').sort_by('dist').paging(0, k)
res = redis_conn.ft().search(q, query_params={'vec_param': query_vector.tobytes()})

print_results(res)

got 10 doc ids:  [6268, 9386, 9572, 83, 8770, 9353, 4196, 3492, 3849, 2826]

distances:  [0.161894202232, 0.169060647488, 0.169191241264, 0.169765532017, 0.1706802845, 0.175678133965, 0.17847943306, 0.178663253784, 0.179469048977, 0.180303454399]


## Range queries examples

In [10]:
# Give me all the docs whose vector is at a distance of up to 0.2 from mine (Cosine metric)

radius = 0.2
q = Query(
    f'@{vector_field_name}:[VECTOR_RANGE {radius} $vec_param]'
    f'=>{{$yield_distance_as: dist}}'
)
# Closest first; page size n_vec so that no match is cut off.
q = q.sort_by('dist').paging(0, n_vec)
res = redis_conn.ft().search(q, query_params={'vec_param': query_vector.tobytes()})

print_results(res)

got 111 doc ids:  [6268, 9386, 9572, 83, 8770, 9353, 4196, 3492, 3849, 2826, 5194, 7019, 4137, 5832, 1324, 1290, 944, 3213, 5126, 1537, 5765, 1057, 6741, 7403, 9526, 2100, 7264, 3055, 8304, 557, 8400, 3655, 1484, 7767, 9396, 3873, 2995, 2977, 3127, 5085, 4614, 7143, 5595, 8647, 7619, 8014, 1562, 9584, 6573, 5487, 1033, 5149, 9719, 9768, 4695, 8093, 4290, 6325, 7836, 4437, 4208, 3949, 2173, 9820, 4479, 7495, 4533, 7855, 9232, 7405, 4331, 9331, 6216, 2348, 4467, 830, 4720, 2343, 4805, 9940, 2888, 1463, 6251, 6409, 216, 5663, 977, 8596, 4913, 3007, 7476, 2937, 9668, 9838, 5239, 2437, 82, 6553, 2124, 9092, 6285, 2842, 9853, 9964, 2563, 6421, 5708, 1973, 5460, 9120, 3814]

distances:  [0.161894202232, 0.169060647488, 0.169191241264, 0.169765532017, 0.1706802845, 0.175678133965, 0.17847943306, 0.178663253784, 0.179469048977, 0.180303454399, 0.181306004524, 0.18168592453, 0.181744813919, 0.182591021061, 0.183749914169, 0.18432611227, 0.185025274754, 0.185458660126, 0.185709357262, 0.185975015

In [12]:
# Give me the docs whose vector is at a distance of up to 0.2 from mine
# and whose rating is between 7 and 10 (ids 7000-10000)

radius = 0.2
q = Query(
    f'@{rating_field_name}:[7 10] '
    f'@{vector_field_name}:[VECTOR_RANGE {radius} $vec_param]=>{{$yield_distance_as: dist}}'
)
# Closest first; page size n_vec so that no match is cut off.
q = q.sort_by('dist').paging(0, n_vec)
res = redis_conn.ft().search(q, query_params={'vec_param': query_vector.tobytes()})

print_results(res)

got 36 doc ids:  [9386, 9572, 8770, 9353, 7019, 7403, 9526, 7264, 8304, 8400, 7767, 9396, 7143, 8647, 7619, 8014, 9584, 9719, 9768, 8093, 7836, 9820, 7495, 7855, 9232, 7405, 9331, 9940, 8596, 7476, 9668, 9838, 9092, 9853, 9964, 9120]

distances:  [0.169060647488, 0.169191241264, 0.1706802845, 0.175678133965, 0.18168592453, 0.186758160591, 0.186990261078, 0.18792784214, 0.188211143017, 0.188735723495, 0.189835548401, 0.190104186535, 0.190932095051, 0.190996110439, 0.191105127335, 0.191147983074, 0.191713750362, 0.193222820759, 0.193285346031, 0.193367004395, 0.194308519363, 0.194760024548, 0.195213794708, 0.195500552654, 0.195551872253, 0.195579707623, 0.19584941864, 0.196377813816, 0.197579205036, 0.197833895683, 0.198147058487, 0.198235690594, 0.198658049107, 0.19896364212, 0.19903588295, 0.199796319008]


In [15]:
# Give me docs whose vector is at a distance of up to 0.18 from mine
# OR whose rating is at least 9.9 (ids 9900-10000)

radius = 0.18
q = Query(
    f'@{rating_field_name}:[9.9 10] | '
    f'@{vector_field_name}:[VECTOR_RANGE {radius} $vec_param]=>{{$yield_distance_as: dist}}'
)
# Docs matched only through the rating clause carry no `dist`; print_results shows them as '-'.
q = q.sort_by('dist').paging(0, n_vec)
res = redis_conn.ft().search(q, query_params={'vec_param': query_vector.tobytes()})

print_results(res)

got 110 doc ids:  [6268, 9386, 9572, 83, 8770, 9353, 4196, 3492, 3849, 9900, 9901, 9902, 9903, 9904, 9905, 9906, 9907, 9908, 9909, 9910, 9911, 9912, 9913, 9914, 9915, 9916, 9917, 9918, 9919, 9920, 9921, 9922, 9923, 9924, 9925, 9926, 9927, 9928, 9929, 9930, 9931, 9932, 9933, 9934, 9935, 9936, 9937, 9938, 9939, 9940, 9941, 9942, 9943, 9944, 9945, 9946, 9947, 9948, 9949, 9950, 9951, 9952, 9953, 9954, 9955, 9956, 9957, 9958, 9959, 9960, 9961, 9962, 9963, 9964, 9965, 9966, 9967, 9968, 9969, 9970, 9971, 9972, 9973, 9974, 9975, 9976, 9977, 9978, 9979, 9980, 9981, 9982, 9983, 9984, 9985, 9986, 9987, 9988, 9989, 9990, 9991, 9992, 9993, 9994, 9995, 9996, 9997, 9998, 9999, 10000]

distances:  [0.161894202232, 0.169060647488, 0.169191241264, 0.169765532017, 0.1706802845, 0.175678133965, 0.17847943306, 0.178663253784, 0.179469048977, '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-

In [16]:
# Among docs whose vector is at a distance of up to 0.2 from mine OR whose rating is at least 9.9,
# give me the top k (k = 10 here) whose vector is closest to some other beautiful vector.

radius = 0.2

beautiful_vec = np.random.rand(1, dim).astype(np.float32)  # second random vector, used as the KNN target

# The parenthesised filter selects the candidates and the KNN stage ranks them.
# Each stage yields its distance under its own alias (range_dist / knn_dist).
# The neighbour count comes from the shared `k` setting instead of a repeated literal.
q = Query(
    f'(@{rating_field_name}:[9.9 10] | @{vector_field_name}:[VECTOR_RANGE {radius} $vec_param]=>{{$yield_distance_as: range_dist}})'
    f'=>[KNN {k} @{vector_field_name} $knn_vec]=>{{$yield_distance_as: knn_dist}}'
).sort_by('knn_dist').paging(0, n_vec)
res = redis_conn.ft().search(q, query_params={'vec_param': query_vector.tobytes(), 'knn_vec': beautiful_vec.tobytes()})

docs = [int(doc.id) for doc in res.docs]
# A doc that entered the candidate set only through the rating clause has no range distance.
range_dists = [float(doc.range_dist) if hasattr(doc, 'range_dist') else "-" for doc in res.docs]
knn_dists = [float(doc.knn_dist) if hasattr(doc, 'knn_dist') else "-" for doc in res.docs]

print("doc ids: ", docs)
print("\nrange distances: ", range_dists)
print("\nknn distances: ", knn_dists)

doc ids:  [9967, 5460, 6553, 3213, 9092, 6421, 4695, 9900, 1324, 9838]

range distances:  ['-', 0.199557423592, 0.198573112488, 0.185458660126, 0.198658049107, 0.199302434921, 0.193286716938, '-', 0.183749914169, 0.198235690594]

knn distances:  [0.163969039917, 0.166377544403, 0.172017991543, 0.172353506088, 0.179218530655, 0.181240260601, 0.181509137154, 0.181697070599, 0.185028076172, 0.185675978661]
