# Cosine Similarity Search Comparison
Compares the cosine distance scores returned by Redis VSS over JSON documents, Redis VSS over Hashes, and Pinecone, using a manually computed cosine distance as the reference value.

# Install Python Packages

In [None]:
! pip install redis openai python-dotenv openai[datalib] pinecone-client

# Start Redis Stack Docker Container

In [1]:
# Bring up the services from the Compose file in the working directory, detached (-d).
! docker compose up -d

[1A[1B[0G[?25l[+] Running 0/0
 ⠋ Network vss-cosine_default  Creating                                    [34m0.1s [0m
[?25h[1A[1A[0G[?25l[34m[+] Running 1/1[0m
 [32m✔[0m Network vss-cosine_default    [32mCreated[0m                                   [34m0.1s [0m
 ⠋ Container vss-cosine-redis-1  Creatin...                                [34m0.1s [0m
[?25h[1A[1A[1A[0G[?25l[+] Running 1/2
 [32m✔[0m Network vss-cosine_default    [32mCreated[0m                                   [34m0.1s [0m
 ⠿ Container vss-cosine-redis-1  Startin...                                [34m0.2s [0m
[?25h[1A[1A[1A[0G[?25l[+] Running 1/2
 [32m✔[0m Network vss-cosine_default    [32mCreated[0m                                   [34m0.1s [0m
 ⠿ Container vss-cosine-redis-1  Startin...                                [34m0.3s [0m
[?25h[1A[1A[1A[0G[?25l[+] Running 1/2
 [32m✔[0m Network vss-cosine_default    [32mCreated[0m                                   [34m0.1

# Create an embedding via OpenAI

In [3]:
import openai
from dotenv import load_dotenv
import os
from redis import from_url
from redis.commands.search.field import VectorField
from redis.commands.search.indexDefinition import IndexDefinition, IndexType
from redis.commands.search.query import Query
import numpy as np
import pinecone
import time
from tqdm.notebook import tqdm as tqdm


# Load variables through python-dotenv, then hand the OpenAI key to the client
# library (the value is None when OPENAI_API_KEY is not set).
load_dotenv()
openai.api_key = os.environ.get("OPENAI_API_KEY")
def get_vector(text, model="text-embedding-ada-002"):
    """Return the embedding of ``text`` from the OpenAI embeddings endpoint.

    Args:
        text: Input string; newlines are replaced by spaces before the request.
        model: Name of the OpenAI embedding model to request.

    Returns:
        The ``embedding`` entry of the first item in the response's ``data`` list.
    """
    single_line = text.replace("\n", " ")
    response = openai.Embedding.create(input=[single_line], model=model)
    first_item = response['data'][0]
    return first_item['embedding']

# Synthetic 100-character document: each of the letters a-e repeated 20 times.
text_1 = "".join(letter * 20 for letter in "abcde")
vector_1 = get_vector(text_1)

# Create Redis Client Connection

In [4]:
# Connect to Redis on localhost:6379, then flush the database so the index and
# document cells below start from an empty keyspace.
client = from_url(url='redis://localhost:6379')
client.flushdb()

True

# Build Redis VSS Index against JSON-stored vector

In [5]:
# FLAT vector index over the `$.vector` JSON path (aliased as `vector`), sized to
# the embedding length and using the COSINE distance metric.
schema_json = [
    VectorField(
        name='$.vector',
        algorithm="FLAT",
        attributes={
            "TYPE": 'FLOAT32',
            "DIM": len(vector_1),
            "DISTANCE_METRIC": "COSINE",
        },
        as_name='vector',
    )
]
idx_json_def = IndexDefinition(prefix=['doc-json:'], index_type=IndexType.JSON)
client.ft('idx-json').create_index(fields=schema_json, definition=idx_json_def)

# Store the embedding as a JSON document under the indexed key prefix.
client.json().set('doc-json:1', '$', {"vector": vector_1})

True

# Build Redis VSS Index against Hash-stored vector

In [6]:
# FLAT vector index over the `vector` field of hashes, with the same type,
# dimension and COSINE metric as the JSON index.
schema_hash = [
    VectorField(
        name='vector',
        algorithm="FLAT",
        attributes={
            "TYPE": 'FLOAT32',
            "DIM": len(vector_1),
            "DISTANCE_METRIC": "COSINE",
        },
    )
]
idx_hash_def = IndexDefinition(prefix=['doc-hash:'], index_type=IndexType.HASH)
client.ft('idx-hash').create_index(fields=schema_hash, definition=idx_hash_def)

# Hash fields hold raw bytes, so the embedding is serialized as a float32 buffer.
vector_1_bytes = np.array(vector_1, dtype=np.float32).tobytes()
client.hset('doc-hash:1', mapping={'vector': vector_1_bytes})

1

# Build Pinecone Index

In [7]:
# Connect to Pinecone, create a cosine-metric index sized to the embedding, and
# upsert the document vector into it.
pinecone.init(api_key=os.getenv("PINECONE_API_KEY"), environment="gcp-starter")
pinecone.create_index("test", dimension=len(vector_1), metric="cosine")
pindex = pinecone.Index("test")
pindex.upsert([{'id': 'doc:1', 'values': vector_1}])

# Poll until the index stats report the upserted vector, but give up after a
# bounded wait so a failed or delayed upsert cannot hang the notebook forever.
PINECONE_READY_TIMEOUT_S = 120
PINECONE_POLL_INTERVAL_S = 3
deadline = time.monotonic() + PINECONE_READY_TIMEOUT_S
while pindex.describe_index_stats().total_vector_count < 1:
    if time.monotonic() >= deadline:
        raise TimeoutError(
            f"Pinecone index 'test' reported no vectors after {PINECONE_READY_TIMEOUT_S}s"
        )
    time.sleep(PINECONE_POLL_INTERVAL_S)

# Create Query Vector

In [8]:
# Synthetic 100-character query text: each of the letters z, y, x, w, v repeated 20 times.
text_2 = "".join(letter * 20 for letter in "zyxwv")
vector_2 = get_vector(text_2)

# Run Redis/JSON vs Pinecone Cosine Test

In [9]:
# KNN query for the single nearest neighbour of $query_vec; the score is returned
# under the alias `vector_score` and is used below as a cosine distance.
q = Query('*=>[KNN 1 @vector $query_vec AS vector_score]')\
    .sort_by('vector_score')\
    .return_fields('vector_score')\
    .dialect(2)
# The query vector is passed as a raw float32 byte buffer.
params = { "query_vec": np.array(vector_2, dtype=np.float32).tobytes() }
json_results = client.ft('idx-json').search(q, query_params=params)
redis_score = np.float32(json_results.docs[0]['vector_score'])
print(f"Redis cosine distance    :{redis_score}")

# The Pinecone match score is treated as a similarity, so it is converted to a
# distance (1 - score) to be comparable with the Redis value.
results = pindex.query(vector=vector_2, top_k=1, include_values=True)
pinecone_score = np.float32(1.0 - results['matches'][0]['score'])
print(f"Pinecone cosine distance :{pinecone_score}")

# Reference value computed locally in float32. Cosine similarity is
# dot(a, b) / (|a| * |b|); dividing by the norms keeps the reference correct
# even when the embeddings are not exactly unit length.
np_vec1 = np.array(vector_1, dtype=np.float32)
np_vec2 = np.array(vector_2, dtype=np.float32)
cosine_similarity = np.dot(np_vec1, np_vec2) / (np.linalg.norm(np_vec1) * np.linalg.norm(np_vec2))
manual = np.float32(1.0 - cosine_similarity)
print(f"Manual cosine distance   :{manual}\n")
print(f"Redis v. Manual Diff     :{abs(manual-redis_score):.10f} ")
print(f"Pinecone v. Manual Diff  :{abs(manual-pinecone_score):.10f} ")
print(f"Redis v. Pinecone Diff   :{abs(redis_score-pinecone_score):.10f} ")

Redis cosine distance    :0.0929868221282959
Pinecone cosine distance :0.09561246633529663
Manual cosine distance   :0.0929863452911377

Redis v. Manual Diff     :0.0000004768 
Pinecone v. Manual Diff  :0.0026261210 
Redis v. Pinecone Diff   :0.0026256442 


# Run Redis/Hash vs Pinecone Cosine Test

In [10]:
# Re-run the same KNN query against the hash-backed index; the Pinecone and
# manual values are reused from the JSON test cell.
print("\n*** Redis/Hash vs Pinecone Cosine Test  ***")
hash_results = client.ft('idx-hash').search(q, query_params=params)
top_hit = hash_results.docs[0]
redis_score = np.float32(top_hit['vector_score'])

# Assemble the report once and emit it in a single print; the text is identical
# to printing each line separately.
report = [
    f"Redis cosine distance    :{redis_score}",
    f"Pinecone cosine distance :{pinecone_score}",
    f"Manual cosine distance   :{manual}\n",
    f"Redis v. Manual Diff     :{abs(manual-redis_score):.10f} ",
    f"Pinecone v. Manual Diff  :{abs(manual-pinecone_score):.10f} ",
    f"Redis v. Pinecone Diff   :{abs(redis_score-pinecone_score):.10f} ",
]
print("\n".join(report))


*** Redis/Hash vs Pinecone Cosine Test  ***
Redis cosine distance    :0.0929868221282959
Pinecone cosine distance :0.09561246633529663
Manual cosine distance   :0.0929863452911377

Redis v. Manual Diff     :0.0000004768 
Pinecone v. Manual Diff  :0.0026261210 
Redis v. Pinecone Diff   :0.0026256442 


# Clean Up

In [11]:
# Remove the Pinecone index created for this comparison.
pinecone.delete_index(name="test")

In [12]:
# Stop and remove the containers and network created by `docker compose up`.
! docker compose down

[1A[1B[0G[?25l[+] Running 0/0
 ⠋ Container vss-cosine-redis-1  Stoppin...                                [34m0.1s [0m
[?25h[1A[1A[0G[?25l[+] Running 0/1
 ⠙ Container vss-cosine-redis-1  Stoppin...                                [34m0.2s [0m
[?25h[1A[1A[0G[?25l[+] Running 0/1
 ⠹ Container vss-cosine-redis-1  Stoppin...                                [34m0.3s [0m
[?25h[1A[1A[0G[?25l[34m[+] Running 1/1[0m
 [32m✔[0m Container vss-cosine-redis-1  [32mRemoved[0m                                   [34m0.4s [0m
 ⠋ Network vss-cosine_default    Removing                                  [34m0.0s [0m
[?25h[1A[1A[1A[0G[?25l[+] Running 1/2
 [32m✔[0m Container vss-cosine-redis-1  [32mRemoved[0m                                   [34m0.4s [0m
 ⠙ Network vss-cosine_default    Removing                                  [34m0.1s [0m
[?25h[1A[1A[1A[0G[?25l[+] Running 1/2
 [32m✔[0m Container vss-cosine-redis-1  [32mRemoved[0m                         