In [1]:
import redis
r = redis.Redis(host='localhost', port=6379, decode_responses=True)

In [2]:
from sentence_transformers import SentenceTransformer
from redis.commands.search.query import Query
from redis.commands.search.field import TextField, TagField, VectorField
from redis.commands.search.index_definition import IndexDefinition, IndexType
from redis.commands.json.path import Path

import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

In [4]:
try:
    r.ft("vector_idx").dropindex(True)
except redis.exceptions.ResponseError:
    pass

In [7]:
schema = (
    TextField("content"),
    VectorField("embedding", "HNSW", {
        "TYPE": "FLOAT32",
        "DIM": 384,
        "DISTANCE_METRIC":"L2"
    })
)

r.ft("vector_idx").create_index(
    schema,
    definition=IndexDefinition(
        prefix=["doc:"], index_type=IndexType.HASH
    )
)


'OK'

In [6]:
import pandas as pd
data = pd.read_csv("Assignment_Data_Base.csv")

In [8]:
data.head()

Unnamed: 0,#,Sentence
0,1,Type 1 diabetes is an autoimmune condition cau...
1,2,Type 2 diabetes involves insulin resistance an...
2,3,Gestational diabetes develops in pregnancy and...
3,4,A fasting plasma glucose ≥ 126 mg/dL on two oc...
4,5,Hypoglycaemia is defined as blood glucose < 70...


In [10]:
k=0
for i in data['Sentence']:
    r.hset(f"doc:{k}", mapping={
        "content": i,
        "embedding": model.encode(i).astype(np.float32).tobytes(),
    })
    k += 1
    print(k, end="\r")

60

In [15]:
q = Query(
    "*=>[KNN 3 @embedding $vec AS vector_distance]"
).return_field("score").dialect(2)

query_text = input()

res = r.ft("vector_idx").search(
    q, query_params={
        "vec": model.encode(query_text).astype(np.float32).tobytes()
    }
)

print(res)


Result{3 total, docs: [Document {'id': 'doc:41', 'payload': None}, Document {'id': 'doc:42', 'payload': None}, Document {'id': 'doc:43', 'payload': None}]}
