In [1]:
import numpy as np

from numpy.linalg import norm
from sentence_transformers import SentenceTransformer

In [2]:
# Define the model we want to use (it'll download itself)
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

sentences = [
  "That is a very happy person",
  "That is a happy dog",
  "Today is a sunny day", 
  "You are a happy person",
]

# vector embeddings created from dataset
embeddings = model.encode(sentences)

In [3]:
# query vector embedding
query_embedding = model.encode("That is a happy person")

In [4]:
# define our distance metric
def cosine_similarity(a, b):
    return np.dot(a, b)/(norm(a)*norm(b))

# run semantic similarity search
print("Query: That is a happy person")
for e, s in zip(embeddings, sentences):
    print(s, " -> similarity score = ",
         cosine_similarity(e, query_embedding))

Query: That is a happy person
That is a very happy person  -> similarity score =  0.9429152
That is a happy dog  -> similarity score =  0.69457746
Today is a sunny day  -> similarity score =  0.2568761
You are a happy person  -> similarity score =  0.7832136


---

### Now with Feliks's data

In [5]:
with open('feliks_data.txt') as f:
    lines = f.readlines()

In [6]:
sentences = lines[0][1:-3].split("\', ")
sentences[:2]

["'Equality 🙋\\u200d♀️: Why we still need to fight for equality and people / companies who inspire me ✨",
 "'Melon 👷\\u200d♀️: Content related to building Melon 🧡🙌"]

In [7]:
embeddings = model.encode(sentences)

In [8]:
len(embeddings)

357

In [9]:
query_embedding = model.encode("UX from A to Z: You may wonder how it works")

In [10]:
results = {}
for e, s in zip(embeddings, sentences):
    results[s] = cosine_similarity(e, query_embedding)

In [11]:
results_top_10 = dict(sorted(results.items(), key=lambda item: -item[1])[:10])

In [12]:
print("Query = UX from A to Z: You may wonder how it works")
for k, v in results_top_10.items():
    print(f"{k} -> similarity score = {v}")

Query = UX from A to Z: You may wonder how it works
'UX Theory: Links to read about UX Design -> similarity score = 0.41197308897972107
'💙 UX/UI |  Articles: Stuff worth reading, looking at and revisiting!  -> similarity score = 0.3675541281700134
'Networking & leads through linkedin🌀: gathered gems from all over for me and you to be successful on linkedin -> similarity score = 0.313284307718277
'Learn: Business: Biz materials you might not have thought about. -> similarity score = 0.31268030405044556
'My ux tips : Tips&truk  -> similarity score = 0.3097940981388092
'Digital nomad : Everything about life as a digital nomad an how to achieve it🤞🏼 -> similarity score = 0.2970907688140869
'UX design: Great resources to learn and improve UX design -> similarity score = 0.288242906332016
'Photoshop: About photoshop -> similarity score = 0.2880149781703949
'Procreate tutorial 💘: Mastering your skills in procreate✨ -> similarity score = 0.28796523809432983
'Al tips: Shortcuts -> similarity sc

---

In [13]:
import os
from dotenv import load_dotenv

load_dotenv()

import redis
from redis.commands.search.field import TagField, VectorField
from redis.commands.search.indexDefinition import IndexDefinition, IndexType
from redis.commands.search.query import Query

In [14]:
r = redis.Redis(
  host='redis-13273.c266.us-east-1-3.ec2.cloud.redislabs.com',
  port=13273,
  password=os.getenv('REDIS_PASSWORD')
)

In [15]:
INDEX_NAME = "index" # Vector Index Name
DOC_PREFIX = "doc:" # RediSearch Key Prefix for the Index

In [16]:
r.ft(INDEX_NAME).dropindex(delete_documents=True)

b'OK'

In [17]:
r.flushdb()

True

In [18]:
def create_index(vector_dimensions: int):
    try:
        # check to see if index exists
        r.ft(INDEX_NAME).info()
        print("Index already exists!")
    except:
        # schema
        schema = (
            TagField("tag"),                       # Tag Field Name
            VectorField("vector",                  # Vector Field Name
                "FLAT", {                          # Vector Index Type: FLAT or HNSW
                    "TYPE": "FLOAT32",             # FLOAT32 or FLOAT64
                    "DIM": vector_dimensions,      # Number of Vector Dimensions
                    "DISTANCE_METRIC": "COSINE",   # Vector Search Distance Metric
                }
            ),
        )

        # index Definition
        definition = IndexDefinition(prefix=[DOC_PREFIX], index_type=IndexType.HASH)

        # create Index
        r.ft(INDEX_NAME).create_index(fields=schema, definition=definition)

In [19]:
# define vector dimensions
VECTOR_DIMENSIONS = len(query_embedding)

# create the index
create_index(vector_dimensions=VECTOR_DIMENSIONS)

In [20]:
# Write to Redis
pipe = r.pipeline()
for i, embedding in enumerate(embeddings):
    pipe.hset(f"doc:{i}", mapping = {
        "vector": embedding.tobytes(),
        "content": sentences[i],
        "tag": "mytag"
    })
res = pipe.execute()

In [21]:
query = (
    Query("(@tag:{ mytag })=>[KNN 10 @vector $vec as score]")
     .sort_by("score")
     .return_fields("content", "tag", "score")
     .paging(0, 10)
     .dialect(2)
)

query_params = {"vec": query_embedding.tobytes()}
r.ft(INDEX_NAME).search(query, query_params).docs

[Document {'id': 'doc:288', 'payload': None, 'score': '0.588027000427', 'content': "'UX Theory: Links to read about UX Design", 'tag': 'mytag'},
 Document {'id': 'doc:343', 'payload': None, 'score': '0.632445931435', 'content': "'💙 UX/UI |  Articles: Stuff worth reading, looking at and revisiting! ", 'tag': 'mytag'},
 Document {'id': 'doc:210', 'payload': None, 'score': '0.686715722084', 'content': "'Networking & leads through linkedin🌀: gathered gems from all over for me and you to be successful on linkedin", 'tag': 'mytag'},
 Document {'id': 'doc:56', 'payload': None, 'score': '0.687319755554', 'content': "'Learn: Business: Biz materials you might not have thought about.", 'tag': 'mytag'},
 Document {'id': 'doc:272', 'payload': None, 'score': '0.690205931664', 'content': "'My ux tips : Tips&truk ", 'tag': 'mytag'},
 Document {'id': 'doc:141', 'payload': None, 'score': '0.702909290791', 'content': "'Digital nomad : Everything about life as a digital nomad an how to achieve it🤞🏼", 'tag