In [1]:
import faiss
import numpy as np
import pandas as pd
from scipy import spatial  # for calculating vector similarities for search
from scipy.spatial import distance
import ast  # for converting embeddings saved as strings back to arrays
from sentence_transformers import SentenceTransformer, util
import torch
import pickle

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
model = SentenceTransformer('multi-qa-MiniLM-L6-cos-v1')

## Cosine Similairty

In [None]:
def strings_ranked_by_relatedness(
    query: str,
    stored_sentences,
    stored_embeddings
):
    query_embedding_response = model.encode(query,convert_to_tensor=True)

    top_k = min(5, len(stored_sentences))
    cos_scores = util.cos_sim(query_embedding_response, stored_embeddings)[0]
    top_results = torch.topk(cos_scores, k=top_k)

    for score, idx in zip(top_results[0], top_results[1]):
        print(stored_sentences[idx], "(Score: {:.4f})".format(score))
    


with open("embeddings.pkl", "rb") as fIn:
    stored_data = pickle.load(fIn)
    stored_sentences = stored_data["sentences"]
    stored_embeddings = stored_data["embeddings"]


query = "Alo ALo"

print(strings_ranked_by_relatedness(query, stored_sentences,stored_embeddings))

## Semantic Search

In [7]:
def strings_ranked_by_relatedness(
    query: str,
    stored_sentences,
    stored_embeddings
):
    query_embedding_response = model.encode(query,convert_to_tensor=True)

    top_k = min(5, len(stored_sentences))
    cos_scores = util.semantic_search(query_embedding_response, stored_embeddings,top_k=3)[0]
    print(cos_scores)

    results = []
    for score in cos_scores:
        corpus_id = score['corpus_id']
        sentence = stored_sentences[corpus_id]
        similarity_score = score['score']
        results.append({'text': sentence, 'score': similarity_score})

    return results



with open("embeddings.pkl", "rb") as fIn:
    stored_data = pickle.load(fIn)
    stored_sentences = stored_data["sentences"]
    stored_embeddings = stored_data["embeddings"]


query = "Alo ALo"

print(strings_ranked_by_relatedness(query, stored_sentences,stored_embeddings))

[{'corpus_id': 1, 'score': 0.028499558568000793}, {'corpus_id': 2, 'score': -0.02063983678817749}, {'corpus_id': 0, 'score': -0.11698201298713684}]
[{'text': '  We describe a new algorithm, the $(k,\\ell)$-pebble game with colors, and use\nit obtain a characterization of the family of $(k,\\ell)$-sparse graphs and\nalgorithmic solutions to a family of problems concerning tree decompositions of\ngraphs. Special instances of sparse graphs appear in rigidity theory and have\nreceived increased attention in recent years. In particular, our colored\npebbles generalize and strengthen the previous results of Lee and Streinu and\ngive a new proof of the Tutte-Nash-Williams characterization of arboricity. We\nalso present a new decomposition that certifies sparsity based on the\n$(k,\\ell)$-pebble game with colors. Our work also exposes connections between\npebble game algorithms and previous sparse graph algorithms by Gabow, Gabow and\nWestermann and Hendrickson.\n', 'score': 0.028499558568000