In [1]:
%load_ext autoreload
%autoreload 2

from decouple import config
from qdrant_client import QdrantClient


QDRANT_URL = config('QDRANT_URL')
QDRANT_API_KEY = config('QDRANT_API_KEY')

In [2]:
from rag.utils import load_datasets


corpus_df, queries_df, qrels_df = load_datasets(10_000, 10_000)

corpus_df.size, queries_df.size, qrels_df.size

(1233, 1446, 1473)

In [3]:
qdrant_client = QdrantClient(
    url=QDRANT_URL, 
    api_key=QDRANT_API_KEY,
)

print(qdrant_client.get_collections())

collections=[]


In [29]:
from fastembed import (
    TextEmbedding, 
    SparseTextEmbedding, 
    LateInteractionTextEmbedding
)
from qdrant_client.models import (
    Distance,
    Fusion,
    VectorParams, 
    SparseVectorParams,
    SparseIndexParams,
    MultiVectorConfig,
    MultiVectorComparator,
    Modifier,
    HnswConfigDiff
)

from rag.models.config import (
    DenseModelConfig, 
    SparseModelConfig,
    RerankingModelConfig
)


dense_model_name = 'BAAI/bge-small-en-v1.5'
dense_model = TextEmbedding(model_name=dense_model_name)

sparse_model_name = 'Qdrant/bm25'
sparse_model = SparseTextEmbedding(
    model_name=sparse_model_name,
    k1=1.5,
    b=0.75
)

reranking_model_name = 'colbert-ir/colbertv2.0'
reranking_model = LateInteractionTextEmbedding(reranking_model_name)

dense_model_config = DenseModelConfig(
    name=dense_model_name,
    vector_params=VectorParams(
        size=384,
        distance=Distance.COSINE,
        hnsw_config=HnswConfigDiff(
            m=16,
            ef_construct=128,
            on_disk=True
        ),
        on_disk=True
    )
)

sparse_model_config = SparseModelConfig(
    name=sparse_model_name,
    sparse_vector_params=SparseVectorParams(
        index=SparseIndexParams(
            on_disk=True
        ),
        modifier=Modifier.IDF
    )
)

reranking_model_config = RerankingModelConfig(
    name=reranking_model_name,
    vector_params=VectorParams(
        size=128,
        distance=Distance.COSINE,
        hnsw_config=HnswConfigDiff(
            m=0     # disable HNSW
        ),
        on_disk=True,
        multivector_config=MultiVectorConfig(
            comparator=MultiVectorComparator.MAX_SIM,
        )
    )
)

In [30]:
from rag.models import (
    DenseSearch,
    SparseSearch,
    HybridFusionSearch, 
    HybridRerankingSearch, 
    Metadata
)

In [31]:
documents = [
    'FastEmbed is lighter than Transformers & Sentence-Transformers.',
    'FastEmbed is supported by and maintained by Qdrant.',
]
metadatas = [
    Metadata(
        id=i + 1,
        text=document
    )
    for i, document in enumerate(documents)
]

dense_embeddings = list(dense_model.embed(documents))
sparse_embeddings = list(sparse_model.embed(documents))
reranking_embeddings = list(reranking_model.embed(documents))

In [None]:
dense_search = DenseSearch(
    qdrant_client=qdrant_client, 
    dense_model_config=dense_model_config
)
dense_collection_name = 'dense_collection'

dense_search.create_collection(dense_collection_name)
dense_search.upsert(
    dense_collection_name,
    dense_embeddings,
    metadatas
)

In [None]:
sparse_search = SparseSearch(
    qdrant_client=qdrant_client, 
    sparse_model_config=sparse_model_config
)
sparse_collection_name = 'sparse_collection'

sparse_search.create_collection(sparse_collection_name)
sparse_search.upsert(
    sparse_collection_name,
    sparse_embeddings,
    metadatas
)

In [None]:
fusion_search = HybridFusionSearch(
    qdrant_client=qdrant_client, 
    dense_model_config=dense_model_config,
    sparse_model_config=sparse_model_config
)
fusion_collection_name = 'hybrid_collection'

fusion_search.create_collection(fusion_collection_name)
fusion_search.upsert(
    fusion_collection_name, 
    dense_embeddings, 
    sparse_embeddings,
    metadatas
)

In [None]:
reranking_search = HybridRerankingSearch(
    qdrant_client=qdrant_client, 
    dense_model_config=dense_model_config,
    sparse_model_config=sparse_model_config,
    reranking_model_config=reranking_model_config
)
reranking_collection_name = 'reranking_collection'

reranking_search.create_collection(reranking_collection_name)
reranking_search.upsert(
    reranking_collection_name, 
    dense_embeddings, 
    sparse_embeddings,
    reranking_embeddings,
    metadatas
)

In [48]:
query_document = 'Who is lighter than Transformers?'

query_dense_embedding = next(iter(dense_model.embed(query_document)))
query_sparse_embedding = next(iter(sparse_model.embed(query_document)))
query_reranking_embedding = next(iter(reranking_model.embed(query_document)))

In [54]:
dense_search.search(
    dense_collection_name,
    query_dense_embedding,
    1
)

[ScoredPoint(id='3504f198-d847-4c36-b439-a6fd7b224f44', version=0, score=0.7509401, payload={'id': 1, 'text': 'FastEmbed is lighter than Transformers & Sentence-Transformers.'}, vector=None, shard_key=None, order_value=None)]

In [57]:
sparse_search.search(
    sparse_collection_name,
    query_sparse_embedding,
    1
)

[ScoredPoint(id='42f2c334-6120-4266-b132-271a076c954c', version=0, score=4.1631217, payload={'id': 1, 'text': 'FastEmbed is lighter than Transformers & Sentence-Transformers.'}, vector=None, shard_key=None, order_value=None)]

In [52]:
fusion_search.search(
    fusion_collection_name,
    query_dense_embedding,
    query_sparse_embedding,
    Fusion.RRF,
    1
)

[ScoredPoint(id='fce372bd-176f-49cd-a060-bec7871654e4', version=0, score=1.0, payload={'id': 1, 'text': 'FastEmbed is lighter than Transformers & Sentence-Transformers.'}, vector=None, shard_key=None, order_value=None)]

In [58]:
reranking_search.search(
    reranking_collection_name,
    query_dense_embedding,
    query_sparse_embedding,
    query_reranking_embedding,
    2,
    1
)

[ScoredPoint(id='046933a2-cf6c-47b6-b641-a4d9d69403b7', version=0, score=5.8587246, payload={'id': 1, 'text': 'FastEmbed is lighter than Transformers & Sentence-Transformers.'}, vector=None, shard_key=None, order_value=None)]

In [None]:
import json
from fastembed.sparse import SparseEmbedding
from tokenizers import Tokenizer


def get_tokens_and_weights(sparse_embedding: SparseEmbedding, tokenizer: Tokenizer) -> dict:
    token_weight_dict = {}
    
    for i in range(len(sparse_embedding.indices)):
        token = tokenizer.decode([sparse_embedding.indices[i]])
        weight = sparse_embedding.values[i]
        token_weight_dict[token] = weight

    # sort by weights
    token_weight_dict = dict(sorted(token_weight_dict.items(), key=lambda item: item[1], reverse=True))
    
    return token_weight_dict


tokenizer = Tokenizer.from_pretrained('Qdrant/SPLADE_PP_en_v1')
index = 0
print(json.dumps(get_tokens_and_weights(sparse_embeddings[index], tokenizer), indent=4))

In [None]:
# TODO add nvidia API for LLM