In [None]:
# !pip install -U langchain langchain_community numpy
# !pip install tqdm langchain_huggingface
# !pip install faiss-cpu
# uuid is part of the Python standard library, so it does not need a pip install.

In [None]:
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
# Shared embedding model: used as the vector store's embedding function and to embed queries directly.
embeddings_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")

In [31]:
# Sanity check: embed a sample sentence and display the length of the resulting vector.
demo_text = "Hello, it is a sunny day!"
len(embeddings_model.embed_query(demo_text))

768

In [32]:
import os
import faiss
import numpy as np
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.docstore.in_memory import InMemoryDocstore
from tqdm import tqdm
from uuid import uuid4

In [None]:
def get_faiss_vector_store(dimension: int = 768):
    """Create an empty LangChain FAISS vector store backed by an HNSW flat index.

    Args:
        dimension: Length of the vectors the index will hold. The default of
            768 matches the embedding length displayed earlier in this
            notebook for `embeddings_model`.

    Returns:
        A `FAISS` store wired to the notebook-level `embeddings_model`, with a
        new `InMemoryDocstore` and an empty index-to-docstore-id mapping.
    """
    return FAISS(
        embedding_function=embeddings_model,
        index=faiss.IndexHNSWFlat(dimension),
        docstore=InMemoryDocstore(),
        index_to_docstore_id={},
    )



async def vectorize_documents(directory, chunk_size: int = 1000, chunk_overlap: int = 150):
    """Load, chunk and index every `.txt` file found directly inside `directory`.

    Args:
        directory: Path of the folder to scan; sub-folders are not traversed.
        chunk_size: Forwarded to `RecursiveCharacterTextSplitter` as `chunk_size`.
        chunk_overlap: Forwarded to `RecursiveCharacterTextSplitter` as `chunk_overlap`.

    Returns:
        The vector store created by `get_faiss_vector_store()`, populated with
        the chunks of all files that were processed.
    """
    faiss_vector_store = get_faiss_vector_store()

    # The splitter configuration does not depend on the file, so build it once.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)

    # Pick the .txt files up front so the progress bar counts real work only.
    # Sorting makes the ingestion order reproducible (os.listdir order is arbitrary).
    txt_paths = []
    for filename in sorted(os.listdir(directory)):
        file_path = os.path.join(directory, filename)
        if filename.endswith('.txt') and os.path.isfile(file_path):
            txt_paths.append(file_path)

    for file_path in tqdm(txt_paths):
        # Load document
        document = TextLoader(file_path).load()

        # Chunk the document
        chunks = text_splitter.split_documents(document)

        # Nothing to index (e.g. an empty file): skip instead of sending an empty batch.
        if not chunks:
            continue

        await faiss_vector_store.aadd_documents(chunks)

    return faiss_vector_store


In [36]:
# Build the vector store from the .txt files under directory_path.
# vectorize_documents is a coroutine, hence the cell-level `await`.
directory_path = './docs/'  # Replace with your directory
faiss_vector_store = await vectorize_documents(directory_path)

 33%|███▎      | 1/3 [00:05<00:10,  5.49s/it]

['40a11c1b-5a47-42b5-9a4d-009e1358248e', '41316fe8-c08c-4992-b00f-96f63e356568', 'f854af04-de18-4b96-be63-defe8e335d28', '26558cc1-d0ba-4837-9d19-01902654eef8', 'a65db5b0-32cc-4bed-94db-f5530cd77ba5', '8026a1c9-f3c6-406b-a10f-5f25907bdd00', 'c0d25b3f-0128-4096-801a-937f01e13136', '6bb0ccca-a18e-404b-aced-3dc6fb20b6d3', '7d3cd571-450a-4bcc-b48c-001200634314', '045b3d84-5998-4239-9ad0-4659b22a462f', 'ccaa651e-9971-4278-ab68-04b6eedcdaae', '4a6e2b3c-0660-41cd-9884-0919eeb1b162', '5a460fb0-28ed-46a6-8eb1-4b24408eaf55', '2ba2ad74-fc50-4855-b53a-b7bf09714ba3', '26eecc8f-6e35-4b58-8f70-be3dd4b51a08', '5f7415ee-889d-4877-babe-6f39066ad0e6', '6abbd0bb-29ee-4bc1-8acc-ff9fb6d7c4e3', '40eb59cb-25b9-4010-b385-c8ee46819b88', '41868865-fde1-43c9-b77e-86a665918984', '135e1e4a-f100-4623-896e-c20ffbe41d15', 'bb14871d-e9f9-44cd-b893-16054a2b3f82', '16ec870d-5c9d-4ac6-9bf1-14b577366a78', 'b96ce4aa-2a43-4eb6-8b14-9c116c20056c', '3fbda7cf-b8e7-4f09-8158-290180f6a4f0', 'bb09bd46-15b2-4680-a99d-ce5bf62d62aa',

 67%|██████▋   | 2/3 [00:10<00:05,  5.05s/it]

['7c8fb603-5881-47da-849c-2c310bca6b2b', '3734d6f1-05b9-4239-860f-f63547824167', 'a5b3f7e5-bf24-47a9-99cf-5a556fd82c06', '40b91462-af74-4185-b780-c35df00ecf95', '8a4f947f-249a-494d-9b23-55d4a8a45911', '346a1634-3796-47eb-bda1-e78f2f73a0de', '3ad523b0-18b7-4c13-b212-9d108feea76c', 'f15d5e63-7ae9-4137-95fe-ba0d95efab8d', 'df8d6ef5-29bb-4b1d-aa7c-28e13f31688e', 'e133fa44-a338-474d-a847-2be263a3d461', 'd84e7618-375d-44e5-a892-a47385178a96', 'ddc4498b-a31c-47c2-85eb-674f990d5dff', 'fa187e95-63de-47fc-838a-5df289194771', '7174454b-68b6-40da-9819-d2fa51e509dc', 'ed13c003-00de-4fab-a6a9-0f684497f559', '2f3ea614-a2be-4313-8363-7eda1bff9293', '955bfd64-3d24-4c3e-a452-da56f7f78ad3', 'b2ea828a-3b54-48a8-818b-cd30724c18c3', 'e8e18b3e-da5d-4082-ac00-b135f069edf6', 'ce677eb3-5c0f-4b51-a10d-ab3c084c8502', 'd2e9e501-e8d9-465b-8737-ca2d3e7131a7', '947905b8-3ca1-41ce-acf3-40df015312fb', '4c00671b-e2e8-4305-b1dc-f2afb3abb96f', '1b5ffcd8-2eb8-454b-ae23-1943528811f6', 'ebce1905-9ceb-488a-b786-7c4719db32f9',

100%|██████████| 3/3 [00:12<00:00,  4.14s/it]

['0f71e535-7853-4165-9d8a-19bd2d8d730f', 'd0cc7835-467f-4d9a-b26a-87101da5683a', '7f613bb5-399b-4b56-a00f-e587703ec0bf', '2d0f78ca-db70-4a13-9e1a-e387e4f39355', 'b49d6137-ee96-477a-a211-6566dc008df2', '3f87fea6-6bc0-43e2-9b4b-e946de61c2c9', 'c18dfc23-02ec-4041-a34e-ebf31f8c06e3', '4ce3f48c-4842-4b1f-9880-ce050fd18d60', '729173fc-7c2d-4815-8636-663d393b5354', '0c6059d4-febc-497a-880e-f7854ba60ee0', 'a0797d81-7f7f-4be2-ad37-4f4c1d77f08d', 'a1dd84d2-9cb1-4cbc-b93b-e3156bd1a6ea', 'ae37b492-ad3a-4dbb-b883-5bb477ea0acc', 'e23bdb10-b07b-405c-99bf-c5a414101aae', '1699fe94-dbab-4486-b9b0-4076aa7bc378', '5e4236c4-f5e6-46d5-9c27-fd4673606d2c', '1372d476-df61-404a-bdc6-bbd1fb0fa777', 'dfea1ed7-96cf-4373-826f-9787941b9293', 'c6a753dc-185b-4044-92d0-ea06fc27a07a', '911edea7-c75c-4ab9-b686-18ef0e12333b', 'b49f820f-d908-45c7-9e60-b0c2c9509ed2', '4732adb8-f2f1-40f1-9049-35ad13737ddf', '1112bb0c-5ce6-41ba-88a2-8ceb48b0d8a4']





In [45]:
# Show how many vectors the underlying FAISS index currently holds.
faiss_vector_store.index.ntotal

112

In [48]:
# Text-based search: pass the raw query string and ask for the top 7 documents.
ans = await faiss_vector_store.asimilarity_search(query="Applications of Artifiical Intelligence?", k=7)
ans

[Document(id='7f613bb5-399b-4b56-a00f-e587703ec0bf', metadata={'source': './docs/ai.txt'}, page_content="Various subfields of AI research are centered around particular goals and the use of particular tools. The traditional goals of AI research include reasoning, knowledge representation, planning, learning, natural language processing, perception, and support for robotics.[a] General intelligence—the ability to complete any task performed by a human on an at least equal level—is among the field's long-term goals.[4] To reach these goals, AI researchers have adapted and integrated a wide range of techniques, including search and mathematical optimization, formal logic, artificial neural networks, and methods based on statistics, operations research, and economics.[b] AI also draws upon psychology, linguistics, philosophy, neuroscience, and other fields.[5]"),
 Document(id='0f71e535-7853-4165-9d8a-19bd2d8d730f', metadata={'source': './docs/ai.txt'}, page_content='Artificial intelligence

In [49]:
query = "Applications of Artifiical Intelligence?"
vectorized_query = np.array(embeddings_model.embed_query(query), dtype="float64")

ans = await faiss_vector_store.asimilarity_search_with_score_by_vector(embedding=vectorized_query, k=7)
ans

[(Document(id='7f613bb5-399b-4b56-a00f-e587703ec0bf', metadata={'source': './docs/ai.txt'}, page_content="Various subfields of AI research are centered around particular goals and the use of particular tools. The traditional goals of AI research include reasoning, knowledge representation, planning, learning, natural language processing, perception, and support for robotics.[a] General intelligence—the ability to complete any task performed by a human on an at least equal level—is among the field's long-term goals.[4] To reach these goals, AI researchers have adapted and integrated a wide range of techniques, including search and mathematical optimization, formal logic, artificial neural networks, and methods based on statistics, operations research, and economics.[b] AI also draws upon psychology, linguistics, philosophy, neuroscience, and other fields.[5]"),
  np.float32(0.80512697)),
 (Document(id='0f71e535-7853-4165-9d8a-19bd2d8d730f', metadata={'source': './docs/ai.txt'}, page_con

In [50]:
# Same scored vector search, but passing embed_query's output directly
# instead of wrapping it in a NumPy array first.
query = "Applications of Artifiical Intelligence?"
vectorized_query = embeddings_model.embed_query(query)

ans = await faiss_vector_store.asimilarity_search_with_score_by_vector(embedding=vectorized_query, k=7)
ans

[(Document(id='7f613bb5-399b-4b56-a00f-e587703ec0bf', metadata={'source': './docs/ai.txt'}, page_content="Various subfields of AI research are centered around particular goals and the use of particular tools. The traditional goals of AI research include reasoning, knowledge representation, planning, learning, natural language processing, perception, and support for robotics.[a] General intelligence—the ability to complete any task performed by a human on an at least equal level—is among the field's long-term goals.[4] To reach these goals, AI researchers have adapted and integrated a wide range of techniques, including search and mathematical optimization, formal logic, artificial neural networks, and methods based on statistics, operations research, and economics.[b] AI also draws upon psychology, linguistics, philosophy, neuroscience, and other fields.[5]"),
  np.float32(0.80512697)),
 (Document(id='0f71e535-7853-4165-9d8a-19bd2d8d730f', metadata={'source': './docs/ai.txt'}, page_con

# Architecture:
- All data in vector store
    - Need a database loading function
    - Embedding the documents on every run is expensive, so store the embeddings locally and just load them.
    - Use contextual retrieval chunks with context attached to the chunks.
- Use semantic cache to reduce load and latency
    - Use the naive approach first: run a semantic search against cached queries and treat a 90% to 95% similarity match as a cache hit.
    - Can also require a 100% (exact) match: this avoids false hits when only a detail such as the year changes, e.g. "Who won FIFA 2018?" vs. "Who won FIFA 2022?"
    - Can use caches like redis to do this
- Use a mix of lexical and semantic search
    - BM25 for lexical search
    - Cosine Similarity for semantic search
- Use re-ranking model to re-rank the results and pass top k to LLM
    - experiment with the k hyper-parameter
    - Experiment with LLMs that have different context-window sizes, without resorting to context stuffing
- Host LLMs and embedding models with vLLM and serve them with low-latency.