In [6]:
from langchain.embeddings import SentenceTransformerEmbeddings
from langchain.vectorstores import FAISS
from langchain.docstore import InMemoryDocstore
from langchain.docstore.document import Document
from pprint import pprint
import faiss

In [7]:
# Initialize the Sentence Transformer model
embedding_model = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")

# Create a FAISS index
d = 384  # Dimension of the embeddings (for all-MiniLM-L6-v2, it's 384)
index = faiss.IndexFlatL2(d)  # L2 distance index

# Create an in-memory document store
documents = [
    Document(page_content="Artificial Intelligence and Machine Learning are transforming the tech industry.", metadata={"topic": "AI and ML"}),
    Document(page_content="Quantum Computing is a new paradigm that could revolutionize computing.", metadata={"topic": "Quantum Computing"}),
    Document(page_content="Blockchain technology provides a secure and decentralized way to record transactions.", metadata={"topic": "Blockchain"}),
    Document(page_content="5G networks are enabling faster and more reliable internet connectivity.", metadata={"topic": "5G Networks"}),
    Document(page_content="Virtual Reality and Augmented Reality are creating immersive experiences.", metadata={"topic": "VR and AR"})
]

docstore = InMemoryDocstore(documents={doc.metadata['topic']: doc for doc in documents})
index_to_docstore_id = {i: doc.metadata['topic'] for i, doc in enumerate(documents)}

# Create a LangChain FAISS vector store
vector_store = FAISS(
    index=index, 
    embedding_function=embedding_model, 
    docstore=docstore, 
    index_to_docstore_id=index_to_docstore_id
)

TypeError: InMemoryDocstore.__init__() got an unexpected keyword argument 'documents'

In [None]:

# Add documents to the vector store (Embedding and Indexing)
documents = [
    "Artificial Intelligence and Machine Learning are transforming the tech industry.",
    "Quantum Computing is a new paradigm that could revolutionize computing.",
    "Blockchain technology provides a secure and decentralized way to record transactions.",
    "5G networks are enabling faster and more reliable internet connectivity.",
    "Virtual Reality and Augmented Reality are creating immersive experiences."
]
metadatas = [
    {"topic": "AI and ML"}, 
    {"topic": "Quantum Computing"}, 
    {"topic": "Blockchain"}, 
    {"topic": "5G Networks"}, 
    {"topic": "VR and AR"}
]
ids = ["doc1", "doc2", "doc3", "doc4", "doc5"]

vector_store.add_texts(texts=documents, metadatas=metadatas, ids=ids)

# Query the vector store
query = "What are the advancements in next-generation networks?"
results = vector_store.similarity_search(query, k=2)

# Print the results
pprint(results)