In [1]:
!pip install faiss-cpu chromadb

Collecting faiss-cpu
  Downloading faiss_cpu-1.13.1-cp310-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (7.6 kB)
Downloading faiss_cpu-1.13.1-cp310-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (23.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.7/23.7 MB[0m [31m161.4 MB/s[0m  [33m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.13.1


In [23]:
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import OllamaEmbeddings

from langchain_text_splitters import CharacterTextSplitter


loader = TextLoader('speech.txt')
documents = loader.load()

text_splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=500,
    chunk_overlap=30
)
docs = text_splitter.split_documents(documents)

print("Number of chunks:", len(docs))

Created a chunk of size 639, which is longer than the specified 500
Created a chunk of size 814, which is longer than the specified 500
Created a chunk of size 925, which is longer than the specified 500
Created a chunk of size 664, which is longer than the specified 500
Created a chunk of size 740, which is longer than the specified 500
Created a chunk of size 539, which is longer than the specified 500
Created a chunk of size 600, which is longer than the specified 500
Created a chunk of size 600, which is longer than the specified 500
Created a chunk of size 618, which is longer than the specified 500
Created a chunk of size 518, which is longer than the specified 500
Created a chunk of size 536, which is longer than the specified 500
Created a chunk of size 883, which is longer than the specified 500
Created a chunk of size 844, which is longer than the specified 500
Created a chunk of size 1005, which is longer than the specified 500


Number of chunks: 51


In [24]:
from langchain_community.vectorstores import FAISS
from langchain.embeddings.huggingface import HuggingFaceEmbeddings

# 3. Embedding model
embeddings = HuggingFaceEmbeddings(
    model_name="BAAI/bge-m3",
    model_kwargs={"device": "cpu"},
    encode_kwargs={"normalize_embeddings": True}
)

# 4. Create FAISS index
db = FAISS.from_documents(docs, embeddings)

print("FAISS DB created successfully!")





FAISS DB created successfully!


In [25]:
query = "what is AI"

docs = db.similarity_search(query)
docs[0].page_content

'Controversies\nGlossary\nvte\nArtificial intelligence (AI) is the capability of computational systems to perform tasks typically associated with human intelligence, such as learning, reasoning, problem-solving, perception, and decision-making. It is a field of research in computer science that develops and studies methods and software that enable machines to perceive their environment and use learning and intelligence to take actions that maximize their chances of achieving defined goals.[1]'

In [27]:
retriver = db.as_retriever()
docs = retriver.invoke(query)
docs[0].page_content

'Controversies\nGlossary\nvte\nArtificial intelligence (AI) is the capability of computational systems to perform tasks typically associated with human intelligence, such as learning, reasoning, problem-solving, perception, and decision-making. It is a field of research in computer science that develops and studies methods and software that enable machines to perceive their environment and use learning and intelligence to take actions that maximize their chances of achieving defined goals.[1]'

In [28]:
docs_score = db.similarity_search_with_score(query)
docs_score

[(Document(id='24b73196-1f74-4fae-a415-11419db56e6a', metadata={'source': 'speech.txt'}, page_content='Controversies\nGlossary\nvte\nArtificial intelligence (AI) is the capability of computational systems to perform tasks typically associated with human intelligence, such as learning, reasoning, problem-solving, perception, and decision-making. It is a field of research in computer science that develops and studies methods and software that enable machines to perceive their environment and use learning and intelligence to take actions that maximize their chances of achieving defined goals.[1]'),
  0.6817432),
 (Document(id='778c5b5d-4098-4ef2-b210-7ee9f47657ec', metadata={'source': 'speech.txt'}, page_content='Various subfields of AI research are centered around particular goals and the use of particular tools. The traditional goals of AI research include learning, reasoning, knowledge representation, planning, natural language processing, perception, and support for robotics.[a] To re

In [30]:
embedding_vec = embeddings.embed_query(query)

docs_score = db.similarity_search_by_vector(embedding_vec)

docs_score[0]

Document(id='24b73196-1f74-4fae-a415-11419db56e6a', metadata={'source': 'speech.txt'}, page_content='Controversies\nGlossary\nvte\nArtificial intelligence (AI) is the capability of computational systems to perform tasks typically associated with human intelligence, such as learning, reasoning, problem-solving, perception, and decision-making. It is a field of research in computer science that develops and studies methods and software that enable machines to perceive their environment and use learning and intelligence to take actions that maximize their chances of achieving defined goals.[1]')

In [31]:
db.save_local("Faiss_index")


In [32]:
new_db = FAISS.load_local(
    "Faiss_index",
    embeddings,
    allow_dangerous_deserialization=True
)

doc = new_db.similarity_search(query)
doc[0].page_content


'Controversies\nGlossary\nvte\nArtificial intelligence (AI) is the capability of computational systems to perform tasks typically associated with human intelligence, such as learning, reasoning, problem-solving, perception, and decision-making. It is a field of research in computer science that develops and studies methods and software that enable machines to perceive their environment and use learning and intelligence to take actions that maximize their chances of achieving defined goals.[1]'