In [6]:
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import FAISS 
from langchain_community.embeddings import OllamaEmbeddings
from langchain_text_splitters import CharacterTextSplitter

# Splitter configuration: chunk_size / chunk_overlap are measured in characters.
text_splitter = CharacterTextSplitter(
    chunk_overlap=30,
    chunk_size=1000,
)

# Read the raw text file into LangChain Document objects ...
loader = TextLoader(file_path="cybersecurity.txt")
documents = loader.load()

# ... and cut them into the chunks that get embedded further down.
docs = text_splitter.split_documents(documents=documents)

In [7]:
# Display the chunks produced by the text splitter to sanity-check the split.
docs

[Document(metadata={'source': 'cybersecurity.txt'}, page_content='Cybersecurity: An Overview\n---------------------------\n\nCybersecurity refers to the practice of protecting systems, networks, and programs from digital attacks. These cyberattacks are usually aimed at accessing, changing, or destroying sensitive information; extorting money from users; or interrupting normal business processes.\n\nTypes of Cyber Threats:\n1. Malware - Software designed to damage or destroy a computer system.\n2. Phishing - Fraudulent emails designed to trick recipients into revealing sensitive information.\n3. Ransomware - Malware that locks files and demands payment for release.\n4. Man-in-the-Middle Attacks - Where attackers secretly intercept communication between two parties.\n5. Denial-of-Service (DoS) Attacks - Overwhelming systems to make them unavailable to users.'),
 Document(metadata={'source': 'cybersecurity.txt'}, page_content='Key Principles of Cybersecurity:\n- **Confidentiality**: Ensur

In [14]:
# Embedding function backed by the `gemma:2b` model served through Ollama.
embeddings = OllamaEmbeddings(model="gemma:2b")

# Embed every chunk in `docs` and store the vectors in a FAISS index.
db = FAISS.from_documents(
    documents=docs,
    embedding=embeddings,
)

# Echo the store so the cell output confirms it was built.
db

<langchain_community.vectorstores.faiss.FAISS at 0x237858c72d0>

In [13]:
# Query the vector store directly for the chunks closest to `query`.
query = "What is a document tells about?"

# Bind the hits to their own name instead of `docs`: `docs` is the chunk list
# that `FAISS.from_documents(docs, ...)` consumes, so overwriting it here would
# make a re-run of the indexing cell rebuild the index from search hits only.
search_results = db.similarity_search(query)

# Show the text of the best-matching chunk.
search_results[0].page_content

'Cybersecurity: An Overview\n---------------------------\n\nCybersecurity refers to the practice of protecting systems, networks, and programs from digital attacks. These cyberattacks are usually aimed at accessing, changing, or destroying sensitive information; extorting money from users; or interrupting normal business processes.\n\nTypes of Cyber Threats:\n1. Malware - Software designed to damage or destroy a computer system.\n2. Phishing - Fraudulent emails designed to trick recipients into revealing sensitive information.\n3. Ransomware - Malware that locks files and demands payment for release.\n4. Man-in-the-Middle Attacks - Where attackers secretly intercept communication between two parties.\n5. Denial-of-Service (DoS) Attacks - Overwhelming systems to make them unavailable to users.'

In [16]:
# Expose the vector store through the retriever interface and run the same query.
retriever = db.as_retriever()

# Keep the retrieved documents under their own name so the chunk list `docs`
# (the input to `FAISS.from_documents`) is not clobbered; otherwise re-running
# the indexing cell after this one would index only these retrieved documents.
retrieved_docs = retriever.invoke(query)

# Show the text of the top retrieved chunk.
retrieved_docs[0].page_content

'Cybersecurity: An Overview\n---------------------------\n\nCybersecurity refers to the practice of protecting systems, networks, and programs from digital attacks. These cyberattacks are usually aimed at accessing, changing, or destroying sensitive information; extorting money from users; or interrupting normal business processes.\n\nTypes of Cyber Threats:\n1. Malware - Software designed to damage or destroy a computer system.\n2. Phishing - Fraudulent emails designed to trick recipients into revealing sensitive information.\n3. Ransomware - Malware that locks files and demands payment for release.\n4. Man-in-the-Middle Attacks - Where attackers secretly intercept communication between two parties.\n5. Denial-of-Service (DoS) Attacks - Overwhelming systems to make them unavailable to users.'

### Similarity search with score
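`similarity_search_with_score` returns not only the documents but also a distance score between the query and each returned document. With the default FAISS index used here, that score is the L2 (Euclidean) distance rather than the cosine distance, so it is not bounded to a fixed range. A lower score indicates a higher similarity — meaning the document is more relevant to the query.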

In [18]:
# Same search as before, but each hit comes back as a (Document, score) pair;
# a lower score means the chunk is closer to the query.
docs_and_scores = db.similarity_search_with_score(query=query)

# Display the pairs to inspect the scores alongside the documents.
docs_and_scores

[(Document(id='5a519d43-a92e-4e7c-9817-3b23a728fba1', metadata={'source': 'cybersecurity.txt'}, page_content='Cybersecurity: An Overview\n---------------------------\n\nCybersecurity refers to the practice of protecting systems, networks, and programs from digital attacks. These cyberattacks are usually aimed at accessing, changing, or destroying sensitive information; extorting money from users; or interrupting normal business processes.\n\nTypes of Cyber Threats:\n1. Malware - Software designed to damage or destroy a computer system.\n2. Phishing - Fraudulent emails designed to trick recipients into revealing sensitive information.\n3. Ransomware - Malware that locks files and demands payment for release.\n4. Man-in-the-Middle Attacks - Where attackers secretly intercept communication between two parties.\n5. Denial-of-Service (DoS) Attacks - Overwhelming systems to make them unavailable to users.'),
  3170.4116),
 (Document(id='8826b245-056f-40b7-810b-4f0fd33582a1', metadata={'sourc