In [26]:
# Text loader: read the local speech transcript into LangChain Document objects.
from langchain_community.document_loaders import TextLoader

loader = TextLoader(file_path="speech.txt")
text_document = loader.load()

# Display the loaded documents as the cell output.
text_document


[Document(metadata={'source': 'speech.txt'}, page_content="According to democratic theorists, a free and open debate will usually lead to the best option being considered and will be more likely to avoid serious mistakes.\n\nDemocracy depends upon a literate, knowledgeable citizenry whose access to information enables it to participate as fully as possible in the public life of their society and to criticize unwise or tyrannical government officials or policies. Citizens and their elected representatives recognize that democracy depends upon the widest possible access to uncensored ideas, data, and opinions.\n\nFor a free people to govern themselves, they must be free to express themselves -- openly, publicly, and repeatedly; in speech and in writing.\n\nThe principle of free speech should be protected by a democracy's constitution, preventing the legislative or executive branches of government from imposing censorship.\n\nThe protection of free speech is a so-called negative right, si

In [27]:

import os

from dotenv import load_dotenv
from langchain_community.llms import Ollama

# Load environment variables from the project's .env file. When that file is
# not present on this machine, pass None so python-dotenv falls back to its
# own .env discovery instead of silently loading nothing.
ENV_FILE = "/home/mahdi/Desktop/Langchain/.env"
load_dotenv(dotenv_path=ENV_FILE if os.path.isfile(ENV_FILE) else None)

# os.getenv already reads from os.environ, so writing the same value back
# into os.environ is unnecessary; just keep a local handle on the key.
langsmith_api_key = os.getenv("LANGSMITH_API_KEY")

# Mirror LANGSMITH_TRACING into LANGCHAIN_TRACING_V2 when it is set.
langsmith_tracing = os.getenv("LANGSMITH_TRACING")
if langsmith_tracing:
    os.environ["LANGCHAIN_TRACING_V2"] = langsmith_tracing

llm = Ollama(
    model="llama2",
)

# Never print the secret itself: notebook outputs are saved with the file and
# would leak the key to anyone who can read it. Report only whether it is set
# (None still signals a missing key, as before).
print("LangSmith key:", "<redacted>" if langsmith_api_key else None)
print("LangChain tracing V2:", os.getenv("LANGCHAIN_TRACING_V2"))

LangSmith key: <redacted>
LangChain tracing V2: true


In [28]:
# Web loader: fetch a Wikipedia article as LangChain Document objects.
import bs4
from langchain_community.document_loaders import WebBaseLoader

# Only the <div id="mw-content-text"> element is parsed.
# If this yields nothing, try a broader selector or drop parse_only entirely.
loader = WebBaseLoader(
    web_path="https://en.wikipedia.org/wiki/Khalid_ibn_al-Walid",
    bs_kwargs={
        "parse_only": bs4.SoupStrainer("div", attrs={"id": "mw-content-text"}),
    },
)

text_document = loader.load()
text_document



In [29]:
# PDF loader: read the lecture notes; the saved output shows one Document per
# page, each carrying page metadata.
from langchain_community.document_loaders import PyPDFLoader

loader = PyPDFLoader(file_path="lecture-note-13.pdf")
docs = loader.load()

# Display the loaded pages as the cell output.
docs

[Document(metadata={'producer': 'www.ilovepdf.com', 'creator': 'Microsoft® PowerPoint® 2016', 'creationdate': '2025-08-04T12:00:51+00:00', 'moddate': '2025-08-04T12:00:52+00:00', 'source': 'lecture-note-13.pdf', 'total_pages': 23, 'page': 0, 'page_label': '1'}, page_content='1\nMinimum Spanning Trees'),
 Document(metadata={'producer': 'www.ilovepdf.com', 'creator': 'Microsoft® PowerPoint® 2016', 'creationdate': '2025-08-04T12:00:51+00:00', 'moddate': '2025-08-04T12:00:52+00:00', 'source': 'lecture-note-13.pdf', 'total_pages': 23, 'page': 1, 'page_label': '2'}, page_content='2\nProblem: Laying Telephone Wire\nCentral office'),
 Document(metadata={'producer': 'www.ilovepdf.com', 'creator': 'Microsoft® PowerPoint® 2016', 'creationdate': '2025-08-04T12:00:51+00:00', 'moddate': '2025-08-04T12:00:52+00:00', 'source': 'lecture-note-13.pdf', 'total_pages': 23, 'page': 2, 'page_label': '3'}, page_content='3\nWiring: Naïve Approach\nCentral office\nExpensive!'),
 Document(metadata={'producer': '

Splitting documents into chunks

In [38]:
# Split the loaded PDF pages into overlapping chunks for embedding.
from langchain.text_splitter import RecursiveCharacterTextSplitter

# chunk_size / chunk_overlap are measured in characters by default.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
document = text_splitter.split_documents(documents=docs)

# Preview only the first five chunks to keep the output short.
document[:5]


[Document(metadata={'producer': 'www.ilovepdf.com', 'creator': 'Microsoft® PowerPoint® 2016', 'creationdate': '2025-08-04T12:00:51+00:00', 'moddate': '2025-08-04T12:00:52+00:00', 'source': 'lecture-note-13.pdf', 'total_pages': 23, 'page': 0, 'page_label': '1'}, page_content='1\nMinimum Spanning Trees'),
 Document(metadata={'producer': 'www.ilovepdf.com', 'creator': 'Microsoft® PowerPoint® 2016', 'creationdate': '2025-08-04T12:00:51+00:00', 'moddate': '2025-08-04T12:00:52+00:00', 'source': 'lecture-note-13.pdf', 'total_pages': 23, 'page': 1, 'page_label': '2'}, page_content='2\nProblem: Laying Telephone Wire\nCentral office'),
 Document(metadata={'producer': 'www.ilovepdf.com', 'creator': 'Microsoft® PowerPoint® 2016', 'creationdate': '2025-08-04T12:00:51+00:00', 'moddate': '2025-08-04T12:00:52+00:00', 'source': 'lecture-note-13.pdf', 'total_pages': 23, 'page': 2, 'page_label': '3'}, page_content='3\nWiring: Naïve Approach\nCentral office\nExpensive!'),
 Document(metadata={'producer': '

Vector embeddings and vector store

In [None]:
# Embed the chunks and index them in a Chroma vector store.
# OpenAIEmbeddings() could be used here instead of OllamaEmbeddings().
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.vectorstores import chroma

db = chroma.Chroma.from_documents(documents=document, embedding=OllamaEmbeddings())


In [40]:
# Query the Chroma vector store for the chunks most similar to the question.
query = "What is the minimum spanning tree "
result = db.similarity_search(query=query)

# Display the matching chunks as the cell output.
result

[Document(metadata={'creator': 'Microsoft® PowerPoint® 2016', 'source': 'lecture-note-13.pdf', 'total_pages': 23, 'page': 14, 'producer': 'www.ilovepdf.com', 'moddate': '2025-08-04T12:00:52+00:00', 'page_label': '15', 'creationdate': '2025-08-04T12:00:51+00:00'}, page_content='15\nPrim’s algorithm\nVertex Parent\ne -\nb e\nc d\nd e\na d\na\nc\ne\nd\nb\n2\n45\n9\n6\n4\n5\n5\nThe final minimum spanning tree'),
 Document(metadata={'page_label': '9', 'total_pages': 23, 'page': 8, 'producer': 'www.ilovepdf.com', 'source': 'lecture-note-13.pdf', 'moddate': '2025-08-04T12:00:52+00:00', 'creationdate': '2025-08-04T12:00:51+00:00', 'creator': 'Microsoft® PowerPoint® 2016'}, page_content='9\nPrim’s Algorithm implementation\nInitialization\na. Pick a vertex r to be the root\nb. Set D(r) = 0, parent(r) = null\nc. For all vertices v \uf0ceV, v \uf0b9r, set D(v) = \uf0a5\nd. Insert all vertices into priority queue P, \nusing distances as the keys\na\nc\ne\nd\nb\n2\n45\n9\n6\n4\n5\n5\ne a b c d\n0 \u

In [41]:
# FAISS vector store: index the same chunks with FAISS instead of Chroma.
from langchain_community.vectorstores import FAISS

db1 = FAISS.from_documents(documents=document, embedding=OllamaEmbeddings())