### Data Ingestion Using LangChain (Document Loaders)
Reference documentation: https://docs.langchain.com/oss/python/integrations/document_loaders

In [1]:
from langchain_community.document_loaders import TextLoader

#### Text Loader

In [2]:
# Load requirements.txt through TextLoader and display the resulting Documents.
# The encoding is passed explicitly: when it is omitted, TextLoader opens the
# file with the system default encoding, so the same file could decode
# differently (or fail to decode) from one machine to another.
loader = TextLoader('requirements.txt', encoding='utf-8')
req = loader.load()
req

[Document(metadata={'source': 'requirements.txt'}, page_content='langchain\nipykernel\nlangchain_community\nlangchain_core')]

#### PyPDF Loader

In [3]:
from langchain_community.document_loaders import PyPDFLoader

# Point PyPDFLoader at the local PDF, load it, and show the Documents
# (metadata plus extracted text) as the cell output.
loader = PyPDFLoader(file_path="research.pdf")
pdf = loader.load()
pdf

[Document(metadata={'producer': 'Microsoft® Word 2013', 'creator': 'Microsoft® Word 2013', 'creationdate': '2017-03-06T12:25:28+05:30', 'title': 'Paper Title (use style: paper title)', 'author': 'Susan S Mathew', 'keywords': 'formatting, style, referencing', 'moddate': '2017-03-06T12:25:28+05:30', 'source': 'research.pdf', 'total_pages': 3, 'page': 0, 'page_label': '1'}, page_content='Journal of  \nEngineering,  \nScience & \n Management  \nEducation \n \n \nPaper Title–Paper format for “Journal of Engineering Science & \nManagement Education” for A4 Page Size \nFirst Author1, Second Author2, Third Author3 \n1Organization name and place including country name  \n2Organization name and place including country name  \nE-mail1: email id  of the first author \nPaper History: Received: 1st Feb 2017                      Revised: 27th Feb 2017                      Accepted: 5th March 2017 \n \nAbstract: This electronic document is a “live” template. The various components of your paper [title

#### Web Base Loader

In [10]:
import bs4
from langchain_community.document_loaders import WebBaseLoader

# Fetch the blog post, handing BeautifulSoup a SoupStrainer so that parsing is
# limited to elements whose class is "post-title" or "post-meta".
web_loader = WebBaseLoader(
    web_path="https://lilianweng.github.io/posts/2023-06-23-agent/",
    bs_kwargs={
        "parse_only": bs4.SoupStrainer(class_=("post-title", "post-meta")),
    },
)
web_content = web_loader.load()
web_content

[Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='\n      LLM Powered Autonomous Agents\n    Date: June 23, 2023  |  Estimated Reading Time: 31 min  |  Author: Lilian Weng\n\n')]

#### Arxiv Loader

In [22]:
from langchain_community.document_loaders import ArxivLoader

# Query arXiv for "1706.03762"; load_max_docs and doc_content_chars_max are the
# loader's limits on the number of documents and on the text kept per document.
arxiv = ArxivLoader(
    query="1706.03762",
    load_max_docs=2,
    doc_content_chars_max=1000,
)
response = arxiv.load()
response

[Document(metadata={'Published': '2023-08-02', 'Title': 'Attention Is All You Need', 'Authors': 'Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N. Gomez, Lukasz Kaiser, Illia Polosukhin', 'Summary': 'The dominant sequence transduction models are based on complex recurrent or convolutional neural networks in an encoder-decoder configuration. The best performing models also connect the encoder and decoder through an attention mechanism. We propose a new simple network architecture, the Transformer, based solely on attention mechanisms, dispensing with recurrence and convolutions entirely. Experiments on two machine translation tasks show these models to be superior in quality while being more parallelizable and requiring significantly less time to train. Our model achieves 28.4 BLEU on the WMT 2014 English-to-German translation task, improving over the existing best results, including ensembles by over 2 BLEU. On the WMT 2014 English-to-French translation 

#### Wikipedia Loader

In [3]:
from langchain_community.document_loaders import WikipediaLoader

# Search Wikipedia for "Lionel Messi" with load_max_docs set to 2, then display
# the loaded Documents.
wiki_loader = WikipediaLoader(
    query="Lionel Messi",
    load_max_docs=2,
)
docs = wiki_loader.load()
docs

[Document(metadata={'title': 'Lionel Messi', 'summary': 'Lionel Andrés "Leo" Messi (born 24 June 1987) is an Argentine professional footballer who plays as a forward for and captains both Major League Soccer club Inter Miami and the Argentina national team. Widely regarded as one of the greatest players in history, Messi has set numerous records for individual accolades won throughout his professional footballing career, including eight Ballon d\'Ors, six European Golden Shoes, and eight times being named the world\'s best player by FIFA. In 2025, he was named the All Time Men\'s World Best Player by the IFFHS. He is the most decorated player in the history of professional football having won 46 team trophies. Messi\'s records include most goals in a calendar year (91), most goals for a single club (672 for Barcelona), most goals in La Liga (474), most assists in international football (61), most goal contributions in the FIFA World Cup (21), and most goal contributions in the Copa Amé