### RAG - Document Loaders

##### Boilerplate code

In [None]:
import langchain
import os
from dotenv import load_dotenv
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_openai import ChatOpenAI
from langchain_core.prompts import PromptTemplate, ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

# Run load_dotenv() first so the environment lookups below can see any
# values it provides.
load_dotenv()

# Each key is None when its environment variable is not set.
google_api_key = os.environ.get("GOOGLE_API_KEY")
openai_api_key = os.environ.get("OPENAI_API_KEY")

# Gemini chat model: temperature 0, max_tokens set to 200.
google_llm = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash",
    api_key=google_api_key,
    temperature=0,
    max_tokens=200,
)

# OpenAI chat model: temperature 0, no explicit token limit.
openai_llm = ChatOpenAI(
    model="gpt-4",
    api_key=openai_api_key,
    temperature=0,
)

##### TextLoader

In [None]:
from langchain_community.document_loaders import TextLoader

# Load the text file through TextLoader, then print every Document returned.
loader = TextLoader(file_path='./docs_for_rag/coolie_large.txt')
documents = loader.load()

for document in documents:
    print(document)

##### CSVLoader

In [None]:
from langchain_community.document_loaders.csv_loader import CSVLoader

# Load the CSV through CSVLoader and print the text of each Document.
loader = CSVLoader(file_path='./docs_for_rag/cars.csv')
data = loader.load()

# print(data)  # uncomment to dump the raw list of Documents instead

for document in data:
    # Emits the content, one space, then two newlines -- the same bytes as
    # print(document.page_content, "\n").
    print(document.page_content, end=" \n\n")

##### WebBaseLoader

In [None]:
from langchain_community.document_loaders import WebBaseLoader

# Single-page variant, kept for reference:
# loader = WebBaseLoader("https://www.orkut.com/")
# docs = loader.load()

# Multi-page variant: one loader is given the whole list of URLs.
loader_multiple_pages = WebBaseLoader(
    web_path=[
        "https://www.orkut.com/",
        "https://google.com",
        "https://facebook.com",
        "https://linkedin.com",
        "https://x.com",
    ]
)

# Iterate the result of lazy_load() directly instead of calling load().
docs = loader_multiple_pages.lazy_load()

for doc in docs:
    # Emits the document, one space, then two newlines -- the same bytes as
    # print(doc, "\n").
    print(doc, end=" \n\n")

##### UnstructuredLoader - Loading Images and PDFs

In [1]:
from langchain_unstructured import UnstructuredLoader

# Files handed to UnstructuredLoader in one call: a JPEG image and a PDF.
file_paths = [
    './docs_for_rag/images.jpeg',
    './docs_for_rag/nexon_brochure.pdf',
]

# Best-effort demo: any failure while building the loader, loading, or
# printing is reported as a single line instead of a traceback.
try:
    loader = UnstructuredLoader(file_paths)
    docs = loader.load()
    for doc in docs:
        if not doc.page_content:
            # Document came back with empty text.
            print("No text content found in the image")
            continue
        # Emits the content, one space, then two newlines -- the same bytes
        # as print(doc.page_content, "\n").
        print(doc.page_content, end=" \n\n")
except Exception as e:
    print(f"Error: {e}")

  from .autonotebook import tqdm as notebook_tqdm




INFO: Reading image file: ./docs_for_rag/images.jpeg ...
INFO: pikepdf C++ to Python logger bridge initialized


No text content found in the image
ex Tata New 

Tata Neon Al Variants New Prices 

INTRODUCING 2025 

More Style. More Safety. More Tech. 

THE NEXON PHILOSOPHY 

People are its true inspiration. 

People who think far ahead. 

Who go the extra mile. And stay ahead of the curve. 

The Nexon is simply the icon of this attitude. 

It's beyond just a car, 

It’s an aspiration. It’s an ideology. 

A belief in blazing new trails. 

And moving ahead. 

PERSONAS 

Find the Nexon that matches you 

FEARLESS 

CREATIVE 

No is never your answer. 

Your inner child is creative. 

To this adventure called life. 

Wide-eyed and inquisitive. 

Be it a long drive. 

The world is yours to explore. 

Or a cross-country drive. 

You drive your passion. 

The answer is always yes. 

PURE 

You live in the moment. 

Enjoying the smallest of joys. 

You pride in being yourself. 

And lead a life of ultimate sophistication. 

SMART 

Pragmatism is your thing. 

You believe in results. 

Smart work over ha

### And much more - refer to the LangChain document loaders documentation