In [19]:
from langchain_openai import ChatOpenAI
from dotenv import load_dotenv
import os

# Pull settings from a local .env file into the process environment.
load_dotenv()

# The OpenAI credential is expected under the OPENAI_API_KEY variable.
api_key = os.environ.get("OPENAI_API_KEY")

# Stop right away when the key is missing or empty.
if not api_key:
    raise ValueError("API key not found in the .env file. Please ensure it is set as OPENAI_API_KEY.")

# Shared chat model; other cells in this notebook refer to it as `llm`.
llm = ChatOpenAI(
    model="gpt-4o-mini",
    openai_api_key=api_key,
)

print("Model initialized successfully!")


Model initialized successfully!


In [None]:
from langchain.document_loaders import PyMuPDFLoader, UnstructuredURLLoader, YoutubeLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema import Document
from langchain_core.messages import HumanMessage
from langchain_openai import ChatOpenAI
import base64
import httpx
import re


# Function to load data from PDFs
def load_pdfs(file_paths):
    """Load every PDF in ``file_paths`` and return the combined documents.

    Each path is handed to ``PyMuPDFLoader``; whatever its ``load()`` call
    returns is flattened into one list, in the order the paths were given.
    """
    return [
        doc
        for pdf_path in file_paths
        for doc in PyMuPDFLoader(pdf_path).load()
    ]


# Function to load data from URLs
def load_urls(urls):
    """Load web pages and return them as a list of documents.

    Args:
        urls: A collection of URL strings, or a single URL string.
            ``None`` or an empty collection is accepted and produces an
            empty list.

    Returns:
        list: The documents returned by ``UnstructuredURLLoader.load()``.
    """
    # Nothing to fetch: return before the loader is even constructed.
    if not urls:
        return []
    # A bare string would otherwise be iterated character by character.
    if isinstance(urls, str):
        urls = [urls]
    loader = UnstructuredURLLoader(urls=list(urls))
    return list(loader.load())


# Function to load data from YouTube videos
def load_youtube(video_urls):
    """Load data for YouTube videos and return it as a list of documents.

    Args:
        video_urls: Iterable of YouTube URLs whose 11-character video ID
            follows either ``v=`` or a ``/``. ``None`` is treated as empty.

    Returns:
        list: Documents returned by ``YoutubeLoader.load()`` for every URL
        from which a video ID could be extracted. URLs without a
        recognisable ID are reported on stdout and skipped.
    """
    # The negative lookahead requires the ID to be exactly 11 characters.
    # Without it, the first 11 characters of any longer path segment
    # (e.g. a channel ID) would be mistaken for a video ID.
    video_id_pattern = re.compile(
        r"(?:v=|\/)([0-9A-Za-z_-]{11})(?![0-9A-Za-z_-])"
    )
    documents = []
    for video_url in video_urls or ():
        match = video_id_pattern.search(video_url)
        if match is None:
            print(f"Invalid YouTube URL: {video_url}")
            continue
        # The loader is given the bare video ID, not the full URL.
        loader = YoutubeLoader(match.group(1))
        documents.extend(loader.load())
    return documents


# Function to process images
def process_images(image_urls):
    """Describe each image with the chat model and wrap the text in Documents.

    Every URL is downloaded, base64-encoded, and sent to the notebook-level
    ``llm`` as a data URL together with a short text instruction.

    Args:
        image_urls: Iterable of image URLs. ``None`` or an empty collection
            yields an empty list.

    Returns:
        list: One ``Document`` per image. ``page_content`` holds the model's
        reply and ``metadata["source"]`` holds the image URL.

    Raises:
        httpx.HTTPStatusError: If a download answers with a 4xx/5xx status.
    """
    documents = []
    for image_url in image_urls or ():
        # httpx does not follow redirects unless asked to; without this a
        # redirecting URL would give us the redirect body, not the image.
        response = httpx.get(image_url, follow_redirects=True)
        # Fail loudly instead of base64-encoding an error page.
        response.raise_for_status()
        image_data = base64.b64encode(response.content).decode("utf-8")

        # Prefer the media type reported by the server when it is an image
        # type; otherwise keep the previously hard-coded JPEG label.
        mime_type = response.headers.get("content-type", "").split(";")[0].strip().lower()
        if not mime_type.startswith("image/"):
            mime_type = "image/jpeg"

        # Multimodal message: one text part plus one inline image part.
        message = HumanMessage(
            content=[
                {"type": "text", "text": "Describe the content of this image. "},
                {
                    "type": "image_url",
                    "image_url": {"url": f"data:{mime_type};base64,{image_data}"},
                },
            ],
        )

        # `llm` is the chat model created in the notebook's setup cell.
        reply = llm.invoke([message])
        documents.append(
            Document(page_content=reply.content, metadata={"source": image_url})
        )
    return documents


# Function to load all types of data
def load_all_data(pdf_paths, urls, youtube_links, image_urls):
    """Gather documents from every supported source into a single list.

    The sources are loaded in a fixed order -- PDFs, web pages, YouTube
    videos, images -- and the returned list keeps that order.
    """
    return [
        *load_pdfs(pdf_paths),
        *load_urls(urls),
        *load_youtube(youtube_links),
        *process_images(image_urls),
    ]


# Example inputs, one list per source type. The values here look like
# placeholders; swap in real paths and links before running.
pdf_files = ["pdf_files"]
web_urls = ["web_urls"]
youtube_videos = ["youtube_videos_urls"]
image_urls = ["image_urls"]

# Run every loader and keep the combined result for the cells below.
documents = load_all_data(
    pdf_paths=pdf_files,
    urls=web_urls,
    youtube_links=youtube_videos,
    image_urls=image_urls,
)

print(len(documents))




85


In [33]:
# Spot-check a single loaded document by printing its text.
# NOTE(review): index 83 is hard-coded; it fails with IndexError when fewer
# than 84 documents were loaded (the recorded run above reported 85).
print(documents[83].page_content)

welcome to this beginner tutorial on langra where we're going to cover the theory and then a practical example of how you can get stuck in so langra what is it why should you use it and how do you use it langra is an AI agent building framework built by Lang chain it's highly flexible and it allows you to connect language models together in a way that the llm can control what happens next it's supported in two major programming languages Python and JavaScript and it was released last October and since then it's been increasing in popularity as an alternative to other agent building Frameworks like crew aai and Microsoft autogen let's take a look at an example of how it might be used let's take a customer support agent who asks a question to an online website chatbot for a bank the chatbot agent might then look up the customer information it might then update the customer details based on the conversation and it might also then want to make a transaction for the customer but because thi

In [25]:
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Break the loaded documents into overlapping chunks before embedding.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
splits = text_splitter.split_documents(documents)

# Embed the chunks with OpenAI embeddings and index them in Chroma.
vectorstore = Chroma.from_documents(
    documents=splits,
    embedding=OpenAIEmbeddings(),
)

# Retriever view over the vector store; the RAG chain is built on top of it.
retriever = vectorstore.as_retriever()

In [36]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

# System instructions for the answering model. The text ends with the
# {context} placeholder, which the documents chain is expected to fill.
system_prompt = (
    "You are an intelligent assistant designed for answering questions "
    "based on retrieved context. Carefully read the provided context and "
    "use it exclusively to formulate your response. If the context does not "
    "contain enough information to answer the question, explicitly state: "
    "'I don't know based on the provided context.' "
    "Your responses should be clear, relevant, and limited to three sentences "
    "or fewer. Do not include information beyond the given context."
    "\n\n{context}"
)

# Two-message chat template: the system instructions, then the user's question.
chat_messages = [
    ("system", system_prompt),
    ("human", "{input}"),
]
prompt = ChatPromptTemplate.from_messages(chat_messages)

# Answering step: the prompt and the LLM combined into a documents chain.
question_answer_chain = create_stuff_documents_chain(llm, prompt)

# Full pipeline: the retriever wired in front of the answering step.
rag_chain = create_retrieval_chain(retriever, question_answer_chain)


In [42]:

# Ask the RAG chain one question and show only the generated answer.
question = "what are different data types in python ?"
results = rag_chain.invoke({"input": question})
print(results["answer"])


The different data types in Python include:

- **Strings** (e.g., `"python"`)
- **Numbers** (e.g., `10`, `15.5`)
- **Lists** (e.g., `["python", "variables"]`)
- **Tuples** (e.g., `("python", "variables")`)
- **Dictionaries** (e.g., `{"python": "variable"}`)
- **Sets** (e.g., `{1, 2, 3}`)
- **Boolean** (e.g., `True`, `False`)
