# Import Required Libraries
Import the necessary libraries, including langchain.llms.OpenAI, langchain.prompts.PromptTemplate, and pdfplumber.

In [None]:
# %pip install langchain langchain-community openai pdfplumber

Collecting langchain
  Using cached langchain-0.3.25-py3-none-any.whl.metadata (7.8 kB)
Collecting langchain-core<1.0.0,>=0.3.58 (from langchain)
  Using cached langchain_core-0.3.61-py3-none-any.whl.metadata (5.8 kB)
Collecting langchain-text-splitters<1.0.0,>=0.3.8 (from langchain)
  Using cached langchain_text_splitters-0.3.8-py3-none-any.whl.metadata (1.9 kB)
Collecting langsmith<0.4,>=0.1.17 (from langchain)
  Using cached langsmith-0.3.42-py3-none-any.whl.metadata (15 kB)
Collecting pydantic<3.0.0,>=2.7.4 (from langchain)
  Using cached pydantic-2.11.5-py3-none-any.whl.metadata (67 kB)
Collecting SQLAlchemy<3,>=1.4 (from langchain)
  Downloading sqlalchemy-2.0.41-cp313-cp313-macosx_11_0_arm64.whl.metadata (9.6 kB)
Collecting requests<3,>=2 (from langchain)
  Using cached requests-2.32.3-py3-none-any.whl.metadata (4.6 kB)
Collecting PyYAML>=5.3 (from langchain)
  Downloading PyYAML-6.0.2-cp313-cp313-macosx_11_0_arm64.whl.metadata (2.1 kB)
Collecting tenacity!=8.4.0,<10.0.0,>=8.1.0

In [None]:
# Import necessary libraries
from langchain.llms import OpenAI  # For using OpenAI's language model
from langchain.prompts import PromptTemplate  # For creating prompt templates
import pdfplumber  # For extracting text from PDF files

# Extract Text from PDF
Define a function `extract_text_from_pdf(pdf_path)` to extract text from a PDF file using pdfplumber.

In [None]:
# Define a function to extract text from a PDF file
def extract_text_from_pdf(pdf_path):
    """
    Extracts text from a PDF file using pdfplumber.

    Pages are read in order. Pages that yield no text are skipped, and the
    remaining page texts are joined with a newline so that the last word of
    one page does not run into the first word of the next.

    Args:
        pdf_path (str): Path to the PDF file.

    Returns:
        str: Extracted text from the PDF, or an empty string if no page
            yields any text.
    """
    with pdfplumber.open(pdf_path) as pdf:
        # Depending on the pdfplumber version, extract_text() may return None
        # for a page without extractable text; normalise that to "" so it
        # never breaks string concatenation.
        page_texts = [page.extract_text() or "" for page in pdf.pages]

    # Drop empty pages so blank pages do not add stray newlines.
    return "\n".join(page_text for page_text in page_texts if page_text)

# Process Text with LangChain
Define a function `process_text_with_langchain(text, openai_api_key)` to process the extracted text using LangChain's OpenAI LLM and a prompt template.

In [None]:
# Define a function to process text with LangChain
def process_text_with_langchain(text, openai_api_key):
    """
    Processes the provided text using LangChain's OpenAI LLM and a prompt template.

    The text is inserted into a fixed summarisation prompt and the formatted
    prompt is sent to the model in a single request.

    Args:
        text (str): The text to process.
        openai_api_key (str): OpenAI API key for authentication.

    Returns:
        str: The response generated by the OpenAI LLM.
    """
    # Initialize the OpenAI LLM with the provided API key
    llm = OpenAI(api_key=openai_api_key)

    # Define a prompt template with a single `text` placeholder
    prompt_template = PromptTemplate(
        input_variables=["text"],
        template="Analyze the following text and provide a summary:\n\n{text}",
    )

    # Format the prompt with the provided text
    prompt = prompt_template.format(text=text)

    # Use invoke() rather than calling the LLM object directly: the direct
    # call (`llm(prompt)`) is deprecated in LangChain, while invoke() is the
    # supported entry point and returns the same completion string.
    response = llm.invoke(prompt)

    return response  # Return the generated response

# Save Output to File
Define a function `save_output(text, output_path)` to save the processed text to a specified file.

In [None]:
# Define a function to save processed text to a file
def save_output(text, output_path):
    """
    Writes `text` to the file at `output_path` as UTF-8.

    The file is opened in write mode, so an existing file at that path is
    overwritten.

    Args:
        text (str): The text to save.
        output_path (str): Path to the output file.

    Returns:
        None
    """
    destination = open(output_path, mode='w', encoding='utf-8')
    try:
        destination.write(text)  # Single write of the whole payload
    finally:
        # Always release the file handle, even if the write fails
        destination.close()

# Example Usage
The example below calls the functions in sequence: it extracts text from a PDF, processes it with LangChain, and saves the output to a file.

In [None]:
# Example Usage
import os  # Standard library; used to read the API key from the environment

# Define paths for the input PDF and output file
pdf_path = "example.pdf"  # Path to the input PDF file
output_path = "output.txt"  # Path to save the processed output

# Read the OpenAI API key from the OPENAI_API_KEY environment variable so the
# secret does not have to be written into the notebook. The placeholder is
# kept as a fallback and must be replaced if the variable is not set.
openai_api_key = os.environ.get("OPENAI_API_KEY", "your_openai_api_key_here")

# Step 1: Extract text from the PDF
extracted_text = extract_text_from_pdf(pdf_path)

# Step 2: Process the extracted text with LangChain
processed_text = process_text_with_langchain(extracted_text, openai_api_key)

# Step 3: Save the processed text to a file
save_output(processed_text, output_path)