In [None]:
%pip install langchain langchain_openai faiss-cpu 

In [3]:
"""
Sets the OPENAI_API_KEY environment variable to the provided value.

This code is used to configure the OpenAI API key, which is required to use the OpenAI API in your Python code. The API key is stored in the OPENAI_API_KEY environment variable, which can then be accessed by other parts of your code that need to interact with the OpenAI API.
"""
import os
import getpass
OPENAI_API_KEY = getpass.getpass()
os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY

In [28]:
"""
This code sets up the necessary components for a text summarization pipeline using the LangChain library and the OpenAI language model.

The `OpenAIEmbeddings` class is used to create embeddings for the input documents, which are then stored in a FAISS vector store. The `ChatOpenAI` class is used to initialize the OpenAI language model, which is then used in the `load_summarize_chain` function to create a summarization chain.

The `Document` class from LangChain is used to represent the input documents, and the summarization chain is then executed on them to generate a single combined summary.
"""
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.docstore.document import Document
from langchain.chains.summarize import load_summarize_chain
from langchain_openai import ChatOpenAI
from langchain.chains.combine_documents.stuff import StuffDocumentsChain
from langchain.chains.llm import LLMChain
from langchain_core.prompts import PromptTemplate

# Local documents: four short in-memory passages (AI, machine learning, deep learning, NLP)
# that serve as the raw text input for the summarization steps below.
documents = [
    """
    Artificial intelligence (AI) refers to the simulation of human intelligence in machines that are programmed to think like humans and mimic their actions. The term may also be applied to any machine that exhibits traits associated with a human mind, such as learning and problem-solving. AI has become an essential part of the technology industry, helping to solve problems in fields ranging from healthcare to robotics and beyond.
    """,
    """
    Machine learning is a subset of AI that provides systems the ability to automatically learn and improve from experience without being explicitly programmed. It focuses on the development of computer programs that can access data and use it to learn for themselves. The process of learning begins with observations or data, such as examples, direct experience, or instruction, in order to look for patterns in data and make better decisions in the future based on the examples provided.
    """,
    """
    Deep learning is a specialized form of machine learning that involves neural networks with three or more layers. These neural networks attempt to simulate the behavior of the human brain in order to learn from large amounts of data. While a neural network with a single layer can still make approximate predictions, additional layers can help optimize accuracy. Deep learning drives many AI applications and services that improve automation, performing tasks without human intervention.
    """,
    """
    Natural language processing (NLP) is a field of AI that focuses on the interaction between computers and humans through natural language. The ultimate objective of NLP is to enable computers to understand, interpret, and generate human languages in a way that is valuable. NLP is used in a variety of applications including translation services, sentiment analysis, and chatbots. It plays a vital role in the development of virtual assistants like Siri and Alexa.
    """
]

# Wrap each raw passage in a LangChain Document, the input type the chains operate on
docs = [Document(page_content=text) for text in documents]

# Embed the documents and index them in a FAISS vector store.
# NOTE(review): `vector_store` is not read by any cell visible in this notebook; it is
# kept so existing references keep working, but it costs an embeddings API call.
embedding_function = OpenAIEmbeddings()
vector_store = FAISS.from_documents(docs, embedding_function)

# Initialize the OpenAI chat model used by the summarization chain
llm = ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo-16k")

# Build the summarization chain using the "stuff" chain type
chain = load_summarize_chain(llm, chain_type="stuff")

# `Chain.run` is deprecated; `invoke` takes the documents under "input_documents"
# and returns a dict whose "output_text" entry holds the summary string.
summary = chain.invoke({"input_documents": docs})["output_text"]

print("Summary:", summary)

Summary: Artificial intelligence (AI) is the simulation of human intelligence in machines, allowing them to think and act like humans. Machine learning is a subset of AI that enables systems to learn and improve from experience without explicit programming. Deep learning is a specialized form of machine learning that uses neural networks with multiple layers to learn from large amounts of data. Natural language processing (NLP) focuses on the interaction between computers and humans through language, enabling computers to understand and generate human languages. NLP is used in various applications such as translation services and virtual assistants.


In [25]:
# Custom prompt: the combined document text is substituted for {text}
prompt_template = """Write a concise summary of the following:
"{text}"
CONCISE SUMMARY:"""
prompt = PromptTemplate.from_template(prompt_template)

# LLM chain that sends the custom prompt to the model
llm_chain = LLMChain(llm=llm, prompt=prompt)

# StuffDocumentsChain passes the documents to the LLM chain through the prompt's
# "text" variable (document_variable_name must match the placeholder above)
stuff_chain = StuffDocumentsChain(llm_chain=llm_chain, document_variable_name="text")

# `Chain.run` is deprecated; `invoke` takes the documents under "input_documents"
# and returns a dict whose "output_text" entry holds the summary string.
custom_summary = stuff_chain.invoke({"input_documents": docs})["output_text"]

print("Custom Summary:", custom_summary)

Custom Summary: Artificial intelligence (AI) is the simulation of human intelligence in machines, which can learn and solve problems. Machine learning is a subset of AI that allows systems to learn from data without explicit programming. Deep learning is a specialized form of machine learning that uses neural networks to mimic the human brain. Natural language processing (NLP) focuses on the interaction between computers and humans through language, enabling computers to understand and generate human languages. NLP is used in various applications such as translation services and virtual assistants.


# Summarizing a PDF

In [19]:
"""
This code imports the necessary modules and classes from the langchain and gradio libraries to create a summarization application.

The `OpenAI` class from langchain is used to initialize a language model with a specified temperature parameter.

The `PromptTemplate` class from langchain is used to define a prompt template for the summarization task.

The `load_summarize_chain` function from langchain.chains.summarize is used to load a pre-defined summarization chain.

The `PyPDFLoader` class from langchain.document_loaders is used to load PDF documents for summarization.
"""
import gradio as gr
from langchain import OpenAI, PromptTemplate
from langchain.chains.summarize import load_summarize_chain
from langchain.document_loaders import PyPDFLoader

# Rebinds the notebook-level `llm` (a ChatOpenAI instance in the earlier cell) to an
# instance of the `OpenAI` class imported above; `summarize_pdf` reads this global
# at call time, so this cell must run before the summarizer is used.
llm = OpenAI(temperature=0)

In [20]:
"""
Summarizes the text content of a PDF document using a language model-based summarization chain.

Args:
    path (str): The file path of the PDF document to be summarized.

Returns:
    str: A summary of the text content of the PDF document.
"""

def summarize_pdf(path):

    summary = ""
    try:
        """
        Loads and splits PDF documents using the PyPDFLoader.
        
        Args:
            path (str): The file path of the PDF document to be loaded and split.
        
        Returns:
            A list of document chunks loaded from the PDF file.
        """
        loader = PyPDFLoader(path.name)
        """
        Loads and splits PDF documents using the PyPDFLoader.
        
        Args:
            path (str): The file path of the PDF document to be loaded and split.
        
        Returns:
            A list of document chunks loaded from the PDF file.
        """
        docs = loader.load_and_split()
        """
        Loads a summarization chain using the "map_reduce" chain type. The summarization chain is used to generate a summary of the provided documents.
        
        Args:
            llm (LLMChain): The language model to use for the summarization.
            chain_type (str): The type of summarization chain to use, in this case "map_reduce".
        
        Returns:
            A summarization chain that can be used to generate summaries of documents.
        """
        chain = load_summarize_chain(llm, chain_type="map_reduce")
        summary = chain.run(docs)
        prompt_template = """

        {text}

        SUMMARY:"""
        PROMPT = PromptTemplate(template=prompt_template, input_variables=["text"])
        chain = load_summarize_chain(llm, chain_type="map_reduce", 
                                    map_prompt=PROMPT, combine_prompt=PROMPT)
    except:
        summary = "Something went wrong. \nPlease try with some other document."
    return summary

In [21]:
def upload_file(file):
    """Return the value of the `name` attribute of the given uploaded file object."""
    uploaded_name = file.name
    return uploaded_name

In [22]:
def main():
    """
    Build and launch the Gradio "PDF Summarizer" interface.

    The interface passes the file chosen with the upload button to
    `summarize_pdf` and shows the returned text in a "Summary" textbox.
    """
    # File types are given as extensions with a leading dot (".pdf").
    upload_button = gr.UploadButton("Click to Upload a File", file_types=[".pdf"])
    output_summary = gr.Textbox(label="Summary")

    interface = gr.Interface(
        fn=summarize_pdf,
        inputs=[upload_button],
        outputs=[output_summary],
        title="PDF Summarizer",
        description="",
    )

    interface.launch()


if __name__ == "__main__":
    main()

Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.


IMPORTANT: You are using gradio version 4.16.0, however version 4.29.0 is available, please upgrade.
--------
IMPORTANT: You are using gradio version 4.16.0, however version 4.29.0 is available, please upgrade.
--------


  warn_deprecated(
