In [1]:
from dotenv import load_dotenv
import os
from langchain.llms import AzureOpenAI
import textwrap
from IPython.display import display, HTML, JSON, Markdown
load_dotenv()

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY") 
OPENAI_DEPLOYMENT_ENDPOINT = os.getenv("OPENAI_DEPLOYMENT_ENDPOINT")
OPENAI_DEPLOYMENT_NAME = os.getenv("OPENAI_DEPLOYMENT_NAME")
OPENAI_MODEL_NAME = os.getenv("OPENAI_MODEL_NAME")
OPENAI_DEPLOYMENT_VERSION = os.getenv("OPENAI_DEPLOYMENT_VERSION")

llm = AzureOpenAI(
    deployment_name=OPENAI_DEPLOYMENT_NAME,
    model_name=OPENAI_MODEL_NAME,
    api_version="2023-05-15",
    openai_api_base=OPENAI_DEPLOYMENT_ENDPOINT,
)



In [2]:
from langchain import PromptTemplate
from langchain.chains.summarize import load_summarize_chain
from langchain.document_loaders import PyPDFLoader

fileName = "./data/Best-Practices-in-Change-Management-Full-Report-Digital-11th-Edition.pdf"
loader = PyPDFLoader(fileName)
pages = loader.load_and_split()
print("Number of pages: ", len(pages))

Number of pages:  367


In [3]:
map_prompt_template = """
                      Write a summary of this chunk of text that includes the main points and any important details.
                      {text}
                      """

map_prompt = PromptTemplate(template=map_prompt_template, input_variables=["text"])

combine_prompt_template = """
                      Write a concise summary of the following text delimited by triple backquotes.
                      Return your response in bullet points which covers the key points of the text.
                      ```{text}```
                      BULLET POINT SUMMARY:
                      """

combine_prompt = PromptTemplate(
    template=combine_prompt_template, input_variables=["text"]
)

In [4]:
map_reduce_chain = load_summarize_chain(
    llm,
    chain_type="map_reduce",
    map_prompt=map_prompt,
    combine_prompt=combine_prompt,
    return_intermediate_steps=True,
)

In [None]:
map_reduce_outputs = map_reduce_chain({"input_documents": pages})
print("Number of outputs: ", len(map_reduce_outputs))

In [6]:
import pandas as pd
from pathlib import Path as p
final_mp_data = []
for doc, out in zip(map_reduce_outputs["input_documents"], map_reduce_outputs["intermediate_steps"]):
    output = {}
    output["file_name"] = p(doc.metadata["source"]).stem
    output["file_type"] = p(doc.metadata["source"]).suffix
    output["page_number"] = doc.metadata["page"]
    output["chunks"] = doc.page_content
    output["concise_summary"] = out
    final_mp_data.append(output)

pdf_mp_summary = pd.DataFrame.from_dict(final_mp_data)
pdf_mp_summary = pdf_mp_summary.sort_values(
    by=["file_name", "page_number"]
)  # sorting the dataframe by filename and page_number
pdf_mp_summary.reset_index(inplace=True, drop=True)
pdf_mp_summary.head()

Unnamed: 0,file_name,file_type,page_number,chunks,concise_summary
0,Best-Practices-in-Change-Management-Full-Repor...,.pdf,0,11th EditionBest Practices In \nChange Manage...,\n\n The 11th edition of ...
1,Best-Practices-in-Change-Management-Full-Repor...,.pdf,1,© 2020 Prosci Inc. • 1\nProsci ® Best Pra...,Write a summary of this chunk of text that in...
2,Best-Practices-in-Change-Management-Full-Repor...,.pdf,3,© 2020 Prosci Inc. • 3\nProsci ® Best Pra...,Steps in Change Management Methodology ........
3,Best-Practices-in-Change-Management-Full-Repor...,.pdf,3,Chapter 6 : Methodology ........................,"""""""\n self.assertEqual(expected_output..."
4,Best-Practices-in-Change-Management-Full-Repor...,.pdf,4,4 • © 2020 Prosci Inc. \nChapter 8 : Change ...,"""""""\n \n # when\n summary = generate..."


In [7]:
index = 3
print("[Context]")
print(pdf_mp_summary["chunks"].iloc[index])
print("\n\n [Simple Summary]")
print(pdf_mp_summary["concise_summary"].iloc[index])
print("\n\n [Page number]")
print(pdf_mp_summary["page_number"].iloc[index])
print("\n\n [Source: file_name]")
print(pdf_mp_summary["file_name"].iloc[index])

[Context]
Chapter 6 : Methodology  ............................................................................................................................. 105
Initiating Change Management .............................................................................................................. 107
Application of Methodology  ................................................................................................................... 110
Methodologies Used  ............................................................................................................................... 114
Chapter 7 : CM Budget, Resources and Team Structure  ...................................................................... 119
Change Management Budget  ................................................................................................................ 121
Change Management Resources  ...........................................................................................