# Text Summarization - with LLM

In [7]:
import json
import os
import yaml
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
import PyPDF2
from tqdm import tqdm
from langchain.text_splitter import CharacterTextSplitter
from langchain.document_loaders import PyPDFLoader ,TextLoader

def initialize_template():
    template ="""You are a text summarizer, summarize the text provided, do to repeat the response.
    text:
    {content}"""
    prompt = PromptTemplate.from_template(template)
    return prompt 

def initialize_gpt():
    from langchain.chat_models import AzureChatOpenAI    
    %reload_ext dotenv
    %dotenv
    llm = AzureChatOpenAI(deployment_name=os.getenv('DEPLOYEMENT_NAME'),
                        temperature=0.0,
                        max_tokens=4000,
                        callbacks=[StreamingStdOutCallbackHandler()]
                        )
    return llm

def document_loader(doc_path):
    loader = PyPDFLoader(doc_path)
    documents = loader.load()
    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200, separator="\n")
    docs = text_splitter.split_documents(documents=documents)
    return docs

In [2]:
doc_path='./data/doc/why_is_DG_imp.pdf'
docs = document_loader(doc_path)

In [8]:
prompt = initialize_template()
prompt.format(content='this is my content')

'You are a text summarizer, summarize the text provided, do to repeat the response.\n    text:\n    this is my content'

In [5]:
llm = initialize_gpt()
chain = LLMChain(llm=llm,prompt=prompt)



In [6]:
summary=''
for chunk in tqdm(docs):
    summary = summary+str(chunk)
    summary = chain.invoke({"content":summary})['text']
print(summary)

100%|██████████| 9/9 [01:18<00:00,  8.67s/it]

Data governance is crucial for improving an organization's data quality, trust, and understanding by establishing stewardship and a comprehensive business glossary. It is key to digital transformation, ensuring consistent and secure data management according to specific policies and standards. This process enhances data management, accessibility, and collaboration across departments by standardizing terminology, KPIs, rules, and policies. It also breaks down departmental silos, clarifies roles, and improves IT and business integration, empowering users with self-service technology and strategic technology use. The importance of data governance is growing due to its critical role in business operations, driven by an increased reliance on data analytics for decision-making. This need is supported by the demand for organized, clean data for optimal performance, cost efficiency, legal compliance, and improved financial reporting. The evolving business landscape, with more complex data, new


