In [2]:
import os
import time
from dotenv import load_dotenv
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains.summarize import load_summarize_chain
from langchain_groq import ChatGroq

load_dotenv()

GROQ_API_KEY = os.getenv("groq_api_key")
llm = ChatGroq(groq_api_key=GROQ_API_KEY, model_name="Llama3-8b-8192")

In [3]:
loader = PyPDFLoader("notes.pdf")
docs = loader.load_and_split()
print(docs)

split_docs = RecursiveCharacterTextSplitter(chunk_size=6500, chunk_overlap=400).split_documents(docs)

[Document(metadata={'source': 'notes.pdf', 'page': 0}, page_content='Module 1 \nSUMARANI H  & VANITHA H N  \nAsst.  Professor, Dept of CSE,CBIT, Kolar. \nCHAPTER 1 \nIntroduction to Databases \nIntroduction \nA database is a collection of related data. \nA data mean known facts that can be recorded and that have implicit meaning. \nFor eg, consider the names, telephone numbers, and addresses of the people you know. You may have recorded this \ndata in an indexed address book or you may have stored it on a hard drive, using a personal computer and software such \nas Microsoft Access or Excel. This collection of related data with an implicit meaning is a database. \nA database has the following implicit properties: \n\uf0fc A database represents some aspect of the real world, sometimes called the miniworld or the universe \nofdiscourse (UoD). Changes to the miniworldare reflected in the database.  \n\uf0fc A database is a logically coherent collection of data with some inherent meaning. 

In [4]:
# Start timing the summarization
start_time_summarization = time.time()
chain = load_summarize_chain(
    llm=llm,
    chain_type="refine",
    verbose=True,
)

output_summary = chain({"input_documents": split_docs}, return_only_outputs=True)
end_time_summarization = time.time()
print(f"Time taken to summarize: {end_time_summarization - start_time_summarization:.2f} seconds")

# Print the output summary
print(output_summary)

  output_summary = chain({"input_documents": split_docs}, return_only_outputs=True)




[1m> Entering new RefineDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mWrite a concise summary of the following:


"Module 1 
SUMARANI H  & VANITHA H N  
Asst.  Professor, Dept of CSE,CBIT, Kolar. 
CHAPTER 1 
Introduction to Databases 
Introduction 
A database is a collection of related data. 
A data mean known facts that can be recorded and that have implicit meaning. 
For eg, consider the names, telephone numbers, and addresses of the people you know. You may have recorded this 
data in an indexed address book or you may have stored it on a hard drive, using a personal computer and software such 
as Microsoft Access or Excel. This collection of related data with an implicit meaning is a database. 
A database has the following implicit properties: 
 A database represents some aspect of the real world, sometimes called the miniworld or the universe 
ofdiscourse (UoD). Changes to the miniworldare reflected in the database

In [5]:
output_summary

with open("refine.txt","w") as f:
    f.write(str(output_summary))

In [6]:
from langchain.prompts import PromptTemplate

chunks_prompt = """
Summarize the below document:
document : '{text}'
summary:
"""

map_prompt_template = PromptTemplate(input_variable = ['text'],
                                     template = chunks_prompt)

In [7]:
final_prompt = """
Provide the final summary of the enitre speech with these important points.
add a motivation title, start the precise summary with an introduction and provide the summary in number
and points for the speech.
speech : {text}
"""

final_prompt_template = PromptTemplate(input_variable = ['text'],
                                       template = final_prompt)

In [9]:
start_time_summarization = time.time()
chain = load_summarize_chain(llm=llm, chain_type="map_reduce",combine_prompt = final_prompt_template, verbose = True)
output_summary = chain({"input_documents": split_docs}, return_only_outputs=True)
end_time_summarization = time.time()
print(f"Time taken to summarize: {end_time_summarization - start_time_summarization:.2f} seconds")



[1m> Entering new MapReduceDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mWrite a concise summary of the following:


"Module 1 
SUMARANI H  & VANITHA H N  
Asst.  Professor, Dept of CSE,CBIT, Kolar. 
CHAPTER 1 
Introduction to Databases 
Introduction 
A database is a collection of related data. 
A data mean known facts that can be recorded and that have implicit meaning. 
For eg, consider the names, telephone numbers, and addresses of the people you know. You may have recorded this 
data in an indexed address book or you may have stored it on a hard drive, using a personal computer and software such 
as Microsoft Access or Excel. This collection of related data with an implicit meaning is a database. 
A database has the following implicit properties: 
 A database represents some aspect of the real world, sometimes called the miniworld or the universe 
ofdiscourse (UoD). Changes to the miniworldare reflected in the datab

  from .autonotebook import tqdm as notebook_tqdm
None of PyTorch, TensorFlow >= 2.0, or Flax have been found. Models won't be available and only tokenizers, configuration and file/data utilities can be used.
Token indices sequence length is longer than the specified maximum sequence length for this model (3951 > 1024). Running this sequence through the model will result in indexing errors




[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
Provide the final summary of the enitre speech with these important points.
add a motivation title, start the precise summary with an introduction and provide the summary in number
and points for the speech.
speech : Here is a concise summary:

A database is a collection of related data with implicit meaning, representing a specific aspect of the real world. A database has properties such as being logically coherent, designed for a specific purpose, and can be of any size or complexity. A Database Management System (DBMS) enables users to create, maintain, and share databases, defining data types, structures, and constraints, constructing and storing data, manipulating data through querying and updating, and sharing data among multiple users and applications.

Here is a concise summary:

The module introduces the concept of a Database System (DBS) and 

In [10]:
with open("r.txt","w") as f:
    f.write(str(output_summary))