In [15]:
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_groq import ChatGroq

In [16]:
import os 
from dotenv import load_dotenv
load_dotenv()
os.environ["GROQ_API"] = os.getenv("GROQ_API")
os.environ["LANGSMITH_API_KEY"] = os.getenv("LANGSMITH_API_KEY")
os.environ["LANGSMITH_TRACING"] = os.getenv("LANGSMITH_TRACING")
os.environ["LANGSMITH_PROJECT"] = os.getenv("LANGSMITH_PROJECT")
os.environ["HF_TOKEN"] = os.getenv("HF_Token")

llm=ChatGroq(groq_api_key=os.getenv("GROQ_API"),model="openai/gpt-oss-20b")
embeddings = HuggingFaceEmbeddings(
    model_name="all-MiniLM-L6-v2",
    model_kwargs={"device": "cpu"}
)

# method 1

In [17]:
from langchain.schema import (
    AIMessage,
    HumanMessage,
    SystemMessage #instractions for the llm
)

In [18]:
speech ='''"Thank you all for coming today.
I know life gets busy, and your time is valuable.
We’ve faced challenges together, and we’ve learned a lot.
Let’s keep pushing forward and support each other along the way."'''

In [19]:
chat_message = [
    SystemMessage(content="You are a expert with expertise in summarizing text."),
    HumanMessage(content=f"please provide a short and concisse summary of the follwoign speech:/n Text :{speech}"),
    
]

In [20]:
llm.get_num_tokens(speech)

55

In [21]:
llm(chat_message)

  llm(chat_message)


AIMessage(content='The speaker thanks attendees, acknowledges their busy schedules, reflects on shared challenges and lessons learned, and urges everyone to keep moving forward together.', additional_kwargs={'reasoning_content': 'The user wants a short and concise summary of the speech. The speech content: "Thank you all for coming today. I know life gets busy, and your time is valuable. We’ve faced challenges together, and we’ve learned a lot. Let’s keep pushing forward and support each other along the way."\n\nWe need to summarize concisely. The summary: "The speaker thanks attendees, acknowledges their busy schedules, reflects on shared challenges and lessons, and encourages continued progress and mutual support." That\'s concise. Let\'s produce that.'}, response_metadata={'token_usage': {'completion_tokens': 143, 'prompt_tokens': 148, 'total_tokens': 291, 'completion_time': 0.128415576, 'prompt_time': 0.009551813, 'queue_time': 0.084853582, 'total_time': 0.137967389}, 'model_name':

# method 2

In [22]:
# propmt template 
from langchain.chains import LLMChain  # llmchain = llm | prompt
from langchain.prompts import PromptTemplate

generic_template = ''' 
write a summary of the following speech : 
Speech : {speech}
Translate the precise summary to {language}


'''
prompt = PromptTemplate(
    input_variables=["speech", "language"],
    template=generic_template
)

prompt

PromptTemplate(input_variables=['language', 'speech'], input_types={}, partial_variables={}, template=' \nwrite a summary of the following speech : \nSpeech : {speech}\nTranslate the precise summary to {language}\n\n\n')

In [23]:
comp_prompt = prompt.format(speech=speech, language="french") 
comp_prompt 

' \nwrite a summary of the following speech : \nSpeech : "Thank you all for coming today.\nI know life gets busy, and your time is valuable.\nWe’ve faced challenges together, and we’ve learned a lot.\nLet’s keep pushing forward and support each other along the way."\nTranslate the precise summary to french\n\n\n'

In [24]:
llm.get_num_tokens(comp_prompt)

80

In [25]:
llm_chain = LLMChain(
    llm=llm,
    prompt=prompt
)

summary = llm_chain.run(speech=speech, language="french")
summary

  llm_chain = LLMChain(
  summary = llm_chain.run(speech=speech, language="french")


'**Summary (English)**  \nThe speaker thanks everyone for attending, acknowledges how busy people are, reflects on the challenges they have faced together and the lessons learned, and urges the group to keep moving forward while supporting one another.\n\n**Résumé (French)**  \nLe conférencier remercie tout le monde d’être présent, reconnaît le rythme chargé de chacun, évoque les défis rencontrés et les leçons tirées, et encourage le groupe à avancer tout en se soutenant mutuellement.'

# StuffDocumentChain Text Summarization

In [26]:
from langchain_community.document_loaders import PyPDFLoader

loader = PyPDFLoader("Attention.pdf")
docs = loader.load_and_split()
docs

[Document(metadata={'producer': 'pdfTeX-1.40.25', 'creator': 'LaTeX with hyperref', 'creationdate': '2024-04-10T21:11:43+00:00', 'author': '', 'keywords': '', 'moddate': '2024-04-10T21:11:43+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) kpathsea version 6.3.5', 'subject': '', 'title': '', 'trapped': '/False', 'source': 'Attention.pdf', 'total_pages': 15, 'page': 0, 'page_label': '1'}, page_content='Provided proper attribution is provided, Google hereby grants permission to\nreproduce the tables and figures in this paper solely for use in journalistic or\nscholarly works.\nAttention Is All You Need\nAshish Vaswani∗\nGoogle Brain\navaswani@google.com\nNoam Shazeer∗\nGoogle Brain\nnoam@google.com\nNiki Parmar∗\nGoogle Research\nnikip@google.com\nJakob Uszkoreit∗\nGoogle Research\nusz@google.com\nLlion Jones∗\nGoogle Research\nllion@google.com\nAidan N. Gomez∗ †\nUniversity of Toronto\naidan@cs.toronto.edu\nŁukasz Kaiser∗\nGoogle Brain\nlukaszk

In [27]:
template = ''' 
 write a short summary of the following text, 
 Speech :{text}
'''
from langchain.prompts import PromptTemplate
prompt = PromptTemplate(input_variables=["text"] , template=template)

In [28]:
from langchain.chains.summarize import load_summarize_chain

chain = load_summarize_chain(llm, chain_type="stuff", prompt=prompt , verbose=True)
chain

StuffDocumentsChain(verbose=True, llm_chain=LLMChain(verbose=True, prompt=PromptTemplate(input_variables=['text'], input_types={}, partial_variables={}, template=' \n write a short summary of the following text, \n Speech :{text}\n'), llm=ChatGroq(client=<groq.resources.chat.completions.Completions object at 0x709cb0d82f30>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x709cb10a2780>, model_name='openai/gpt-oss-20b', model_kwargs={}, groq_api_key=SecretStr('**********')), output_parser=StrOutputParser(), llm_kwargs={}), document_prompt=PromptTemplate(input_variables=['page_content'], input_types={}, partial_variables={}, template='{page_content}'), document_variable_name='text')

In [29]:
output_summary = chain.invoke(docs)
output_summary



[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m 
 write a short summary of the following text, 
 Speech :Provided proper attribution is provided, Google hereby grants permission to
reproduce the tables and figures in this paper solely for use in journalistic or
scholarly works.
Attention Is All You Need
Ashish Vaswani∗
Google Brain
avaswani@google.com
Noam Shazeer∗
Google Brain
noam@google.com
Niki Parmar∗
Google Research
nikip@google.com
Jakob Uszkoreit∗
Google Research
usz@google.com
Llion Jones∗
Google Research
llion@google.com
Aidan N. Gomez∗ †
University of Toronto
aidan@cs.toronto.edu
Łukasz Kaiser∗
Google Brain
lukaszkaiser@google.com
Illia Polosukhin∗ ‡
illia.polosukhin@gmail.com
Abstract
The dominant sequence transduction models are based on complex recurrent or
convolutional neural networks that include an encoder and a decoder. The best
performing models also connect the encoder and decod

APIStatusError: Error code: 413 - {'error': {'message': 'Request too large for model `openai/gpt-oss-20b` in organization `org_01k1web8ememaag4ax0tddjzna` service tier `on_demand` on tokens per minute (TPM): Limit 8000, Requested 10260, please reduce your message size and try again. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}

# map reduce to summrize large documents 

In [30]:
from langchain.text_splitter import RecursiveCharacterTextSplitter  

In [31]:
docs

[Document(metadata={'producer': 'pdfTeX-1.40.25', 'creator': 'LaTeX with hyperref', 'creationdate': '2024-04-10T21:11:43+00:00', 'author': '', 'keywords': '', 'moddate': '2024-04-10T21:11:43+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) kpathsea version 6.3.5', 'subject': '', 'title': '', 'trapped': '/False', 'source': 'Attention.pdf', 'total_pages': 15, 'page': 0, 'page_label': '1'}, page_content='Provided proper attribution is provided, Google hereby grants permission to\nreproduce the tables and figures in this paper solely for use in journalistic or\nscholarly works.\nAttention Is All You Need\nAshish Vaswani∗\nGoogle Brain\navaswani@google.com\nNoam Shazeer∗\nGoogle Brain\nnoam@google.com\nNiki Parmar∗\nGoogle Research\nnikip@google.com\nJakob Uszkoreit∗\nGoogle Research\nusz@google.com\nLlion Jones∗\nGoogle Research\nllion@google.com\nAidan N. Gomez∗ †\nUniversity of Toronto\naidan@cs.toronto.edu\nŁukasz Kaiser∗\nGoogle Brain\nlukaszk

In [32]:
init_docs = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100).split_documents(docs)
init_docs

[Document(metadata={'producer': 'pdfTeX-1.40.25', 'creator': 'LaTeX with hyperref', 'creationdate': '2024-04-10T21:11:43+00:00', 'author': '', 'keywords': '', 'moddate': '2024-04-10T21:11:43+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) kpathsea version 6.3.5', 'subject': '', 'title': '', 'trapped': '/False', 'source': 'Attention.pdf', 'total_pages': 15, 'page': 0, 'page_label': '1'}, page_content='Provided proper attribution is provided, Google hereby grants permission to\nreproduce the tables and figures in this paper solely for use in journalistic or\nscholarly works.\nAttention Is All You Need\nAshish Vaswani∗\nGoogle Brain\navaswani@google.com\nNoam Shazeer∗\nGoogle Brain\nnoam@google.com\nNiki Parmar∗\nGoogle Research\nnikip@google.com\nJakob Uszkoreit∗\nGoogle Research\nusz@google.com\nLlion Jones∗\nGoogle Research\nllion@google.com\nAidan N. Gomez∗ †\nUniversity of Toronto\naidan@cs.toronto.edu\nŁukasz Kaiser∗\nGoogle Brain\nlukaszk

In [33]:
len(init_docs)

50

In [34]:
chunks_prompts = '''
please summarize the below speech: 
speech: {text}
summary:

'''

map_prompt_template = PromptTemplate(input_variables=["text"], template=chunks_prompts)

In [35]:
final_prompt = '''
provide the final summary of the entire speech with these important points.
add a Motivation title , start the precise summary with a introduction and provide the summary in number and points for the speech.
Speech: {text}
'''

final_prompt_template = PromptTemplate(input_variables=["text"], template=final_prompt)

In [None]:
summary_chain = load_summarize_chain(
    llm = llm, 
    chain_type="map_reduce",
    map_prompt=map_prompt_template,
    combine_prompt=final_prompt_template,
    verbose=True
)


In [None]:
outpout_summary = summary_chain.invoke(init_docs)
outpout_summary 

# refine chain

In [None]:
chain = load_summarize_chain(
    llm = llm , 
    chain_type="refine",  # refine chain
    verbose=True
    
)
output_summary = chain.invoke(init_docs)

In [None]:
outpout_summary

In [36]:
from langchain_community.document_loaders import WebBaseLoader
import bs4
loader = WebBaseLoader(web_paths=["https://en.wikipedia.org/wiki/Khalid_ibn_al-Walid"])
                  
web_load = loader.load()
docs = web_load

In [38]:
splitted_docs = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100).split_documents(docs)

In [39]:
chain = load_summarize_chain(
    llm = llm , 
    chain_type="refine",  # refine chain
    verbose=True
)
output_summary = chain.invoke(splitted_docs)



[1m> Entering new RefineDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mWrite a concise summary of the following:


"Khalid ibn al-Walid - Wikipedia


































Jump to content







Main menu





Main menu
move to sidebar
hide



		Navigation
	


Main pageContentsCurrent eventsRandom articleAbout WikipediaContact us





		Contribute
	


HelpLearn to editCommunity portalRecent changesUpload fileSpecial pages



















Search











Search






















Appearance
















Donate

Create account

Log in








Personal tools





Donate Create account Log in





		Pages for logged out editors learn more



ContributionsTalk




























Contents
move to sidebar
hide




(Top)





1
Ancestry and early life








2
Early military career




Toggle Early military career subsection





2.1
Opposition to Muhammad








2.2
Conversion to Islam and service

RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for model `openai/gpt-oss-20b` in organization `org_01k1web8ememaag4ax0tddjzna` service tier `on_demand` on tokens per minute (TPM): Limit 8000, Used 12275, Requested 4969. Please try again in 1m9.332s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}