## Dependencies

In [23]:
!pip install -q langchain langchain-groq tiktoken beautifulsoup4

In [24]:
import os
import getpass
import requests
from bs4 import BeautifulSoup

## API Key

In [25]:
import getpass
import os

# Prompt for the Groq key (input hidden) only when the environment does not
# already provide one, then keep it in os.environ for the cells below.
if os.environ.get("GROQ_API_KEY") is None:
    os.environ["GROQ_API_KEY"] = getpass.getpass("Enter your GROQ API key: ")


## Extracting Raw Text from a Web Page
* `requests` fetches the page at the given URL.
* `BeautifulSoup` converts the HTML page to raw text, which we then pass to our LLM to produce a summary.

In [26]:
# Function to extract the text from the webpages
def get_text_from_url(url, timeout=10):
    """Download a web page and return its text as one whitespace-normalized string.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up, so a stalled
            connection cannot hang the notebook indefinitely.

    Returns:
        The text of the page with the contents of <script> and <style> tags
        removed and every run of whitespace collapsed to a single space.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status (4xx/5xx).
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of summarizing an error page as if it were content.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, "html.parser")

    # Script and style bodies are code, not readable content; drop them.
    for tag in soup(["script", "style"]):
        tag.decompose()

    # split()/join collapses newlines, tabs and repeated spaces into single spaces.
    return " ".join(soup.get_text().split())

## LLM Model

In [27]:
from langchain_groq import ChatGroq

# Chat model client used by the summarization chain.
# The API key is read from the environment variable set in the "API Key" cell.
llm = ChatGroq(
    model="gemma2-9b-it",  # can be replaced with any other preferred model
    temperature=0.3,
    groq_api_key=os.environ["GROQ_API_KEY"],
)


In [28]:
from langchain.prompts import SystemMessagePromptTemplate, HumanMessagePromptTemplate
from langchain.prompts import ChatPromptTemplate

# System message: tells the model which role to play for every request.
system_prompt = SystemMessagePromptTemplate.from_template(
    "You are a professional AI summarizer with expertise in extracting key insights covering every detail."
)

# Human message: carries the extracted page text through the {Text} placeholder
# (the template's only input variable) together with the output requirements.
user_prompt = HumanMessagePromptTemplate.from_template(
    """Your task is to do the following for the extracted text from a webpage: {Text}:

    1. Title Extraction: Identify and return the title of the webpage as a standalone heading.

    2. Deep Insight Summary: Summarize the page content in a concise, yet insightful paragraph. Focus on the main purpose, unique points, and any calls to action or conclusions.

    3. Structured Bullet Points: Provide 3–5 bullet points that highlight the most important takeaways, written in simple language.

Ensure the summary is accurate, captures the tone of the original content, and is useful for someone who hasn’t seen the page.""",
    input_variables=["Text"]
)

# Full chat prompt: the system message followed by the human message.
prompt = ChatPromptTemplate.from_messages([system_prompt, user_prompt])

## Chain

In [29]:
from langchain_core.runnables import RunnableLambda, RunnableMap

# Summarization pipeline, applied to a dict that has a "Text" key:
#   1. keep only the "Text" entry of the input,
#   2. render the chat prompt with it,
#   3. send the rendered prompt to the model,
#   4. wrap the reply's text in a dict under "Web_Page Summary".
chain = RunnableMap({"Text": lambda payload: payload["Text"]}).pipe(
    RunnableLambda(lambda variables: prompt.invoke(variables)),
    llm,
    RunnableLambda(lambda reply: {"Web_Page Summary": reply.content}),
)


## Getting the URL
url -> raw text -> split the large raw text into chunks -> summarize each chunk

In [30]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Download the article and reduce it to plain text.
url = "https://en.wikipedia.org/wiki/Narendra_Modi"
text = get_text_from_url(url)

# Split the text into overlapping chunks so each piece can be summarized in a
# separate model call.
splitter = RecursiveCharacterTextSplitter(chunk_overlap=200, chunk_size=3000)
chunks = splitter.split_text(text)

# Run the chain once per chunk, keeping the summaries in document order.
summaries = [
    chain.invoke({"Text": piece})["Web_Page Summary"]
    for piece in chunks
]

# Join the partial summaries, separated by blank lines, and show the result.
final_summary = "\n\n".join(summaries)
print(final_summary)


## Narendra Modi - Wikipedia

### Deep Insight Summary

This Wikipedia article provides a comprehensive overview of the life, career, and political impact of Narendra Modi, the current Prime Minister of India.  It details his early life, political ascent from Chief Minister of Gujarat to national leadership, and his policies and initiatives as Prime Minister. The article also explores public perception of Modi, both positive and negative, highlighting his strong leadership image and controversial aspects like the 2002 Gujarat riots.  It concludes by outlining his electoral successes and lasting influence on Indian politics.

### Structured Bullet Points

* **Narendra Modi is the current Prime Minister of India**, having served since 2014.
* **He previously served as Chief Minister of Gujarat** from 2001 to 2014, overseeing significant economic development but also facing criticism over the 2002 Gujarat riots.
* **Modi's political career is marked by strong leadership and a focus on Hin