In [16]:
import requests
from bs4 import BeautifulSoup
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.lsa import LsaSummarizer
import nltk

# Fetch NLTK's 'punkt' tokenizer data up front.
# Presumably required by sumy's Tokenizer("english") used later in this notebook — TODO confirm.
nltk.download('punkt')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\prave\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping tokenizers\punkt.zip.


True

In [19]:
def fetch_webpage_content(url, timeout=10):
    """Download a web page and return its body as text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Requests
            applies no timeout unless one is passed, so without it a stalled
            server would block the notebook kernel indefinitely.

    Returns:
        The response body as a string (``response.text``).

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status
            (raised by ``raise_for_status``).
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on error pages instead of summarizing an error body.
    response.raise_for_status()
    return response.text

def parse_html_content(html_content):
    """Pull the readable text out of an HTML document.

    Only paragraph, heading (h1-h3) and list-item elements are considered;
    the text of each matching element is joined with single spaces.

    Args:
        html_content: HTML markup as a string.

    Returns:
        One string containing the text of every matching element, in the
        order ``find_all`` returns them.
    """
    wanted_tags = ['p', 'h1', 'h2', 'h3', 'li']
    document = BeautifulSoup(html_content, 'html.parser')
    fragments = []
    for node in document.find_all(wanted_tags):
        fragments.append(node.get_text())
    return ' '.join(fragments)

def summarize_text(text, sentences_count=5, language="english"):
    """Produce an extractive summary of ``text`` using sumy's LSA summarizer.

    Args:
        text: Plain text to summarize.
        sentences_count: Number of sentences requested from the summarizer.
        language: Language name handed to sumy's ``Tokenizer``. Defaults to
            ``"english"``, which was previously hard-coded.

    Returns:
        The selected sentences joined with single spaces, or an empty string
        when ``text`` is ``None``, empty, or whitespace only.
    """
    # Nothing to summarize: skip tokenization and return an empty summary.
    if not text or not text.strip():
        return ''

    parser = PlaintextParser.from_string(text, Tokenizer(language))
    summarizer = LsaSummarizer()
    summary = summarizer(parser.document, sentences_count)
    return ' '.join(str(sentence) for sentence in summary)

def summarize_webpage(url, sentences_count=5):
    """Fetch a web page, extract its text, and summarize it.

    Chains the three helpers: download the HTML, reduce it to plain text,
    then summarize that text.

    Args:
        url: Address of the page to summarize.
        sentences_count: Number of sentences requested for the summary.

    Returns:
        The summary string produced by ``summarize_text``.
    """
    page_text = parse_html_content(fetch_webpage_content(url))
    return summarize_text(page_text, sentences_count)

# Example usage: see the next cell, which calls summarize_webpage on a sample URL.


In [21]:
# Run the fetch -> parse -> summarize chain on a sample article with the default sentence count.
url = "https://medium.com/the-research-nest/explained-transformers-for-everyone-af01cbe600c5"
summary = summarize_webpage(url)
# Bare last expression so the notebook displays the summary string.
summary

'Once that’s done, you have your “AI model” to generate new responses based on everything learned from the training data. In technical terms, attention mechanisms calculate weights, determining how much focus is put on each part of the input data. This process helps the model build a deep understanding of how words are related to each other and what actually makes sense. If you want to learn more about “tokens” and “embeddings (the special numbers as I refer them),” check out my other article below. I intentionally did not touch upon the exact mathematical concepts or equations used in each layer to avoid unnecessary complications.'

In [None]:
# Use a pipeline as a high-level helper
from transformers import pipeline

# Build a summarization pipeline from the facebook/bart-large-cnn checkpoint.
pipe = pipeline("summarization", model="facebook/bart-large-cnn")

In [None]:
# Load model directly
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

tokenizer = AutoTokenizer.from_pretrained("facebook/bart-large-cnn")
model = AutoModelForSeq2SeqLM.from_pretrained("facebook/bart-large-cnn")

In [None]:
# NOTE(review): this cell repeats the earlier example verbatim. It calls
# summarize_webpage (the sumy LSA path) and does not use the transformers
# objects loaded in the preceding cells — confirm whether that is intended.
url = "https://medium.com/the-research-nest/explained-transformers-for-everyone-af01cbe600c5"
summary = summarize_webpage(url)
# Bare last expression so the notebook displays the summary string.
summary