In [1]:
import requests
from bs4 import BeautifulSoup

def scrape_fandom_page(url):
    response = requests.get(url)
    response.raise_for_status()
    html = response.text

    soup = BeautifulSoup(html, 'html.parser')
    title = soup.find('h1').text.strip()
    content_div = soup.find('div', {'class': 'mw-parser-output'})

    chunks = []
    current_chunk = []
    current_section_title = "Introduction"

    for tag in content_div.find_all(['h2', 'h3', 'p']):
        if tag.name in ['h2', 'h3']:
            if current_chunk:
                chunks.append({
                    "section": current_section_title,
                    "text": " ".join(current_chunk)
                })
                current_chunk = []
            current_section_title = tag.get_text(strip=True)
        elif tag.name == 'p':
            text = tag.get_text(strip=True)
            if text:
                current_chunk.append(text)

    if current_chunk:
        chunks.append({
            "section": current_section_title,
            "text": " ".join(current_chunk)
        })

    return {
        'title': title,
        'url': url,
        'chunks': chunks
    }


In [2]:
url = "https://naruto.fandom.com/wiki/Hinata_Hyūga"
result = scrape_fandom_page(url)

print("Title:", result['title'])
for chunk in result['chunks'][:3]:
    print("\n=== " + chunk['section'] + " ===")
    print(chunk['text'][:300] + "...")


Title: Hinata Hyūga

=== Introduction ===
Hinata Uzumaki(うずまきヒナタ,Uzumaki Hinata,néeHyūga(日向)) is a kunoichi ofKonohagakure. Formerly the heiress of theHyūga clan, she lost the position upon being deemed unsuited for the responsibilities of leading the clan. Nonetheless, Hinataperseveredand from her observation ofNaruto Uzumakiespecially, Hi...

=== Background[] ===
Hinata and Hanabi fight for the heiress position. Hinata is the oldest daughter ofHiashi Hyūga. When she was still a child, Hinata was almost kidnapped by the-thenHead NinjaofKumogakure, who was visiting Konoha under the guise of signing a peace treaty between their two villages. Hiashi killed him a...

=== Personality[] ===
As a child, Hinata was soft-spoken and polite, always addressing people with properhonourifics. She is kind, always thinking of others more than for herself, caring for their feelings and well-being. She doesn't like being confrontational for any reason. This led to her being meek or timid to others...


In [3]:
from transformers import pipeline

def summarize_text(text, model_name="facebook/bart-large-cnn", max_length=130, min_length=60):
    summarizer = pipeline("summarization", model=model_name)
    summary = summarizer(text, max_length=max_length, min_length=min_length, do_sample=False)
    return summary[0]['summary_text']

sample_text = result['chunks'][0]['text']
print("\n Original:\n", sample_text[:500])

summary = summarize_text(sample_text)
print("\n Summary:\n", summary)

  from .autonotebook import tqdm as notebook_tqdm



 Original:
 Hinata Uzumaki(うずまきヒナタ,Uzumaki Hinata,néeHyūga(日向)) is a kunoichi ofKonohagakure. Formerly the heiress of theHyūga clan, she lost the position upon being deemed unsuited for the responsibilities of leading the clan. Nonetheless, Hinataperseveredand from her observation ofNaruto Uzumakiespecially, Hinata found an example to follow. Through her membership withTeam 8, she sought to become strong enough to change herself, if even a little at a time. A few years after theFourth Shinobi World War, Hin


Device set to use cuda:0



 Summary:
 Hinata Uzumaki is a kunoichi ofKonohagakure. Formerly the heiress of theHyūga clan, she lost the position upon being deemed unsuited for the responsibilities of leading the clan. Through her membership withTeam 8, she sought to become strong enough to change herself, if even a little at a time.


In [5]:
from transformers import pipeline

summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

individual_summaries = []
print("Summarizing individual chunks:")
for i, chunk in enumerate(result['chunks'][:5]):
    print(f"\n== {chunk['section']} ==")
    chunk_text = chunk['text']
    
    if len(chunk_text) > 1200:
        chunk_text = chunk_text[:1200]

    summary = summarizer(chunk_text, max_length=130, min_length=60, do_sample=False)[0]['summary_text']
    print(summary)
    individual_summaries.append(summary)

combined_summaries_text = " ".join(individual_summaries)

print("\n Final Summary of Combined Summaries:")
final_summary = summarizer(combined_summaries_text, max_length=150, min_length=70, do_sample=False)[0]['summary_text']
print(final_summary)


Device set to use cuda:0


Summarizing individual chunks:

== Introduction ==
Hinata Uzumaki is a kunoichi ofKonohagakure. Formerly the heiress of theHyūga clan, she lost the position upon being deemed unsuited for the responsibilities of leading the clan. Through her membership withTeam 8, she sought to become strong enough to change herself, if even a little at a time.

== Background[] ==
Hinata is the oldest daughter ofHiashi Hyūga. She was raised to be its heiress and her family expected great things from her. But even at a young age, her father came to believe that Hinata was not suited for the role. In the anime, Hinata and Hanabi were pitted against each other to determine who would one day lead the Hyūgas. Because Hinata refused to harm Hanabi, she was defeated and disinherited.

== Personality[] ==
As a child, Hinata was soft-spoken and polite, always addressing people with properhonourifics. She is kind, always thinking of others more than for herself, caring for their feelings and well-being. This led