In [None]:
import requests  # Library for making HTTP requests
from bs4 import BeautifulSoup  # Library for parsing HTML
from docx import Document  # Library for creating Word documents
from vertexai.preview.language_models import TextGenerationModel  # Language model for text summarization

In [None]:
# Load the pretrained Vertex AI text model that later cells call to summarize paragraphs.
MODEL_NAME = "text-bison@001"
text_generation_model = TextGenerationModel.from_pretrained(MODEL_NAME)

In [None]:
# Create the empty Word document that later cells fill with headings and summaries.
doc = Document()
url = "https://en.wikipedia.org/wiki/Alexander_the_Great"

# Download the article. The timeout stops the cell from hanging indefinitely on a
# stalled connection, and the explicit User-Agent identifies this script to
# Wikimedia instead of relying on the generic library default.
# NOTE(review): consider adding contact details to the User-Agent string.
response = requests.get(
    url,
    headers={"User-Agent": "wikipedia-summary-notebook/1.0 (python-requests)"},
    timeout=30,
)
# Fail here on a 4xx/5xx response rather than parsing an error page as the article.
response.raise_for_status()

soup = BeautifulSoup(response.text, "html.parser")
content = {}  # not used by the cells visible here; kept in case other cells rely on it

# The <div class="mw-content-container"> element wraps the article title and body.
main_container = soup.find('div', class_='mw-content-container')
if main_container is None:
    # Surface a clear error now instead of an AttributeError on None in the next cell.
    raise RuntimeError(
        "Could not find a <div class='mw-content-container'> element in " + url
    )

In [None]:
# Walk the article's headings and paragraphs in document order. Headings are
# copied into the Word document; each paragraph is replaced by a model-generated
# summary. Progress is echoed with print() so the run can be followed.
for main in main_container.select('h1, h2, h3, h4, h5, h6, p'):
    if main.name == 'h1':
        # The h1 becomes the document title (python-docx heading level 0).
        print(main.text)
        doc.add_heading(main.text.strip(), 0)
    elif main.name == 'h2':
        # h2 headings become first-level headings.
        print(main.text)
        doc.add_heading(main.text.strip(), level=1)
    elif main.name in ['h3', 'h4', 'h5', 'h6']:
        # All deeper headings are flattened to second-level headings.
        print(main.text)
        doc.add_heading(main.text.strip(), level=2)
    else:
        # Skip paragraphs with no visible text: summarizing them wastes a model
        # call and would add a meaningless paragraph to the document.
        if not main.text.strip():
            continue

        # Build the summarization prompt from the instruction plus the paragraph text.
        prompt = "Summarize the text while retaining all critical details in a brief format. " + main.text

        # Ask the text generation model for a summary of at most 500 output tokens.
        summary = text_generation_model.predict(prompt, max_output_tokens=500).text

        # Store the summary in the document in place of the original paragraph.
        doc.add_paragraph(summary.strip())

        # Echo the summary for reference.
        print(summary)


In [None]:
# Write the assembled document to a .docx file at the given relative path.
output_filename = 'Alexander_Summary.docx'
doc.save(output_filename)