In [995]:
!pip install bs4



In [996]:
from bs4 import BeautifulSoup

# Sample HTML document used by the cells that follow. It contains a <title>, one
# <h1>, two paragraphs (classes "xyz" and "content", the second with a link),
# a three-item list inside <div id="container">, and a small Name/Age table.
html_doc = """
<html>
<head><title>World's Population</title></head>
<body>
    <h1>Main Heading</h1>
    <p class="xyz">This is a paragraph.</p>
    <p class="content">Another paragraph with <a href="https://example.com">a link</a>.</p>

    <div id="container">
        <ul>
            <li class="item">Item 1</li>
            <li class="item">Item 2</li>
            <li class="item">Item 3</li>
        </ul>
    </div>

    <table>
        <tr><th>Name</th><th>Age</th></tr>
        <tr><td>Alice</td><td>25</td></tr>
        <tr><td>Bob</td><td>30</td></tr>
    </table>
</body>
</html>
"""

In [997]:
# Build the parse tree with Python's built-in HTML parser, then echo the document back.
soup = BeautifulSoup(markup=html_doc, features="html.parser")
print(soup)


<html>
<head><title>World's Population</title></head>
<body>
<h1>Main Heading</h1>
<p class="xyz">This is a paragraph.</p>
<p class="content">Another paragraph with <a href="https://example.com">a link</a>.</p>
<div id="container">
<ul>
<li class="item">Item 1</li>
<li class="item">Item 2</li>
<li class="item">Item 3</li>
</ul>
</div>
<table>
<tr><th>Name</th><th>Age</th></tr>
<tr><td>Alice</td><td>25</td></tr>
<tr><td>Bob</td><td>30</td></tr>
</table>
</body>
</html>



In [998]:
print("Page Title:", soup.body.text)

Page Title: 
Main Heading
This is a paragraph.
Another paragraph with a link.


Item 1
Item 2
Item 3



NameAge
Alice25
Bob30




In [999]:
# Parse a tiny inline document and pretty-print it: prettify() puts each tag and each
# piece of text on its own line, indented by nesting depth.
x = BeautifulSoup(
    "<html><body><span>This is first</span><p>This is second</p></body></html>",
    "html.parser",
)
print("\nPrettified HTML:", x.prettify(), sep="\n")


Prettified HTML:
<html>
 <body>
  <span>
   This is first
  </span>
  <p>
   This is second
  </p>
 </body>
</html>



In [1000]:
# find() returns only the first match; find_all() returns every match.
# Run the find_all query once and reuse the result for both the print and the text list.
paragraphs = soup.find_all("p")
print("\nFind first paragraph:", soup.find("p").text)  # First <p> tag
print("Find all paragraphs:", paragraphs)  # All <p> tags
# Keep only the text of each paragraph, dropping the markup.
x = [p.text for p in paragraphs]
print(x)


Find first paragraph: This is a paragraph.
Find all paragraphs: [<p class="xyz">This is a paragraph.</p>, <p class="content">Another paragraph with <a href="https://example.com">a link</a>.</p>]
['This is a paragraph.', 'Another paragraph with a link.']


In [1001]:
print("\nUsing CSS Selectors:")
# select() returns a list of matches, so index into it to get a single tag.
print(soup.select("p")[0].text)  # Select by tag name
print(soup.select(".xyz")[0].text)  # Select by class
print(soup.select("#container ul li")[1].text)  # Descendant selector: second <li> under #container


Using CSS Selectors:
This is a paragraph.
This is a paragraph.
Item 2


In [1002]:
print("\nNavigating the DOM:")
# Attribute-style access steps to the first descendant tag with that name at each hop.
print(soup.body.h1.text)  # First <h1> tag inside <body>
print(soup.body.div.ul.li.text)  # First <li> of the first <ul> in the first <div>


Navigating the DOM:
Main Heading
Item 1


In [1003]:
# find() returns the first <a> tag in the document.
link = soup.find("a")
print(link.text)  # Visible link text
print("\nExtracting Attributes:")
print("Link URL:", link["href"])  # Tag attributes are read with dict-style indexing

a link

Extracting Attributes:
Link URL: https://example.com


In [1004]:
print("\nModifying HTML:")
# Assigning to .string replaces the tag's content in place, so `soup` itself is
# changed for every cell that runs after this one.
soup.find("h1").string = "Updated Heading"
print(soup.h1.text)


Modifying HTML:
Updated Heading


In [1005]:
print("\nExtracting All Text from Page:")
# The separator is placed between adjacent pieces of text; the whitespace between
# tags counts as text too, which is why the output contains many bare "|" lines.
print(soup.get_text(separator='|'))


Extracting All Text from Page:

|
|World's Population|
|
|Updated Heading|
|This is a paragraph.|
|Another paragraph with |a link|.|
|
|
|Item 1|
|Item 2|
|Item 3|
|
|
|
|Name|Age|
|Alice|25|
|Bob|30|
|
|
|



In [1006]:
# decompose() removes the tag from the tree. Once the <h1> is gone, find() returns
# None, so guard the call to keep this cell safe to run more than once.
heading = soup.find("h1")
if heading is not None:
    heading.decompose()  # Remove <h1> tag
print("\nAfter Removing h1:", soup)


After Removing h1: 
<html>
<head><title>World's Population</title></head>
<body>

<p class="xyz">This is a paragraph.</p>
<p class="content">Another paragraph with <a href="https://example.com">a link</a>.</p>
<div id="container">
<ul>
<li class="item">Item 1</li>
<li class="item">Item 2</li>
<li class="item">Item 3</li>
</ul>
</div>
<table>
<tr><th>Name</th><th>Age</th></tr>
<tr><td>Alice</td><td>25</td></tr>
<tr><td>Bob</td><td>30</td></tr>
</table>
</body>
</html>



In [1007]:
table1 = soup.find("table")
rows = table1.find_all("tr")
print("\nTable Data:")
for row in rows:
   cols = row.find_all("td")
   print(row.text.split())
   if cols:
        print(f"Name: {cols[0].text}, Age: {cols[1].text}")


Table Data:
['NameAge']
['Alice25']
Name: Alice, Age: 25
['Bob30']
Name: Bob, Age: 30


In [1008]:
# Raw markup held as a plain string; note the <th> cells are not wrapped in a <table>.
x = "<html><th>Location</th><th>Population</th></html>"

In [1009]:
# Bare expression: the notebook echoes the repr of the raw markup string.
x

'<html><th>Location</th><th>Population</th></html>'

In [1010]:
# Before parsing, the markup is an ordinary str.
print(type(x))

<class 'str'>


In [1011]:
from bs4 import BeautifulSoup

In [1012]:
soup=BeautifulSoup(x)
print(soup)
print(type(soup))

<html><body><th>Location</th><th>Population</th></body></html>
<class 'bs4.BeautifulSoup'>


In [1013]:
# find() returns a single Tag (the first <th>); its .text is a plain str.
firstfind = soup.find('th')
print(firstfind)
print(type(firstfind))
print(firstfind.text)
print(type(firstfind.text))

<th>Location</th>
<class 'bs4.element.Tag'>
Location
<class 'str'>


In [1014]:
# find_all() returns a ResultSet containing every <th> tag.
allfind=soup.find_all('th')
print(allfind)
print(type(allfind))

[<th>Location</th>, <th>Population</th>]
<class 'bs4.element.ResultSet'>


In [1015]:
# Iterating the ResultSet yields each Tag; printing a Tag shows its full markup.
for i in allfind:
  print(i)

<th>Location</th>
<th>Population</th>


In [1016]:
# .text drops the tags and leaves only each header's content.
for i in allfind:
  print(i.text)

Location
Population


In [1017]:
# Collect the header texts into a list with an explicit loop.
headers=[]
for i in allfind:
  headers.append(i.text)

# Bare expression so the notebook displays the list.
headers

['Location', 'Population']

In [1018]:
# Build the list of header texts in one line with a list comprehension.
headers = [i.text for i in allfind]
headers

['Location', 'Population']

In [1019]:
### Question 1

from bs4 import BeautifulSoup

html_doc = "<html><body><h1>Hello, World!</h1></body></html>"
### Question 1

from bs4 import BeautifulSoup

html_doc = "<html><body><h1>Hello, World!</h1></body></html>"
soup = BeautifulSoup(html_doc, "html.parser")
print(soup.h1.text)
# Options:
# A')' <h1>Hello, World!</h1>
# B')' Hello, World!
### Question 1

from bs4 import BeautifulSoup

html_doc = "<html><body><h1>Hello, World!</h1></body></html>"
soup = BeautifulSoup(html_doc, "html.parser")
print(soup.h1.text)
# Options:
# A')' <h1>Hello, World!</h1>
# B')' Hello, World!
# C')' None
# D')' Error: AttributeError
#C')' None
#D')' Error: AttributeError

Hello, World!
Hello, World!


In [1020]:
# Answer key for Question 1.
print("Correct Answer: B) Hello, World!")

Correct Answer: B) Hello, World!


In [1021]:
### Question 2
# What does this cell print?
from bs4 import BeautifulSoup

html_doc = "<html><body><p class='info'>Python</p><p>Beautiful Soup</p></body></html>"
soup = BeautifulSoup(html_doc, "html.parser")
# find() stops at the first matching tag, so only the first <p> is read.
print(soup.find('p').text)
# Options:
# A) Python
# B) Beautiful Soup
# C) <p class='info'>Python</p>
# D) None

Python


In [1022]:
# Answer key for Question 2.
print("Correct Answer: A) Python")

Correct Answer: A) Python


In [1023]:
### Question 3
# What does this cell print?

from bs4 import BeautifulSoup

html_doc = """
<html>
  <body>
    <p class='content'>Paragraph 1</p>
    <p>Paragraph 2</p>
    <p class='content'>Paragraph 3</p>
  </body>
</html>
"""
soup = BeautifulSoup(html_doc, "html.parser")
# class_='content' keeps only the <p> tags that carry that CSS class.
result = soup.find_all('p', class_='content')
for item in result:
    print(item.text)
# Options:
# A) Paragraph 1
# B) Paragraph 1 and Paragraph 3
# C) Paragraph 2
# D) None

Paragraph 1
Paragraph 3


In [1024]:
# Answer key for Question 3.
print("Correct Answer: B) Paragraph 1 and Paragraph 3")

Correct Answer: B) Paragraph 1 and Paragraph 3


In [1025]:
### Question 4
# What does this cell print?


from bs4 import BeautifulSoup

html_doc = "<html><a href='https://example.com'>Visit Example</a></html>"
soup = BeautifulSoup(html_doc, "html.parser")
# Dict-style indexing on a tag reads the value of that attribute.
print(soup.a['href'])
# Options:
# A) https://example.com
# B) Visit Example
# C) <a href='https://example.com'>Visit Example</a>
# D) None

https://example.com


In [1026]:
# Answer key for Question 4.
print("Correct Answer: A) https://example.com")

Correct Answer: A) https://example.com


In [1027]:
### Question 5
# What does this cell print?


from bs4 import BeautifulSoup

html_doc = """
<html>
  <body>
    <div>
      <h1>Title</h1>
      <p>Description</p>
    </div>
  </body>
</html>
"""
soup = BeautifulSoup(html_doc, "html.parser")
# Dotted access descends to the first <div>, then to the first <h1> inside it.
print(soup.div.h1.text)
# Options:
# A) Title
# B) Description
# C) <h1>Title</h1>
# D) None


Title


In [1028]:
# Answer key for Question 5.
print("Correct Answer: A) Title")

Correct Answer: A) Title


In [1029]:
### Question 6
# What does this cell print?

from bs4 import BeautifulSoup

html_doc = """
<html>
  <body>
    <ul>
      <li>Item 1</li>
      <li>Item 2</li>
      <li>Item 3</li>
    </ul>
  </body>
</html>
"""
soup = BeautifulSoup(html_doc, "html.parser")
# find_all() returns every <li>; len() counts the matches.
items = soup.find_all('li')
print(len(items))
# Options:
# A) 1
# B) 2
# C) 3
# D) 0




3


In [1030]:
# Answer key for Question 6.
print("Correct Answer: C) 3")

Correct Answer: C) 3


In [1031]:
### Question 7
# What does this cell print?


from bs4 import BeautifulSoup

html_doc = """
<html>
  <body>
    <div id="main">
      <p class="text">First</p>
      <p class="text">Second</p>
    </div>
  </body>
</html>
"""
soup = BeautifulSoup(html_doc, "html.parser")
# '#main .text' selects elements with class "text" nested inside the element whose id is "main".
result = soup.select('#main .text')
for item in result:
    print(item.text)
# Options:
# A) First
# B) First and Second
# C) Second
# D) None

First
Second


In [1032]:
# Answer key for Question 7.
print("Correct Answer: B) First and Second")

Correct Answer: B) First and Second


In [1033]:
### Question 8
# What does this cell print?

from bs4 import BeautifulSoup

html_doc = "<html><body><h1>Hello</h1><p>World</p></body></html>"
soup = BeautifulSoup(html_doc, "html.parser")
# With no separator get_text() joins the pieces of text directly and prints
# "HelloWorld", which matches none of the options. Ask for a newline between the
# pieces so the output is the two-line text that option B describes.
print(soup.get_text(separator="\n"))
# Options:
# A) Hello World
# B) Hello\nWorld
# C) <h1>Hello</h1><p>World</p>
# D) None

Hello
World


In [1034]:
print("Correct Answer: B) Hello\nWorld")

Correct Answer: B) Hello
World


In [1035]:
### Question 9
# What does this cell print?


from bs4 import BeautifulSoup

html_doc = """
<html>
  <body>
    <img src="image1.png" alt="First Image">
    <img src="image2.png" alt="Second Image">
  </body>
</html>
"""
soup = BeautifulSoup(html_doc, "html.parser")
# The alt= keyword filters on the attribute value, so the second <img> is the match.
image = soup.find('img', alt='Second Image')
print(image['src'])
# Options:
# A) image1.png
# B) image2.png
# C) <img src='image2.png' alt='Second Image'>
# D) None

image2.png


In [1036]:
# Answer key for Question 9.
print("Correct Answer: B) image2.png")

Correct Answer: B) image2.png


In [1037]:
### Question 10
# What does this cell print?

from bs4 import BeautifulSoup

html_doc = """
<html>
  <body>
    <p class="info detail">Content 1</p>
    <p class="info">Content 2</p>
  </body>
</html>
"""
soup = BeautifulSoup(html_doc, "html.parser")
# class_='info' matches a tag when "info" is any one of its classes, so the
# multi-class <p class="info detail"> is included as well.
result = soup.find_all('p', class_='info')
for item in result:
    print(item.text)
# Options:
# A) Content 1
# B) Content 2
# C) Content 1 and Content 2
# D) None


Content 1
Content 2


In [1038]:
# Answer key for Question 10.
print("Correct Answer: C) Content 1 and Content 2")

Correct Answer: C) Content 1 and Content 2


In [1039]:
from bs4 import BeautifulSoup
import requests

# URL of the webpage you want to scrape (using a real blog URL for example)
url = 'https://realpython.com/'

# Send an HTTP request to the webpage. requests has no default timeout, so bound the
# wait to keep the notebook from hanging on a stalled connection.
response = requests.get(url, timeout=10)
# Stop with an HTTPError on a 4xx/5xx response instead of silently parsing an error page.
response.raise_for_status()

# Parse the HTML content of the page
soup = BeautifulSoup(response.text, 'html.parser')

# Extract all the blog post titles (example: assuming titles are in <h2> tags with class 'card-title')
titles = soup.find_all('h2', class_='card-title')

# Print the extracted titles, numbered from 1
print("Blog Post Titles:")
for i, title in enumerate(titles, 1):
    print(f"{i}. {title.text.strip()}")

Blog Post Titles:
1. How to Split a String in Python
2. NumPy Techniques and Practical Examples
3. Python for Loops: The Pythonic Way
4. Build a Dice-Rolling Application With Python
5. Top Python Game Engines
6. Develop Data Visualization Interfaces in Python With Dash
7. Build a Quiz Application With Python
8. Build a Tic-Tac-Toe Game With Python and Tkinter
9. Python & APIs: A Winning Combo for Reading Public Data
10. A Guide to Modern Python String Formatting Tools
11. Providing Multiple Constructors in Your Python Classes
12. Natural Language Processing With spaCy in Python
13. Split Your Dataset With scikit-learn's train_test_split()
14. Creating a Scalable Flask Web Application From Scratch
15. How to Split a Python List or Iterable Into Chunks
16. Lists vs Tuples in Python
17. Python Folium: Create Web Maps From Your Data
18. Python's "in" and "not in" Operators: Check for Membership
19. Python's zipfile: Manipulate Your ZIP Files Efficiently
