In [2]:
### Task 1: Working with Requests
## 1 - Install Python Requests Library.
import requests
url = 'https://jsonplaceholder.typicode.com/posts/1'

In [3]:
## 2 - Use a GET request.
# Send a GET request to the URL
response = requests.get(url)

In [4]:
### 3 - Use an if-else statement to check if the request was successful.
# Check if request was successful (status code 200)
if response.status_code == 200:
    print("successful: ", response.text)
else:
    print("Error")

successful:  {
  "userId": 1,
  "id": 1,
  "title": "sunt aut facere repellat provident occaecati excepturi optio reprehenderit",
  "body": "quia et suscipit\nsuscipit recusandae consequuntur expedita et cum\nreprehenderit molestiae ut ut quas totam\nnostrum rerum est autem sunt rem eveniet architecto"
}


In [5]:
### Task 2: Working with Pandas read_html

### 1 - Install Python Pandas Library.
import pandas as pd
# URL of a webpage with food-related data
food_url = 'https://en.wikipedia.org/wiki/List_of_cuisines'

In [6]:
### 2 - Read the HTML tables into a dataframe.
# Read HTML tables from the webpage
food_tables = pd.read_html(food_url)

In [7]:
## 3 - Display the number of tables found in the URL.
print("Number of tables found:", len(food_tables))

Number of tables found: 24


In [8]:
## 4 - Display the first table in the dataframe.
print("First table:")
print(food_tables[0])

First table:
                                                   0
0                                Part of a series on
1                                              Meals
2                                              Meals
3  Suhur Breakfast Second breakfast Elevenses Bru...
4                             Components and courses
5  Full-course dinner Tasting menu Amuse-bouche H...
6                                   Related concepts
7  À la carte Banquet Buffet Cuisine list Drink E...
8                                                vte


In [11]:
#### Task 3: Working with BeautifulSoup and html5lib parser

## 1 - Install Python BeautifulSoup and Requests Library.
import requests
from bs4 import BeautifulSoup

# URL to scrap data from
url = 'https://en.wikipedia.org/wiki/Beautiful_Soup_(HTML_parser)'

In [12]:
## 3.2 - Send a GET request to the URL
request = requests.get(url)

In [13]:
### 3.3 - Scrap content using Beautiful and html5lib parser.
soup = BeautifulSoup(request.content, 'html5lib')

In [14]:
#### 3.4 - Find the first paragraph.
# Find and extract the first paragraph
first_paragraph = soup.find('p')

In [15]:
### 3.5 - Display the first paragraph text.
# Print the text of the first paragraph
print("First paragraph of the article:")
print(first_paragraph.text.strip())
# strip() is used to remove leading/trailing white spaces

First paragraph of the article:
Beautiful Soup is a Python package for parsing HTML and XML documents, including those with malformed markup. It creates a parse tree for documents that can be used to extract data from HTML,[3] which is useful for web scraping.[2][4]


In [16]:
#### Task 4: Working with BeautifulSoup and lxml (xml) parser

### 1 -  Install Python BeautifulSoup and Requests Library
import requests
from bs4 import BeautifulSoup

# Use this xml code
drinks_xml = '''
<?xml version="1.0" encoding="UTF-8"?>
<drinks>
    <drink>
        <name>Coffee</name>
        <price>2.50</price>
        <ingredients>Coffee beans, water</ingredients>
    </drink>
    <drink>
        <name>Tea</name>
        <price>1.80</price>
        <ingredients>Tea leaves, water</ingredients>
    </drink>
    <drink>
        <name>Orange Juice</name>
        <price>3.00</price>
        <ingredients>Orange juice concentrate, water</ingredients>
    </drink>
</drinks>
'''


In [17]:
### 4.2 - Scrap content using BeautifulSoup and lxml (xml) parser.
soup = BeautifulSoup(drinks_xml, 'lxml')

In [18]:
### 4.3 - Find all drinks information.
drinks = soup.find_all('drink')
print(drinks)

[<drink>
<name>Coffee</name>
<price>2.50</price>
<ingredients>Coffee beans, water</ingredients>
</drink>, <drink>
<name>Tea</name>
<price>1.80</price>
<ingredients>Tea leaves, water</ingredients>
</drink>, <drink>
<name>Orange Juice</name>
<price>3.00</price>
<ingredients>Orange juice concentrate, water</ingredients>
</drink>]


In [19]:
##### 4.4 Iterate each drink item into Python objects.
for drink in drinks:
    name = drink.find('name').text
    price = float(drink.find('price').text)
    ingredients = drink.find('ingredients').text

In [20]:
#### 4.5 - Display all drink items for each drink.
    # Print all drinks
for drink in drinks:
    name = drink.find('name').text
    price = float(drink.find('price').text)
    ingredients = drink.find('ingredients').text
    print(f"Name: {name}")
    print(f"Price: ${price:.2f}")  # 2f converts to 2 float points
    print(f"Ingredients: {ingredients}")
    print()   # Inputs a next line after each drink

Name: Coffee
Price: $2.50
Ingredients: Coffee beans, water

Name: Tea
Price: $1.80
Ingredients: Tea leaves, water

Name: Orange Juice
Price: $3.00
Ingredients: Orange juice concentrate, water



In [21]:
#### Task 5: Working with BeautifulSoup and html.parser

#### 1 -   Install Python BeautifulSoup and Requests Library.
import requests

from bs4 import BeautifulSoup

# HTML content of the story
story_html = """
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>The Magic Garden</title>
</head>
<body>
    <h1 class="title">The Magic Garden</h1>

    <p>Once upon a time, in a faraway land, there existed a magical garden where</p>

    <div class="characters">
        <p class="character" id="princess">Princess Lily</p>
        <p class="character" id="wizard">Wizard Merlin</p>
        <p class="character" id="unicorn">Unicorn Sparkle</p>
    </div>

    <p>These characters roamed freely among the enchanted flowers and shimmering ponds.</p>

    <div class="story-arc">
        <h2>Chapter 1: The Discovery</h2>
        <p>One day, Princess Lily stumbled upon a hidden path that led to a secret grove...</p>
    </div>

    <div class="story-arc">
        <h2>Chapter 2: The Quest</h2>
        <p>With the help of Wizard Merlin and Unicorn Sparkle, Princess Lily embarked on a quest...</p>
    </div>

    <div class="story-arc">
        <h2>Chapter 3: The Triumph</h2>
        <p>After overcoming many challenges, they finally discovered the source of the garden's magic...</p>
    </div>
</body>
</html>
"""



In [22]:
### 5.2 - Scrap content using BeautifulSoup and html.parser.
soup = BeautifulSoup(story_html, 'html.parser')

In [23]:
### 5.3 - Display the head tag.
head_tag = soup.head
print("Head tag:")
print(head_tag)

Head tag:
<head>
<meta charset="utf-8"/>
<title>The Magic Garden</title>
</head>


In [24]:
### 5.4 - Display the title tag.
print(soup.title)

<title>The Magic Garden</title>


In [25]:
### 5.5 - Display the text in the title tag.
print(soup.title.string)

The Magic Garden


In [26]:
### 5.6 - Display the p class in the body tag.
print(soup.body.p)

<p>Once upon a time, in a faraway land, there existed a magical garden where</p>


In [27]:
### 5.7 -  Display the div class in the body tag.
print(soup.body.div)

<div class="characters">
<p class="character" id="princess">Princess Lily</p>
<p class="character" id="wizard">Wizard Merlin</p>
<p class="character" id="unicorn">Unicorn Sparkle</p>
</div>


In [28]:
### 5.8 - Display h1 tag
print(soup.h1)

<h1 class="title">The Magic Garden</h1>


In [29]:
##### 5.9 - Filter based on a string ‘title’ and print the text of the string.
title_tag = soup.find('title')
print("Title of the story:", title_tag.text)

Title of the story: The Magic Garden


In [30]:
#### 5.10 - Filter based on a regular expression ‘character’ and print each result.
# Filter attribute based on a regular expression
characters = soup.find(class_=lambda x: x and 'character' in x)
print("\nCharacters in the story:")  #\n inserts a new line
for character in characters:
    print(character.text)


Characters in the story:


Princess Lily


Wizard Merlin


Unicorn Sparkle




In [31]:
#### 5.11 - Filter based on a list [“story-arc”] and print each result.
# Filter by attribute based on a list
story_arcs = soup.find_all(class_=["story-arc"])
print("\nStory Arcs:")
for arc in story_arcs:
    print(arc.h2.text)


Story Arcs:
Chapter 1: The Discovery
Chapter 2: The Quest
Chapter 3: The Triumph


In [32]:
#### 5.12 - Filter based on a function and print the result.
# Filter by attribute based on a function
def is_title_tag(tag):
    return tag.name == "h1" and "title" in tag.get('class', [])

title_tag = soup.find(is_title_tag)
print("\nTitle of the story using a function:", title_tag.text)


Title of the story using a function: The Magic Garden


In [35]:
#### 5.13 - Filter based on the value True and print each result.
# Filter by attribute with value True
div_with_id = soup.find(id=True).text
print("\nFirst div with an 'id' attribute:", div_with_id)


First div with an 'id' attribute: Princess Lily
