# Web Scraping and API Interaction Cheat Sheet


In [1]:
## Import Libraries
import requests
from bs4 import BeautifulSoup
from PIL import Image
from io import BytesIO

In [4]:
# Accessing element attribute
# Access the value of a specific attribute of an HTML element.
# Syntax:
# attribute = element[attribute]
# Example:
html = '<a href="https://example.com">Example</a>'
soup = BeautifulSoup(html, 'html.parser')
link_element = soup.find('a')
href = link_element['href']
print(f'href: {href}')

href: https://example.com


In [3]:
# BeautifulSoup()
# Parse the HTML content of a web page using BeautifulSoup.
# Syntax:
# soup = BeautifulSoup(html, 'html.parser')
# Example:
html = requests.get('https://example.com').text
soup = BeautifulSoup(html, 'html.parser')

In [9]:
# find()
# Find the first HTML element that matches the specified tag and attributes.
# Syntax:
# element = soup.find(tag, attrs)
# Example:
first_link = soup.find('a', {'class': 'link'})
print(f'First link: {first_link}')

First link: None


In [1]:
# Web Scraping and API Interaction Cheat Sheet

## Import Libraries
import requests
from bs4 import BeautifulSoup
from PIL import Image
from io import BytesIO

# Accessing element attribute
# Access the value of a specific attribute of an HTML element.
# Syntax:
# attribute = element[attribute]
# Example:
html = '<a href="https://example.com">Example</a>'
soup = BeautifulSoup(html, 'html.parser')
link_element = soup.find('a')
href = link_element['href']
print(f'href: {href}')

# BeautifulSoup()
# Parse the HTML content of a web page using BeautifulSoup.
# Syntax:
# soup = BeautifulSoup(html, 'html.parser')
# Example:
html = requests.get('https://example.com').text
soup = BeautifulSoup(html, 'html.parser')

# delete()
# Send a DELETE request to remove data or a resource from the server.
# Syntax:
# response = requests.delete(url)
# Example:
# response = requests.delete('https://api.example.com/delete')

# find()
# Find the first HTML element that matches the specified tag and attributes.
# Syntax:
# element = soup.find(tag, attrs)
# Example:
first_link = soup.find('a', {'class': 'link'})
print(f'First link: {first_link}')

# find_all()
# Find all HTML elements that match the specified tag and attributes.
# Syntax:
# elements = soup.find_all(tag, attrs)
# Example:
all_links = soup.find_all('a', {'class': 'link'})
print(f'All links: {all_links}')

# findChildren()
# Find all child elements of an HTML element.
# Syntax:
# children = element.findChildren()
# Example:
parent_div = soup.find('div', {'id': 'parent'})
child_elements = parent_div.findChildren()
print(f'Child elements: {child_elements}')

# get()
# Perform a GET request to retrieve data from a specified URL.
# Syntax:
# response = requests.get(url)
# Example:
url = 'https://api.example.com/data'
response = requests.get(url)
print(f'Response: {response.text}')

# Headers
# Include custom headers in the request.
# Syntax:
# headers = {'HeaderName': 'Value'}
# Example:
base_url = 'https://api.example.com/data'
headers = {'Authorization': 'Bearer YOUR_TOKEN'}
response = requests.get(base_url, headers=headers)
print(f'Response with headers: {response.text}')

# json()
# Parse JSON data from the response.
# Syntax:
# data = response.json()
# Example:
response = requests.get('https://api.example.com/data')
data = response.json()
print(f'JSON data: {data}')

# next_sibling()
# Find the next sibling element in the DOM.
# Syntax:
# sibling = element.find_next_sibling()
# Example:
current_element = soup.find('p')
next_sibling = current_element.find_next_sibling()
print(f'Next sibling: {next_sibling}')

# parent
# Access the parent element in the Document Object Model (DOM).
# Syntax:
# parent = element.parent
# Example:
paragraph = soup.find('p')
parent_div = paragraph.parent
print(f'Parent element: {parent_div}')

# post()
# Send a POST request to a specified URL with data.
# Syntax:
# response = requests.post(url, data)
# Example:
url = 'https://api.example.com/submit'
data = {'key': 'value'}
response = requests.post(url, data=data)
print(f'POST response: {response.text}')

# put()
# Send a PUT request to update data on the server.
# Syntax:
# response = requests.put(url, data)
# Example:
url = 'https://api.example.com/update'
data = {'key': 'value'}
response = requests.put(url, data=data)
print(f'PUT response: {response.text}')

# Query parameters
# Pass query parameters in the URL to filter or customize the request.
# Syntax:
# params = {'param_name': 'value'}
# Example:
base_url = 'https://api.example.com/data'
params = {'page': 1, 'per_page': 10}
response = requests.get(base_url, params=params)
print(f'Response with query parameters: {response.text}')

# select()
# Select HTML elements from the parsed HTML using a CSS selector.
# Syntax:
# element = soup.select(selector)
# Example:
titles = soup.select('h1')
print(f'Titles: {titles}')

# status_code
# Check the HTTP status code of the response.
# Syntax:
# response.status_code
# Example:
url = 'https://api.example.com/data'
response = requests.get(url)
status_code = response.status_code
print(f'Status code: {status_code}')

# tags for find() and find_all()
# Specify any valid HTML tag as the tag parameter to search for elements of that type.
# Example:
tags = ['a', 'p', 'h1', 'table', 'tr', 'td', 'th', 'img', 'form', 'button']

# text
# Retrieve the text content of an HTML element.
# Syntax:
# text = element.text
# Example:
title_element = soup.find('h1')
title_text = title_element.text
print(f'Title text: {title_text}')

# Opening an image using PIL
# URL of the image
image_url = "https://hips.hearstapps.com/hmg-prod.s3.amazonaws.com/images/dog-puppy-on-garden-royalty-free-image-1586966191.jpg"
# Download the image
image_response = requests.get(image_url)
img_data = image_response.content
# Open the image with Pillow
image = Image.open(BytesIO(img_data))
# Display the image
image.show()


href: https://example.com
First link: None
All links: []


AttributeError: 'NoneType' object has no attribute 'findChildren'