In [5]:
import requests
import json
import xml.etree.ElementTree as ET
from bs4 import BeautifulSoup

# Goodreads search endpoint and the query-string parameters sent with it.
url = "https://www.goodreads.com/search"
params = {
    "q": "Data Scientist",
    "search_type": "books"
}

# Fetch the search results page. The timeout keeps a stalled connection from
# hanging the kernel indefinitely, and raise_for_status() stops the cell on an
# HTTP error (4xx/5xx) instead of silently parsing an error page.
response = requests.get(url, params=params, timeout=30)
response.raise_for_status()

# Parse the returned HTML.
soup = BeautifulSoup(response.content, "html.parser")

# Select every <tr> whose itemtype attribute is the schema.org Book type;
# these are treated as the search-result rows.
book_elements = soup.find_all("tr", itemtype="http://schema.org/Book")

# Make an empty result visible: without this, the rest of the cell would write
# empty output files with no indication that nothing was scraped.
if not book_elements:
    print("Warning: no book rows found in the response; output will be empty.")

# Collect one {"title": ...} record per result row.
books_data = []

for book_element in book_elements:
    title_element = book_element.find("a", class_="bookTitle")
    if title_element is None:
        # find() returns None when the row has no title link; skip the row
        # rather than crash on `.text`.
        continue
    title = title_element.text.strip()

    book_data = {
        "title": title
    }
    books_data.append(book_data)

# Persist the scraped records as pretty-printed JSON (4-space indent).
json_file = "books_data.json"

# Serialize to text first, then write that text to the file in one call.
json_text = json.dumps(books_data, indent=4)
with open(json_file, "w") as json_out:
    json_out.write(json_text)

# Save the same records as XML, shaped as:
#   <books><book><title>...</title></book>...</books>
xml_file = "books_data.xml"

# Root container for all book entries.
root = ET.Element("books")

# One <book> child per record, each holding a single <title> child.
# The loop uses its own names (book_node / title_node) so it does not rebind
# book_element / title_element, which the scraping loop earlier in this cell
# uses for BeautifulSoup tags.
for book in books_data:
    book_node = ET.SubElement(root, "book")
    title_node = ET.SubElement(book_node, "title")
    title_node.text = book["title"]

# Wrap the root in an ElementTree and write it out as UTF-8 with an
# <?xml ...?> declaration at the top of the file.
tree = ET.ElementTree(root)
tree.write(xml_file, encoding="utf-8", xml_declaration=True)

# Report completion of the JSON and XML exports.
print("Data scraped and saved successfully!")


Data scraped and saved successfully!


In [8]:
# Print the full list of scraped {"title": ...} records for a quick visual check.
print(books_data)

[{'title': 'Practical Statistics for Data Scientists: 50 Essential Concepts'}, {'title': 'Introduction to Machine Learning with Python: A Guide for Data Scientists'}, {'title': 'Data Analysis with Open Source Tools: A Hands-On Guide for Programmers and Data Scientists'}, {'title': 'Practical Statistics for Data Scientists: 50+ Essential Concepts Using R and Python'}, {'title': 'Introduction to Data Visualization & Storytelling: A Guide For The Data Scientist'}, {'title': 'Think Like a Data Scientist: Tackle the data science process step-by-step'}, {'title': 'Data Scientists at Work'}, {'title': 'Developing Analytic Talent: Becoming a Data Scientist'}, {'title': 'The Data Science Handbook: Advice and Insights from 25 Amazing Data Scientists'}, {'title': 'Analyzing the Analyzers: An Introspective Survey of Data Scientists and Their Work'}, {'title': 'Truth or Truthiness: Distinguishing Fact from Fiction by Learning to Think Like a Data Scientist'}, {'title': 'Data Science from Scratch: T