In [None]:
import requests

# Chapter page to download
URL = "https://wanderinginn.com/2017/03/03/rw1-00/"

# Browser-like headers sent along with the request
HEADERS = {
    "User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:146.0) Gecko/20100101 Firefox/146.0",
    "Accept-Language": "en-US,en;q=0.5",
    "Accept-Encoding": "gzip, deflate"
}

# Seconds to wait for the server before giving up. requests applies no timeout
# by default, so without this a stalled connection would block the cell forever.
REQUEST_TIMEOUT = 30

# Download the page and show the HTTP status so a failed fetch is visible
response = requests.get(URL, headers=HEADERS, timeout=REQUEST_TIMEOUT)
print(response.status_code)

In [None]:
from pathlib import Path

# Determine a filename for the chapter based on the last path segment of the URL
fname = Path.cwd() / "raw" / (URL.strip("/").split("/")[-1] + ".html")

# Make sure the output folder exists; open() does not create missing directories
fname.parent.mkdir(parents=True, exist_ok=True)

# Save the raw html page just in case.
# The encoding is explicit because the platform default encoding may not be
# able to represent every character in the page text.
with open(fname, "w", encoding="utf-8") as file:
    print(f"Save raw webpage as: \"{fname}\"")
    file.write(response.text)

In [None]:
from bs4 import BeautifulSoup
import sys

# Parse the downloaded page with the built-in HTML parser
soup = BeautifulSoup(response.content, "html.parser")

# Locate the heading widget that wraps the chapter title
title_container = soup.find(
    class_="elementor-element elementor-element-3d7596e elementor-widget elementor-widget-heading"
)
if title_container is None:
    print("Error: Could not find title container!")
    sys.exit()

# The title itself sits in the <h2> inside that widget
title_tag = title_container.find("h2")
if title_tag is None:
    print("Error: Could not find title!")
    sys.exit()

# Take the first child of the heading as the title and report it
title = title_tag.contents[0]
print(f"Title: {title}")

# Identify the container for the chapter date
date_container = soup.find(class_="elementor-element elementor-element-8aba006 elementor-widget elementor-widget-text-editor")
# Check the date container itself, so that a missing container is reported
# here instead of failing on the .find() call below.
if date_container is None:
    print("Error: Could not find date container!")
    sys.exit()

# Get the tag with the chapter date
date_tag = date_container.find(class_="elementor-widget-container")
if date_tag is None:
    print("Error: Could not find date tag!")
    sys.exit()

# Print the chapter date (first child of the tag, surrounding whitespace removed)
date = date_tag.contents[0].strip()
print(f"Date: {date}")

# Get the main chapter content
chapter = soup.find(class_="twi-article")
if chapter is None:
    print("Error: Could not find chapter content!")
    sys.exit()

# Strip the bottom links from the chapter content (its last two child nodes).
# extract() detaches the node from the tree and returns it; removing items from
# the .contents list directly would leave the node's tree links in place, so
# traversals such as .text could still visit the "removed" content.
for i in range(2):
    if not chapter.contents:
        # Fewer children than expected: stop instead of raising IndexError
        print(f"Warning: No child node left to remove for extraneous tag {i+1}!")
        break
    removed = chapter.contents[-1].extract()
    print(f"Removed extraneous tag {i+1}: {repr(removed)}")

# Print the word count of the chapter content (whitespace-separated tokens)
print(f"Content: {len(chapter.text.split())} words")

# Find the tag with the link to the next chapter.
# Stored as `next_link` so the builtin next() is not shadowed in the kernel.
next_link = soup.find(rel="next")
if next_link is None:
    print("Error: No link to next chapter found!")
else:
    # Read the href first: reusing double quotes inside an f-string
    # replacement field is a syntax error before Python 3.12.
    next_url = next_link.get("href")
    print(f"Link to next chapter: {next_url}")