In [8]:
import requests
from bs4 import BeautifulSoup
import re

def scrape_chapter(url, timeout=30):
    """Download one chapter page and extract its title and body text.

    Args:
        url: Address of the chapter page to fetch.
        timeout: Seconds to wait on the server before ``requests`` gives up.
            Without a timeout a single stalled connection would block the
            whole scraping loop indefinitely.

    Returns:
        A ``(title, chapter_text)`` tuple of strings. ``title`` is the
        stripped text of the first ``<h1>`` on the page, or
        ``"No Title Found"`` when there is none. ``chapter_text`` is the
        stripped text of every ``<p>`` inside the first
        ``<div class="entry-content">``, joined with newlines, or
        ``"No Content Found"`` when that div is missing.

    Raises:
        requests.exceptions.RequestException: If the request cannot be
            completed or exceeds ``timeout``.
    """
    # NOTE: the HTTP status code is not inspected, so an error page (e.g. a
    # 404) is parsed exactly like a real chapter page.
    response = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(response.content, 'html.parser')

    # Extracting the chapter title
    title_tag = soup.find('h1')
    title = title_tag.text.strip() if title_tag else "No Title Found"

    # Extracting the main content of the chapter; adjust the class (or switch
    # to an ID) if the site's markup uses a different container.
    content_div = soup.find('div', class_='entry-content')
    if content_div:
        paragraphs = content_div.find_all('p')
        chapter_text = '\n'.join(p.text.strip() for p in paragraphs)
    else:
        chapter_text = "No Content Found"

    # Return the extracted title and chapter content
    return title, chapter_text

def sanitize_file_name(file_name):
    """Return ``file_name`` made safe for use as a file name.

    Each backslash, slash, asterisk, question mark, colon, double quote,
    angle bracket and pipe becomes an underscore; the typographic
    apostrophe (’) is dropped entirely. All other characters are kept.
    """
    to_underscore = '\\/*?:"<>|'
    to_delete = '’'
    # One translation table performs both the substitution and the removal.
    table = str.maketrans(to_underscore, '_' * len(to_underscore), to_delete)
    return file_name.translate(table)

# Single text file that accumulates every chapter
output_file_name = "Sorcerers_Handbook_All_Chapters.txt"

# Chapter pages are addressed as <base_url><number>/ for 1..total_chapters
base_url = 'https://hoxionia.com/sorcerers-handbook-chapter-'
total_chapters = 352

# Rule written after each chapter so chapters are easy to tell apart
chapter_separator = "\n\n" + "=" * 50 + "\n\n"

with open(output_file_name, 'w', encoding='utf-8') as sink:
    for chapter_number in range(1, total_chapters + 1):
        heading, body = scrape_chapter(f"{base_url}{chapter_number}/")

        # Title line, blank line, chapter body, then the separator
        sink.write(f"Title: {heading}\n\n{body}{chapter_separator}")

        print(f"Chapter {chapter_number} scraped and added to the file.")

print(f"All {total_chapters} chapters have been scraped and saved to {output_file_name}.")


Chapter 1 scraped and added to the file.
Chapter 2 scraped and added to the file.
Chapter 3 scraped and added to the file.
Chapter 4 scraped and added to the file.
Chapter 5 scraped and added to the file.
Chapter 6 scraped and added to the file.
Chapter 7 scraped and added to the file.
Chapter 8 scraped and added to the file.
Chapter 9 scraped and added to the file.
Chapter 10 scraped and added to the file.
Chapter 11 scraped and added to the file.
Chapter 12 scraped and added to the file.
Chapter 13 scraped and added to the file.
Chapter 14 scraped and added to the file.
Chapter 15 scraped and added to the file.
Chapter 16 scraped and added to the file.
Chapter 17 scraped and added to the file.
Chapter 18 scraped and added to the file.
Chapter 19 scraped and added to the file.
Chapter 20 scraped and added to the file.
Chapter 21 scraped and added to the file.
Chapter 22 scraped and added to the file.
Chapter 23 scraped and added to the file.
Chapter 24 scraped and added to the file.
C

In [9]:
import html

import requests
from bs4 import BeautifulSoup
from ebooklib import epub

def scrape_chapter(url, timeout=30):
    """Download one chapter page and extract its title and body text.

    Args:
        url: Address of the chapter page to fetch.
        timeout: Seconds to wait on the server before ``requests`` gives up.
            Without a timeout a single stalled connection would block the
            whole scraping loop indefinitely.

    Returns:
        A ``(title, chapter_text)`` tuple of strings. ``title`` is the
        stripped text of the first ``<h1>`` on the page, or
        ``"No Title Found"`` when there is none. ``chapter_text`` is the
        stripped text of every ``<p>`` inside the first
        ``<div class="entry-content">``, joined with newlines, or
        ``"No Content Found"`` when that div is missing.

    Raises:
        requests.exceptions.RequestException: If the request cannot be
            completed or exceeds ``timeout``.
    """
    # NOTE: the HTTP status code is not inspected, so an error page (e.g. a
    # 404) is parsed exactly like a real chapter page.
    response = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(response.content, 'html.parser')

    # Extracting the chapter title
    title_tag = soup.find('h1')
    title = title_tag.text.strip() if title_tag else "No Title Found"

    # Extracting the main content of the chapter; adjust the class (or switch
    # to an ID) if the site's markup uses a different container.
    content_div = soup.find('div', class_='entry-content')
    if content_div:
        paragraphs = content_div.find_all('p')
        chapter_text = '\n'.join(p.text.strip() for p in paragraphs)
    else:
        chapter_text = "No Content Found"

    # Return the extracted title and chapter content
    return title, chapter_text

# Create a new EPUB book
book = epub.EpubBook()

# Set metadata
book.set_identifier('id123456')
book.set_title('Sorcerer’s Handbook')
book.set_language('en')

# Add author
book.add_author('Author Name')  # You can replace this with the actual author's name

# Loop through all chapter URLs and add them to the EPUB
base_url = 'https://hoxionia.com/sorcerers-handbook-chapter-'
total_chapters = 352

for i in range(1, total_chapters + 1):
    chapter_url = f"{base_url}{i}/"
    title, chapter_text = scrape_chapter(chapter_url)

    # The scraped strings are plain text, so &, < and > must be escaped
    # before they are embedded in XHTML; otherwise they would be read as
    # (possibly malformed) markup instead of being shown to the reader.
    safe_title = html.escape(title, quote=False)

    # One <p> element per scraped line. The markup is assembled outside the
    # f-string below because a backslash inside an f-string replacement field
    # is a SyntaxError on Python versions before 3.12.
    paragraphs_html = ''.join(
        f'<p>{html.escape(line, quote=False)}</p>'
        for line in chapter_text.split('\n')
    )

    # Create an EPUB chapter
    chapter = epub.EpubHtml(title=title, file_name=f'chap_{i}.xhtml', lang='en')
    chapter.content = f'<h1>{safe_title}</h1>{paragraphs_html}'

    # Add chapter to the book
    book.add_item(chapter)
    book.toc.append(chapter)
    book.spine.append(chapter)

    print(f"Chapter {i} scraped and added to the EPUB.")

# Define Table of Contents and Spine
book.toc = tuple(book.toc)

# Add navigation files
book.add_item(epub.EpubNcx())
book.add_item(epub.EpubNav())

# Write the EPUB file
epub.write_epub('Sorcerers_Handbook.epub', book, {})

print(f"All {total_chapters} chapters have been scraped and saved to 'Sorcerers_Handbook.epub'.")


Chapter 1 scraped and added to the EPUB.
Chapter 2 scraped and added to the EPUB.
Chapter 3 scraped and added to the EPUB.
Chapter 4 scraped and added to the EPUB.
Chapter 5 scraped and added to the EPUB.
Chapter 6 scraped and added to the EPUB.
Chapter 7 scraped and added to the EPUB.
Chapter 8 scraped and added to the EPUB.
Chapter 9 scraped and added to the EPUB.
Chapter 10 scraped and added to the EPUB.
Chapter 11 scraped and added to the EPUB.
Chapter 12 scraped and added to the EPUB.
Chapter 13 scraped and added to the EPUB.
Chapter 14 scraped and added to the EPUB.
Chapter 15 scraped and added to the EPUB.
Chapter 16 scraped and added to the EPUB.
Chapter 17 scraped and added to the EPUB.
Chapter 18 scraped and added to the EPUB.
Chapter 19 scraped and added to the EPUB.
Chapter 20 scraped and added to the EPUB.
Chapter 21 scraped and added to the EPUB.
Chapter 22 scraped and added to the EPUB.
Chapter 23 scraped and added to the EPUB.
Chapter 24 scraped and added to the EPUB.
C