In [None]:
#EPUB
import requests
from lxml import html
import os

from urllib.parse import urljoin  # imported here so the cell runs on a fresh kernel

# Download a single EPUB from a book page:
#   1. fetch the page,
#   2. locate the <a> element that wraps the <h2>GET</h2> button,
#   3. stream the linked file into the current working directory.

# URL of the LibGen book page
url = "https://libgen.li/ads.php?md5=164cd9a525c2829f5489439e9d60721b"

# Root against which the GET button's href is resolved
BASE_URL = "https://libgen.li/"

# Seconds to wait for the server before giving up; without a timeout
# requests waits indefinitely and a stalled server hangs the kernel
REQUEST_TIMEOUT = 30

# Send a GET request to fetch the page content
response = requests.get(url, timeout=REQUEST_TIMEOUT)
response.raise_for_status()  # Ensure request was successful

# Parse the HTML content using lxml
tree = html.fromstring(response.content)

# Find the <a> tag that contains <h2>GET</h2> using XPath
get_link_element = tree.xpath("//a[h2[normalize-space()='GET']]")

if not get_link_element:
    print("GET button not found!")
else:
    # Extract the href attribute (download link)
    relative_link = get_link_element[0].attrib.get("href", "").strip()

    if not relative_link:
        # An empty href would resolve to the site root and "download" the home page
        print("GET button has no download link!")
    else:
        # urljoin handles relative, root-relative, protocol-relative and
        # already-absolute hrefs without producing doubled slashes or hosts
        download_url = urljoin(BASE_URL, relative_link)

        print("Download link found:", download_url)

        # The context manager releases the streamed connection even if
        # writing the file fails half-way
        with requests.get(download_url, stream=True, timeout=REQUEST_TIMEOUT) as file_response:
            file_response.raise_for_status()

            # Extract file name from headers if available
            content_disposition = file_response.headers.get("Content-Disposition")
            if content_disposition and "filename=" in content_disposition:
                file_name = content_disposition.split("filename=")[-1].strip()
                if file_name.startswith('"'):
                    # Quoted value: keep what is between the first pair of quotes
                    file_name = file_name[1:].split('"', 1)[0]
                else:
                    # Unquoted value: stop at the next header parameter
                    file_name = file_name.split(";", 1)[0].strip()
            else:
                # Fallback: Extract filename from the URL (query string removed)
                file_name = download_url.split("?")[0]

            # The name is server-controlled: keep only the last path component
            # so it cannot point outside the current directory
            file_name = os.path.basename(file_name.replace("\\", "/"))
            if file_name in ("", ".", ".."):
                file_name = "download"

            # Ensure the file has an .epub extension (case-insensitive check)
            if not file_name.lower().endswith(".epub"):
                file_name += ".epub"

            # Save the EPUB file
            with open(file_name, "wb") as file:
                for chunk in file_response.iter_content(chunk_size=8192):
                    if chunk:  # skip empty keep-alive chunks
                        file.write(chunk)

        print(f"EPUB file downloaded successfully as {file_name}")


Download link found: https://libgen.li/get.php?md5=164cd9a525c2829f5489439e9d60721b&key=2XC3ZJ92DZYILEUU
EPUB file downloaded successfully as [Genghis 3 ] Iggulden, Conn - Bones of the Hills (2010, HarperCollins Publishers Limited) - libgen.li.epub


In [None]:
#PDF
import requests
from lxml import html
import os

from urllib.parse import urljoin  # imported here so the cell runs on a fresh kernel

# Download a single PDF from a book page:
#   1. fetch the page,
#   2. locate the <a> element that wraps the <h2>GET</h2> button,
#   3. stream the linked file into the current working directory.

# URL of the LibGen book page
url = "http://libgen.li/ads.php?md5=ed372ab968c3d988e0490962553aa3cf"

# Root against which the GET button's href is resolved
BASE_URL = "https://libgen.li/"

# Seconds to wait for the server before giving up; without a timeout
# requests waits indefinitely and a stalled server hangs the kernel
REQUEST_TIMEOUT = 30

# Send a GET request to fetch the page content
response = requests.get(url, timeout=REQUEST_TIMEOUT)
response.raise_for_status()  # Ensure request was successful

# Parse the HTML content using lxml
tree = html.fromstring(response.content)

# Find the <a> tag that contains <h2>GET</h2> using XPath
get_link_element = tree.xpath("//a[h2[normalize-space()='GET']]")

if not get_link_element:
    print("GET button not found!")
else:
    # Extract the href attribute (download link)
    relative_link = get_link_element[0].attrib.get("href", "").strip()

    if not relative_link:
        # An empty href would resolve to the site root and "download" the home page
        print("GET button has no download link!")
    else:
        # urljoin handles relative, root-relative, protocol-relative and
        # already-absolute hrefs without producing doubled slashes or hosts
        download_url = urljoin(BASE_URL, relative_link)

        print("Download link found:", download_url)

        # The context manager releases the streamed connection even if
        # writing the file fails half-way
        with requests.get(download_url, stream=True, timeout=REQUEST_TIMEOUT) as file_response:
            file_response.raise_for_status()

            # Extract file name from headers if available
            content_disposition = file_response.headers.get("Content-Disposition")
            if content_disposition and "filename=" in content_disposition:
                file_name = content_disposition.split("filename=")[-1].strip()
                if file_name.startswith('"'):
                    # Quoted value: keep what is between the first pair of quotes
                    file_name = file_name[1:].split('"', 1)[0]
                else:
                    # Unquoted value: stop at the next header parameter
                    file_name = file_name.split(";", 1)[0].strip()
            else:
                # Fallback: Extract filename from the URL (query string removed)
                file_name = download_url.split("?")[0]

            # The name is server-controlled: keep only the last path component
            # so it cannot point outside the current directory
            file_name = os.path.basename(file_name.replace("\\", "/"))
            if file_name in ("", ".", ".."):
                file_name = "download"

            # Ensure the file has a .pdf extension (case-insensitive check)
            if not file_name.lower().endswith(".pdf"):
                file_name += ".pdf"

            # Save the PDF file
            with open(file_name, "wb") as file:
                for chunk in file_response.iter_content(chunk_size=8192):
                    if chunk:  # skip empty keep-alive chunks
                        file.write(chunk)

        print(f"PDF file downloaded successfully as {file_name}")

Download link found: https://libgen.li/get.php?md5=ed372ab968c3d988e0490962553aa3cf&key=0SV3ILPSGLXH4XQE
PDF file downloaded successfully as  Thomas H. Cormen, Charles E. Leiserson, Ronald L. Rivest, Cliffo - Introduction to Algorithms (2009) - libgen.li.pdf


In [1]:
!pip install ebooklib beautifulsoup4 bleach



In [4]:
import os
from ebooklib import epub, ITEM_DOCUMENT, ITEM_IMAGE, ITEM_STYLE
from bs4 import BeautifulSoup
import bleach

# Allow-list of element names handed to bleach as `tags=` in sanitize_html.
# Built group by group so each family of tags is easy to audit.
SAFE_TAGS = (
    ['html', 'head', 'body', 'title']              # document skeleton
    + ['p', 'br', 'b', 'i', 'strong', 'em']        # paragraphs and inline emphasis
    + ['ul', 'ol', 'li', 'blockquote']             # lists and quotations
    + [f'h{level}' for level in range(1, 7)]       # headings h1..h6
    + ['code', 'pre', 'hr']                        # code, preformatted text, rules
)

# Allow-list of attributes handed to bleach as `attributes=`; empty, so no
# attribute is permitted on any tag.
SAFE_ATTRS = dict()

def sanitize_html(html_content):
    """Return ``html_content`` reduced to the tags listed in ``SAFE_TAGS``.

    The markup is parsed with BeautifulSoup, ``<script>`` and ``<style>``
    elements are removed together with their contents, and the result is
    passed through ``bleach.clean`` with ``SAFE_TAGS`` / ``SAFE_ATTRS``.

    Args:
        html_content: HTML markup to clean.

    Returns:
        The cleaned markup as a string.
    """
    soup = BeautifulSoup(html_content, "html.parser")

    # bleach's strip=True removes a disallowed *tag* but keeps the text inside
    # it, so JavaScript/CSS source would otherwise end up as visible text in
    # the cleaned document. Drop those elements, contents included, first.
    for element in soup.find_all(["script", "style"]):
        element.decompose()

    cleaned_html = bleach.clean(
        str(soup),
        tags=SAFE_TAGS,
        attributes=SAFE_ATTRS,
        strip=True
    )
    return cleaned_html

def sanitize_epub(input_path, output_path=None):
    """Write a sanitized copy of an EPUB and return the path of the copy.

    Every document item is run through ``sanitize_html``, stylesheet items
    are left out, and every other item (images included) is copied as-is.
    Only the identifier, title and language metadata are carried over; the
    spine and table of contents are reused from the source book.

    Args:
        input_path: Path of the EPUB file to read.
        output_path: Where to write the result. When omitted, the input path
            with ``_sanitized`` inserted before the extension is used.

    Returns:
        The path the sanitized EPUB was written to.
    """
    if output_path is None:
        base, ext = os.path.splitext(input_path)
        output_path = f"{base}_sanitized{ext}"

    book = epub.read_epub(input_path)
    new_book = epub.EpubBook()

    # Copy the core metadata fields that are present in the source book,
    # taking the first entry's value for each field.
    metadata_setters = (
        ('identifier', new_book.set_identifier),
        ('title', new_book.set_title),
        ('language', new_book.set_language),
    )
    for field, setter in metadata_setters:
        entries = book.get_metadata('DC', field)
        if entries:
            setter(entries[0][0])

    for item in book.get_items():
        item_type = item.get_type()

        if item_type == ITEM_STYLE:
            # Stylesheets are intentionally not carried over
            continue

        if item_type == ITEM_DOCUMENT:
            cleaned_html = sanitize_html(item.get_content().decode('utf-8'))
            item.set_content(cleaned_html.encode('utf-8'))

        # NOTE(review): anything that is not a document or stylesheet is
        # copied unchanged, which would include script items if the book has
        # any; confirm that is intended for a sanitizer.
        new_book.add_item(item)

    new_book.spine = book.spine or ['nav']
    new_book.toc = book.toc or []

    epub.write_epub(output_path, new_book)
    print(f"Sanitized EPUB saved to: {output_path}")
    return output_path

# === Example usage ===
# Runs when the cell (or the script) is executed directly.
# Point input_path at your own EPUB file before running.
if __name__ == "__main__":
    sanitize_epub(input_path="Your_money_or_your_life.epub")

Sanitized EPUB saved to: Your_money_or_your_life_sanitized.epub
