<a href="https://colab.research.google.com/github/OpenShamela/shamela_crawler/blob/master/shamela_crawler.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#@title اﻹعداد
# Install the uv package manager by piping its install script to sh.
!curl -LsSf https://astral.sh/uv/install.sh | sh
# Install ipywidgets quietly through uv's pip interface.
!uv pip install -q ipywidgets  --prerelease explicit
# Enable the widgets notebook extension for the current environment (--sys-prefix).
!jupyter nbextension enable --py widgetsnbextension --sys-prefix
# Shallow-clone the crawler repository (--depth 1) and make it the working directory.
!git clone https://github.com/OpenShamela/shamela_crawler.git --depth 1
%cd shamela_crawler
# Sync the project's environment with uv.
# NOTE(review): presumably this installs the crawler's dependencies (scrapy etc.) — confirm against the repo's pyproject.
!uv sync --quiet --prerelease explicit

In [None]:
# -*- coding: utf-8 -*-
"""
Shamela Crawler EPUB Downloader
"""

# @title 2. ادخال الكتب والمعالجة
# @markdown شغل هذه الخلية ليظهر مكان إدخال الروابط

# @markdown أدخل عناوين الكتب التي تريد تحميلها كل كتاب في سطر

# @markdown اضغط زر البدء وانتظر اكتمال العملية حتى يظهر زر التحميل

import ipywidgets as widgets
from IPython.display import display, clear_output, HTML
import re
from pathlib import Path
import zipfile
from google.colab import files
from functools import partial

# Location of the cloned crawler repository.
# NOTE(review): assumes the setup cell was run from /content — confirm.
REPO_DIR = Path("/content/shamela_crawler/")
# Directory that is searched for generated *.epub files (the repository root itself).
EPUB_OUTPUT_DIR = REPO_DIR
# Directory in which the combined zip archive is written.
CONTENT_DIR = Path("/content")
# File name of the archive produced by the "download all" button.
ZIP_FILENAME = "shamela_epubs.zip"

# --- Input and output widgets ---

# Multi-line text box that receives the book URLs, one per line.
urls_textarea = widgets.Textarea(
    description='روابط الكتب:',
    placeholder='أدخل رابط الكتب كل كتاب في سطر',
    value='',
    disabled=False,
    layout=widgets.Layout(width='100%', height='150px'),
)

# Checkbox whose state is forwarded to the crawler as UPDATE_EPUB_HAMESH.
update_hamesh_checkbox = widgets.Checkbox(
    description='هوامش محسنة',
    value=False,
    indent=False,
    disabled=False,
    layout=widgets.Layout(margin='10px 0 0 0'),  # top margin only
)

# Button that starts processing of the entered URLs.
process_button = widgets.Button(
    description="البدء",
    icon='cogs',
    button_style='primary',
)

# Output areas: the first collects processing messages, the second is filled
# with the download buttons once processing has finished.
process_output_area = widgets.Output()
download_widgets_area = widgets.Output()

def extract_book_id(url):
    """Extract the numeric book ID from a Shamela URL.

    Recognised forms (surrounding whitespace is ignored):

    * ``.../book/<id>``, optionally followed by ``/``, a page segment
      (``/book/<id>/<page>``), a query string or a ``#fragment``;
    * ``.../<id>`` or ``.../<id>/`` as the last path segment.

    Args:
        url: One URL as typed by the user.

    Returns:
        The book ID as a string of digits, or ``None`` (after printing a
        warning) when no ID can be found.
    """
    cleaned = url.strip()

    # Prefer the explicit /book/<id> segment. Anchoring only on the trailing
    # number would return the page number for links like /book/<id>/<page>.
    if match := re.search(r'/book/(\d+)(?=[/?#]|$)', cleaned):
        return match.group(1)

    # Otherwise accept a purely numeric last path segment, ignoring any
    # query string or fragment that follows the path.
    path_only = re.split(r'[?#]', cleaned, maxsplit=1)[0]
    if match := re.search(r'/(\d+)/?$', path_only):
        return match.group(1)

    print(f"Warning: Could not extract book ID from URL: {url}")
    return None

def trigger_single_download(file_path, b):
    """Button callback that downloads one file through ``files.download``.

    Args:
        file_path: ``Path`` of the file to download.
        b: The clicked button widget; not used.
    """
    # Guard clause: report a missing file and stop.
    if not file_path.exists():
        print(f"Error: File not found: {file_path}")
        return

    print(f"Starting download for: {file_path.name}")
    files.download(str(file_path))

def create_and_download_zip(b):
    """Bundle every generated EPUB into one zip archive and download it.

    All ``*.epub`` files directly inside ``EPUB_OUTPUT_DIR`` are written, under
    their bare file names and in sorted order, to ``CONTENT_DIR / ZIP_FILENAME``.
    Any previous archive of that name is replaced. Messages are printed into
    ``download_widgets_area``.

    Args:
        b: The clicked button widget; not used.
    """
    with download_widgets_area:  # Display messages in the download area
        try:
            # Sort so the archive layout does not depend on directory order.
            epub_paths = sorted(EPUB_OUTPUT_DIR.glob('*.epub'))
            if not epub_paths:
                # Nothing to bundle: do not hand the user an empty archive.
                print("No EPUB files found to zip.")
                return

            print(f"\nCreating zip file: {ZIP_FILENAME}...")
            zip_path = CONTENT_DIR / ZIP_FILENAME
            zip_path.unlink(missing_ok=True)  # Remove old zip
            with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
                for epub_path in epub_paths:
                    zipf.write(epub_path, arcname=epub_path.name)
            files.download(str(zip_path))
        except Exception as e:
            # Callback boundary: report the failure instead of losing it.
            print(f"An error occurred during zipping or download: {e}")


def run_processing(b):
    """Function executed when the Process button is clicked."""
    # Disable button during processing
    process_button.disabled = True
    # Clear previous outputs
    process_output_area.clear_output()
    download_widgets_area.clear_output()

    with process_output_area:
        print("--- Starting Book Processing ---")
        urls = urls_textarea.value.splitlines()
        valid_urls = [url for url in urls if url.strip()] # Remove empty lines
        update_hamesh = update_hamesh_checkbox.value # Get checkbox value

        if not valid_urls:
            print("No valid URLs entered. Please enter URLs in the text area above.")
            process_button.disabled = False # Re-enable button
            return

        print(f"Found {len(valid_urls)} URLs to process.")
        for i, url in enumerate(valid_urls):
            print(f"\n--- Processing URL {i+1}/{len(valid_urls)}: {url} ---")
            book_id = extract_book_id(url)
            if book_id:
                scrapy_command = f"UV_PRERELEASE=explicit uv run scrapy crawl book -a book_id={book_id} -s MAKE_EPUB=true -s UPDATE_EPUB_HAMESH={str(update_hamesh).lower()} -s LOG_LEVEL=WARNING"
                !{scrapy_command}
            else:
                print(f"Skipping URL due to missing Book ID: {url}")


    # --- Display Download Options ---
    with download_widgets_area:
        # Create individual download widgets
        download_list_widgets = []
        for epub_path in EPUB_OUTPUT_DIR.glob('*.epub'):
            label = widgets.Label(value=epub_path.name, layout=widgets.Layout(flex='1')) # Allow label to expand
            button = widgets.Button(
                description="تحميل",
                button_style='info',
                tooltip=f'تحميل {epub_path.name}',
                icon='download',
                layout=widgets.Layout(width='auto') # Fit button to text
                )
            # Use partial to create a callback with the specific path fixed
            button.on_click(partial(trigger_single_download, epub_path))
            # Arrange label and button horizontally
            row = widgets.HBox([label, button], layout=widgets.Layout(margin='5px 0'))
            download_list_widgets.append(row)

        # Create the "Download All" button
        download_all_button = widgets.Button(
            description="تحميل الكتب ملف ZIP",
            button_style='success',
            tooltip=f'تحميل {ZIP_FILENAME}',
            icon='archive',
            layout=widgets.Layout(width='auto', margin='20px 0 0 0') # Add top margin
        )
        download_all_button.on_click(create_and_download_zip)

        separator = widgets.HTML("<hr>")
        all_download_widgets = widgets.VBox(download_list_widgets + [separator, download_all_button])
        display(all_download_widgets)

    process_button.disabled = False

# Run run_processing whenever the start button is clicked.
process_button.on_click(run_processing)

# Render the input form followed by the area that receives processing messages.
display(urls_textarea, update_hamesh_checkbox, process_button, process_output_area)

# Render the (initially empty) area that run_processing fills with download buttons.
display(download_widgets_area)

In [None]:
# @title 3. تحميل الكتب
# Display the download area in this cell as well. It is the same Output widget
# that run_processing fills with the per-book and zip download buttons.
display(download_widgets_area)