In [12]:
import os
from src.filter import filter_hadiths
from src.cleaner import clean_hadiths
from src.splitter import save_as_individual_cards

In [13]:
def process_hadith_book(input_path: str, book_name: str, output_dir: str):
    """
    Run the filter -> clean -> split pipeline for a single hadith book.

    The work is delegated, in order, to three project helpers:
    ``filter_hadiths(input_path, book_name)``, then ``clean_hadiths`` on
    that result, then ``save_as_individual_cards`` on the cleaned result
    together with ``output_dir``. A start banner and a two-line summary
    are printed to stdout; the function itself returns nothing.

    Args:
        input_path: Path to input JSON file
        book_name: Name of book to process
        output_dir: Output directory for cards
    """
    print(f"\nStarting processing for book: {book_name}")

    # Steps 1 and 2: select the entries for this book, then clean them.
    # The inner call runs first, so the stage order is filter -> clean.
    cleaned = clean_hadiths(filter_hadiths(input_path, book_name))

    # Step 3: write the cards; the helper's return value is treated as the
    # location of the cards and is only used for the summary below.
    cards_dir = save_as_individual_cards(cleaned, output_dir)

    # Summary: number of cleaned entries and the absolute cards location.
    print(f"\nFinished processing {len(cleaned)} hadiths from {book_name}")
    print(f"Cards saved in: {os.path.abspath(cards_dir)}")

### Sahih Bukhari

In [14]:
# Settings for this run
INPUT_FILE = "../DATA/ScrappingOutput/Sample_Hadiths.json"
BOOK_NAME = "صحيح البخاري"  # Swap in another book title to process a different book
OUTPUT_DIR = os.path.join("../DATA/ProcessingOutput/", f"{BOOK_NAME}")  # one folder per book

# Filter, clean and write the cards for the selected book
process_hadith_book(
    input_path=INPUT_FILE,
    book_name=BOOK_NAME,
    output_dir=OUTPUT_DIR,
)


Starting processing for book: صحيح البخاري
Filtered 4 hadiths for book: صحيح البخاري


Cleaning hadiths: 100%|██████████| 4/4 [00:00<00:00, 24070.61it/s]


Cleaned 4 hadiths


Creating cards: 100%|██████████| 4/4 [00:00<00:00, 3400.33it/s]

Created 4 hadith cards in ../DATA/ProcessingOutput/صحيح البخاري

Finished processing 4 hadiths from صحيح البخاري
Cards saved in: /home/mohamed/Desktop/GITHUP/HadithsDorr/DATA/ProcessingOutput/صحيح البخاري





### Sahih Muslim

In [15]:
# Settings for this run
INPUT_FILE = "../DATA/ScrappingOutput/Sample_Hadiths.json"
BOOK_NAME = "صحيح مسلم"
OUTPUT_DIR = os.path.join("../DATA/ProcessingOutput/", f"{BOOK_NAME}")  # one folder per book

# Filter, clean and write the cards for the selected book
process_hadith_book(
    input_path=INPUT_FILE,
    book_name=BOOK_NAME,
    output_dir=OUTPUT_DIR,
)


Starting processing for book: صحيح مسلم
Filtered 4 hadiths for book: صحيح مسلم


Cleaning hadiths: 100%|██████████| 4/4 [00:00<00:00, 22162.77it/s]


Cleaned 4 hadiths


Creating cards: 100%|██████████| 4/4 [00:00<00:00, 1800.32it/s]

Created 4 hadith cards in ../DATA/ProcessingOutput/صحيح مسلم

Finished processing 4 hadiths from صحيح مسلم
Cards saved in: /home/mohamed/Desktop/GITHUP/HadithsDorr/DATA/ProcessingOutput/صحيح مسلم





### Other Books

In [16]:
# Settings for this run
INPUT_FILE = "../DATA/ScrappingOutput/Sample_Hadiths.json"
BOOK_NAME = "صحيح أبي داود"
OUTPUT_DIR = os.path.join("../DATA/ProcessingOutput/", f"{BOOK_NAME}")  # one folder per book

# Filter, clean and write the cards for the selected book
process_hadith_book(
    input_path=INPUT_FILE,
    book_name=BOOK_NAME,
    output_dir=OUTPUT_DIR,
)


Starting processing for book: صحيح أبي داود
Filtered 1 hadiths for book: صحيح أبي داود


Cleaning hadiths: 100%|██████████| 1/1 [00:00<00:00, 7626.01it/s]


Cleaned 1 hadiths


Creating cards: 100%|██████████| 1/1 [00:00<00:00, 2084.64it/s]

Created 1 hadith cards in ../DATA/ProcessingOutput/صحيح أبي داود

Finished processing 1 hadiths from صحيح أبي داود
Cards saved in: /home/mohamed/Desktop/GITHUP/HadithsDorr/DATA/ProcessingOutput/صحيح أبي داود



