# In [None]:
import os
import requests
from bs4 import BeautifulSoup
import time

# Create the output folder for scraped articles. exist_ok=True makes this
# idempotent and avoids the check-then-create race of a separate exists() test.
os.makedirs('articles', exist_ok=True)

# Maps characters that are illegal or unsafe in file names on common
# filesystems (path separators, Windows-reserved punctuation, ASCII control
# characters) to '-'.
_UNSAFE_FILENAME_CHARS = str.maketrans(
    {**{ch: '-' for ch in '\\/:*?"<>|'}, **{chr(code): '-' for code in range(32)}}
)


def scrape_article(post_id, *, timeout=30):
    """Download one article and save it as a UTF-8 text file under ``articles/``.

    The page at ``https://alresalah.ps/post/<post_id>`` is fetched and parsed
    for a headline, a publication time, a source line and the article body.
    Nothing is written when the page returns 404, has no headline element, or
    yields an empty body.

    Args:
        post_id: Identifier appended to the post URL and used as the file-name
            prefix.
        timeout: Seconds to wait for the server before giving up on the
            request.

    Returns:
        None. Progress and errors are reported with ``print``; any exception
        raised while fetching, parsing or writing is caught and reported so
        that a caller iterating over many IDs can continue.
    """
    try:
        # Construct the URL for each article based on the given post ID
        article_url = f'https://alresalah.ps/post/{post_id}'
        # Without a timeout a stalled connection would block the crawl forever.
        response = requests.get(article_url, timeout=timeout)

        # If the page doesn't exist, skip to the next one
        if response.status_code == 404:
            print(f"No content found for post ID: {post_id}")
            return

        response.raise_for_status()

        soup = BeautifulSoup(response.content, 'html.parser')

        # Extract headline, date, and content
        headline_tag = soup.find('h1', class_='page-post-title font-weight-bold')
        if not headline_tag:
            print(f"No headline or content found for post ID: {post_id}")
            return

        headline = headline_tag.get_text(strip=True)

        time_tag = soup.find('time', class_='d-flex align-items-center')
        time_text = time_tag.get_text(strip=True) if time_tag else 'No Date'

        # Extract the article source
        source_tag = soup.find('h4', class_='page-post-source font-size-22 text-danger')
        source_text = source_tag.get_text(strip=True) if source_tag else 'No Source'

        article_texts = []
        for article in soup.find_all('div', class_='p-4 bg-white'):
            # Drop the nested 'p-3' blocks so their text is not mixed into
            # the article body.
            for p3_div in article.find_all('div', class_='p-3'):
                p3_div.decompose()
            article_texts.append(article.get_text(separator='\n', strip=True))  # Preserve newlines

        article_content = "\n".join(article_texts)

        # Only save if there is content
        if not article_content.strip():
            print(f"No article content found for post ID: {post_id}")
            return

        # Truncate the headline and neutralise characters that cannot appear
        # in a file name so the write does not fail on unusual titles.
        safe_title = headline[:50].translate(_UNSAFE_FILENAME_CHARS)
        # Make sure the target folder exists even if it was removed mid-run.
        os.makedirs('articles', exist_ok=True)
        filename = f"articles/{post_id}_{safe_title}.txt"
        with open(filename, 'w', encoding='utf-8') as f:
            f.write(f"Title: {headline}\n")
            f.write(f"Published: {time_text}\n")
            f.write(f"Source: {source_text}\n\n")  # Store source separately
            f.write(article_content)
        print(f"Saved article from post ID: {post_id}")

    except Exception as e:
        # Deliberately broad: this is the per-article boundary of a long
        # best-effort crawl, so one bad page must not abort the whole run.
        print(f"Error in scraping article {post_id}: {e}")

def scrape_all_articles(start_id, end_id, delay=1):
    """Scrape every post ID from ``start_id`` to ``end_id`` inclusive.

    Args:
        start_id: First post ID to fetch.
        end_id: Last post ID to fetch (inclusive).
        delay: Seconds to pause after each request; values <= 0 disable the
            pause.

    Returns:
        None. Each ID is announced with ``print`` and handed to
        ``scrape_article``.
    """
    for post_id in range(start_id, end_id + 1):
        print(f"Scraping post ID: {post_id}")
        scrape_article(post_id)
        if delay > 0:
            time.sleep(delay)  # Be polite to the server

# Start from post ID 2 up to a known or assumed maximum, e.g., 301000.
# Guarded so that importing this module does not start the crawl; the guard
# is still true when the code runs as a script or in a notebook cell.
if __name__ == "__main__":
    scrape_all_articles(2, 301000)
