In [None]:
import os
from datetime import datetime

import pandas as pd
import requests
from bs4 import BeautifulSoup

In [None]:
def scrape_autism_alliance_news(*, after=None,
                                output_path='autism_alliance_news.csv',
                                timeout=30):
    """Scrape page 1 of the Autism Alliance news archive into a CSV file.

    Fetches the news listing, pairs each article link with a date element,
    keeps the articles whose title contains 'autism' (or the misspelling
    'austim') and whose date is strictly after the cutoff, and appends them
    to ``output_path``. A header row is written only when the file does not
    exist yet.

    NOTE(review): with the default cutoff (today) only future-dated articles
    are kept, so a listing of already-published news normally matches
    nothing. Pass ``after`` to collect articles newer than a given date.

    Args:
        after: Keep only articles dated strictly after this ``date`` (a
            ``datetime`` is reduced to its date). Defaults to today's date.
        output_path: CSV file that matching articles are appended to.
        timeout: Seconds to wait for the HTTP response before giving up.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an error status.
    """
    url = 'https://autismalliance.ca/news/?news-topic=&year=&page=1'
    headers = {'User-Agent': 'Mozilla/5.0'}

    # Without a timeout, requests waits indefinitely on an unresponsive server.
    response = requests.get(url, headers=headers, timeout=timeout)
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'html.parser')

    title_elements = soup.find_all('a', class_='archive-article__link')
    date_elements = soup.find_all('p', class_='archive-article__date')

    # Titles and dates are paired purely by position; zip() would silently
    # drop the surplus and may mis-pair entries if the counts differ.
    if len(title_elements) != len(date_elements):
        print(f"Warning: found {len(title_elements)} title(s) but "
              f"{len(date_elements)} date(s); pairs may be misaligned.")

    # Resolve the cutoff once instead of on every loop iteration.
    if after is None:
        cutoff = datetime.today().date()
    elif isinstance(after, datetime):
        cutoff = after.date()  # date and datetime cannot be compared directly
    else:
        cutoff = after

    results = []

    for title_tag, date_tag in zip(title_elements, date_elements):
        title_text = title_tag.get_text(strip=True)
        date_text = date_tag.get_text(strip=True)

        # Dates are expected in the form "January 5, 2024"; skip anything else.
        try:
            pub_date = datetime.strptime(date_text, '%B %d, %Y').date()
        except ValueError:
            continue

        # Keyword filter ('austim' deliberately catches a common misspelling).
        lowered_title = title_text.lower()
        if 'autism' not in lowered_title and 'austim' not in lowered_title:
            continue

        # Date filter: strictly newer than the cutoff.
        if pub_date <= cutoff:
            continue

        results.append({
            'title': title_text,
            'date': pub_date,
            # .get() avoids a KeyError on an anchor without an href attribute.
            'url': title_tag.get('href', ''),
        })

    if not results:
        if after is None:
            print("No new articles with 'autism/austim' and a future date found.")
        else:
            print(f"No new articles with 'autism/austim' dated after {cutoff} found.")
        return

    # Append to the CSV, writing the header only when creating the file.
    df = pd.DataFrame(results)
    df.to_csv(
        output_path,
        mode='a',
        header=not os.path.exists(output_path),
        index=False,
    )
    print(f"{len(results)} new article(s) collected and saved.")

# Entry point: run the scraper when this code is executed directly
# (__name__ is "__main__"), but not when the module is imported.
if __name__ == "__main__":
    scrape_autism_alliance_news()


No new articles with 'autism/austim' and a future date found.
