## Importing the required libraries

In [14]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import yagmail
import os

## Scraping and Email Notifier

In [15]:
# Function to fetch webpage content
def fetch_webpage_content(url, timeout=30):
    """Download a web page and parse it into a BeautifulSoup tree.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the run indefinitely.

    Returns:
        A BeautifulSoup object built from the response body with the 'lxml' parser.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: For connection problems or timeouts.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on error responses so an error page is never parsed
    # as if it were the real content.
    response.raise_for_status()
    return BeautifulSoup(response.content, 'lxml')

In [16]:
# Function to extract text from strong and span tags containing the keyword 'UPDATE'
def extract_updates(soup):
    """Collect the text of <strong> and <span> tags that mention 'UPDATE'.

    Args:
        soup: Parsed document exposing ``find_all``; each returned tag must
            expose ``get_text(strip=True)``.

    Returns:
        A list of stripped tag texts containing the substring 'UPDATE', in
        document order, with repeated texts listed only once.
    """
    updates = []
    seen = set()
    for tag in soup.find_all(['strong', 'span']):
        text = tag.get_text(strip=True)  # read once instead of twice per tag
        # A span nested in a strong (or vice versa) yields the same text for
        # both tags; keep only the first occurrence.
        if 'UPDATE' in text and text not in seen:
            seen.add(text)
            updates.append(text)
    return updates

In [17]:
# Function to compare new data with old data
def compare_data(new_data, old_data_file):
    """Find scraped texts that are not present in the previously saved CSV.

    Args:
        new_data: Iterable of freshly scraped text strings.
        old_data_file: Path of the CSV written by a previous run; it is
            expected to have a 'Text' column.

    Returns:
        A tuple ``(new_unmatched_df, new_df)``:
        ``new_unmatched_df`` holds the rows of the new data whose 'Text' is
        absent from the old file (each text once, index reset);
        ``new_df`` holds all new data in a single 'Text' column.
    """
    new_df = pd.DataFrame(new_data, columns=['Text'])

    try:
        old_df = pd.read_csv(old_data_file, dtype=str)
    except (FileNotFoundError, pd.errors.EmptyDataError):
        # First run (or an empty file): nothing has been seen before.
        old_df = pd.DataFrame(columns=['Text'])

    if 'Text' in old_df.columns:
        known_texts = set(old_df['Text'].dropna())
    else:
        known_texts = set()

    # Select only rows that are in the new data and not in the old data.
    # A symmetric difference (concat + drop_duplicates(keep=False)) would also
    # report rows that merely disappeared from the site, and would discard a
    # new text entirely if it occurred twice in the scrape.
    is_new = ~new_df['Text'].isin(known_texts)
    new_unmatched_df = new_df[is_new].drop_duplicates().reset_index(drop=True)

    return new_unmatched_df, new_df

In [18]:
# Function to send email notifications
def send_email(subject, body, recipients):
    """Send an email through yagmail.

    The sender account and password are read from the EMAIL_USER and
    EMAIL_PASSWORD environment variables and passed to ``yagmail.SMTP``.

    Args:
        subject: Subject line of the message.
        body: Message contents.
        recipients: Address or list of addresses to send to.
    """
    email_user = os.getenv('EMAIL_USER')
    email_password = os.getenv('EMAIL_PASSWORD')

    # Use the client as a context manager so the SMTP connection is closed
    # even if sending raises.
    with yagmail.SMTP(email_user, email_password) as yag:
        yag.send(to=recipients, subject=subject, contents=body)

In [19]:
# Function to construct and send an email with the updates
def notify_updates(new_unmatched_df):
    """Email the new updates, or print a notice when there are none.

    Recipients are read from the EMAIL_RECIPIENTS environment variable as a
    comma-separated list; surrounding whitespace and empty entries are ignored.

    Args:
        new_unmatched_df: DataFrame of updates to report.

    Raises:
        ValueError: If there are updates to send but EMAIL_RECIPIENTS is
            unset or contains no addresses.
    """
    if new_unmatched_df.empty:
        print("No new updates to send via email.")
        return

    # Default to '' so an unset variable produces a clear error below rather
    # than an AttributeError from calling .split on None.
    raw_recipients = os.getenv('EMAIL_RECIPIENTS', '')
    recipient_emails = [addr.strip() for addr in raw_recipients.split(',') if addr.strip()]
    if not recipient_emails:
        raise ValueError(
            "EMAIL_RECIPIENTS environment variable is not set or contains no addresses."
        )

    df_string = new_unmatched_df.to_string(index=False)
    email_body = (
        "This is an automated mail. Here are the results for the scheduled trigger:\n\n"
        + df_string  # Include DataFrame content
        + "\n\nYou can review further details at the following URL:\n"
        + "https://aps-india.de/news/"
    )

    send_email('APS News Updates', email_body, recipient_emails)

In [20]:
# Main process to scrape, compare, and notify
def main():
    """Scrape the APS news page, detect new updates, email them, and save state.

    Steps: fetch the page, extract 'UPDATE' texts, compare them with the
    texts stored in 'extracted_updates.csv', send a notification for any
    new ones, then overwrite the CSV with the current scrape.
    """
    url = 'https://aps-india.de/news/'  # URL to scrape
    old_data_file = 'extracted_updates.csv'  # State carried between runs

    # Fetch webpage content
    soup = fetch_webpage_content(url)

    # Extract updates containing 'UPDATE'
    new_data = extract_updates(soup)

    # Compare new data with the previously scraped data in CSV
    new_unmatched_df, new_df = compare_data(new_data, old_data_file)

    # Notify before persisting: if sending fails, the stored state is left
    # untouched so the same updates are detected again on the next run.
    notify_updates(new_unmatched_df)

    # Save the new scraped data for future comparisons
    new_df.to_csv(old_data_file, index=False)

# Entry point: run the notifier when this code is executed directly
# (__name__ is "__main__"), but not when it is imported as a module.
if __name__ == "__main__":
    main()

No new updates to send via email.
