<a href="https://colab.research.google.com/github/PammieSpammie/MIS730/blob/main/news_aggregator.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
pip install feedparser gradio schedule

Collecting feedparser
  Downloading feedparser-6.0.11-py3-none-any.whl.metadata (2.4 kB)
Collecting gradio
  Downloading gradio-4.44.1-py3-none-any.whl.metadata (15 kB)
Collecting schedule
  Downloading schedule-1.2.2-py3-none-any.whl.metadata (3.8 kB)
Collecting sgmllib3k (from feedparser)
  Downloading sgmllib3k-1.0.0.tar.gz (5.8 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0 (from gradio)
  Downloading fastapi-0.115.0-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.4.0-py3-none-any.whl.metadata (2.9 kB)
Collecting gradio-client==1.3.0 (from gradio)
  Downloading gradio_client-1.3.0-py3-none-any.whl.metadata (7.1 kB)
Collecting httpx>=0.24.1 (from gradio)
  Downloading httpx-0.27.2-py3-none-any.whl.metadata (7.1 kB)
Collecting orjson~=3.0 (from gradio)
  Downloading orjson-3.10.7-cp310-cp310-manylinux_2_

In [1]:
# Install necessary packages
#!pip install feedparser gradio schedule

import feedparser
import gradio as gr
import threading
import time
import schedule
import json
import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from datetime import datetime, timezone

# Initialize the JSON database
import os

# Path (relative to the current working directory) of the JSON file that
# load_user_data / save_user_data / clear_preferences read, write and delete.
db_file = 'user_data.json'

def get_default_data():
    """Build a brand-new user record with nothing configured yet.

    A fresh dict (with fresh ``emails`` / ``feeds`` / ``stories`` containers)
    is created on every call, so the caller may mutate the result freely.
    """
    defaults = dict(
        emails=[],
        feeds={},
        frequency='Every Story',
        stories=[],
        preferences_saved=False,
    )
    return defaults

def load_user_data():
    """Load the persisted user record from ``db_file``.

    Returns:
        The decoded dict, or a fresh ``get_default_data()`` record when the
        file is missing, does not contain valid JSON, or contains JSON that
        is not an object.
    """
    try:
        with open(db_file, 'r') as f:
            data = json.load(f)
    except (FileNotFoundError, json.JSONDecodeError):
        # Open-and-handle instead of exists()-then-open: the file can be
        # removed by clear_preferences (running in another thread) between a
        # check and the open.
        return get_default_data()

    # Every caller uses dict methods on the result; a file holding e.g. `[]`
    # or `null` is treated like a corrupt file.
    if not isinstance(data, dict):
        return get_default_data()
    return data

def save_user_data(data):
    """Persist ``data`` to ``db_file`` as JSON without exposing partial writes.

    Args:
        data: JSON-serialisable user record.

    Raises:
        TypeError / ValueError: if ``data`` cannot be serialised; the existing
            file is left untouched in that case.
        OSError: if the file cannot be written or replaced.
    """
    # Serialise before touching the disk so a bad payload cannot truncate the
    # existing database.
    payload = json.dumps(data)

    # The scheduler thread and the UI thread may both save; a per-thread temp
    # name keeps them from writing into the same temporary file.
    tmp_path = f"{db_file}.{threading.get_ident()}.tmp"
    try:
        with open(tmp_path, 'w') as f:
            f.write(payload)
        # Swap the finished file into place so a concurrent reader sees either
        # the old or the new content, never a half-written file.
        os.replace(tmp_path, db_file)
    finally:
        # Only still present when the write or the replace failed.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)

# List of available RSS feeds.
# Maps the display name offered in the UI checkbox group to the feed URL;
# setup_preferences looks the URL up by that name.
available_feeds = {
    'NASA News': 'https://www.nasa.gov/rss/dyn/breaking_news.rss',
    'BBC News': 'http://feeds.bbci.co.uk/news/rss.xml',
    'TechCrunch': 'http://feeds.feedburner.com/TechCrunch/'
}

# Function to set up user preferences
def setup_preferences(emails_input, selected_feeds, frequency, keywords_input):
    """Persist the user's choices, reset the schedule and send a first batch.

    Args:
        emails_input: Comma-separated recipient addresses.
        selected_feeds: Names of the chosen feeds; each must be a key of
            ``available_feeds``.
        frequency: 'Every Story', 'Hourly' or 'Daily'. A missing value (no
            radio option chosen) is treated as 'Every Story'.
        keywords_input: Optional comma-separated keywords applied to every
            selected feed.

    Returns:
        A status message for the UI.

    Raises:
        KeyError: if a selected feed name is not in ``available_feeds``.
    """
    emails = [email.strip() for email in (emails_input or '').split(',') if email.strip()]
    # Drop blank keywords: an empty string is a substring of every story, so
    # input such as "space, " would otherwise disable filtering entirely.
    keywords = [kw.strip() for kw in (keywords_input or '').split(',') if kw.strip()]
    # Store the same frequency the scheduler below acts on, so monitor_feeds
    # and the schedule cannot disagree when nothing was selected.
    frequency = frequency or 'Every Story'

    data = get_default_data()  # Start with default data
    data['emails'] = emails
    data['frequency'] = frequency
    data['feeds'] = {
        feed: {'url': available_feeds[feed], 'keywords': keywords}
        for feed in (selected_feeds or [])
    }
    data['preferences_saved'] = True
    save_user_data(data)

    # Replace the schedule before sending anything: if the initial email
    # fails, the jobs already match the preferences that were just saved
    # instead of the previous ones.
    schedule.clear()
    if frequency == 'Hourly':
        schedule.every().hour.do(monitor_feeds)
    elif frequency == 'Daily':
        schedule.every().day.at("08:00").do(monitor_feeds)
    else:  # 'Every Story'
        schedule.every(5).minutes.do(monitor_feeds)

    # Send initial stories
    send_initial_stories()

    return "Preferences updated successfully. Initial stories sent and added to view."

# Function to monitor feeds and collect stories
def monitor_feeds():
    """Poll every configured feed once and deliver entries not delivered before.

    Does nothing until preferences have been saved. For each feed, entries
    whose title or summary contains one of the feed's keywords (or all
    entries when no keywords are set) are collected, skipping entries whose
    id was recorded by an earlier run.

    Delivery:
        * 'Every Story': one email per new story.
        * any other frequency: a single digest email with all new stories,
          which are also appended to the stored ``stories`` list (capped at
          the latest 50) for the "View Stories" tab.

    The ids of delivered entries are persisted under ``seen_entries`` so the
    next run does not send them again.
    """
    max_seen_ids = 1000  # bound the persisted id list so the file cannot grow forever

    data = load_user_data()
    if not data.get('preferences_saved', False):
        print("Waiting for preferences to be saved...")
        return

    # `or` also covers a stored null frequency.
    frequency = data.get('frequency') or 'Every Story'
    emails = data.get('emails', [])
    seen_entries = set(data.get('seen_entries', []))
    new_ids = []
    new_stories = []

    for feed_name, feed_info in data.get('feeds', {}).items():
        feed = feedparser.parse(feed_info['url'])
        # Lower-case once per feed; ignore blank keywords, which would match everything.
        keywords = [kw.lower() for kw in feed_info.get('keywords', []) if kw]
        feed_title = feed.feed.get('title', feed_name)

        for entry in feed.entries:
            # Not every feed supplies an id; fall back to the link.
            entry_id = entry.get('id') or entry.get('link')
            if not entry_id or entry_id in seen_entries:
                continue

            title = entry.get('title', '')
            summary = entry.get('summary', '')
            haystack = (title + summary).lower()
            if keywords and not any(kw in haystack for kw in keywords):
                continue

            seen_entries.add(entry_id)
            new_ids.append(entry_id)
            new_stories.append({
                'title': title,
                'link': entry.get('link', ''),
                'summary': summary,
                'published': entry.get('published', 'No date'),
                'feed': feed_title,
            })

    if new_stories:
        if emails:
            if frequency == 'Every Story':
                for story in new_stories:
                    send_email([story], emails)
            else:
                send_email(new_stories, emails)

        # Re-read before writing: preferences may have been changed or
        # cleared from the UI while the feeds were being downloaded, and
        # those changes must not be overwritten with the stale copy.
        data = load_user_data()
        if data.get('preferences_saved', False):
            data['seen_entries'] = (data.get('seen_entries', []) + new_ids)[-max_seen_ids:]
            if frequency != 'Every Story':
                data['stories'] = (data.get('stories', []) + new_stories)[-50:]
            save_user_data(data)

    print(f"Checked feeds at {datetime.now()}")
from google.colab import userdata

# Function to send email
# Note: this sends from my email account and authenticates with a Gmail App Password.
# If you are going to use this function extensively, replace the sender address with
# your own Gmail address and create a secret named 'email_password' in this Colab
# notebook that holds your own App Password. You obtain an App Password as follows.
#
# An App Password is a 16-digit passcode that gives a less secure app or device permission to access your Google Account. Here's how to set it up:
# a. Go to your Google Account settings: https://myaccount.google.com/
# b. Select "Security" on the left navigation panel
# c. Under "Signing in to Google," select "2-Step Verification" - you may need to sign in
# d. At the bottom of the page, select "App passwords"
# e. Select "Mail" as the app and "Other (Custom name)" as the device
# f. Follow the instructions to generate the App password
# g. Use this App password in your Python script instead of your regular password

def send_email(stories, emails):
    """Send one multipart (plain-text + HTML) message listing ``stories``.

    Args:
        stories: List of story dicts with 'feed', 'title', 'link',
            'summary' and 'published' keys.
        emails: List of recipient addresses.

    Nothing is sent (and no login is attempted) when either list is empty.

    Raises:
        smtplib.SMTPException / OSError: if connecting, authenticating or
            sending fails.
    """
    # Local import so the function does not depend on a new file-level import.
    from html import escape

    if not stories or not emails:
        print("No stories or no recipients; email not sent")
        return

    # Replace these with your email credentials
    sender_email = "jvnickerson@gmail.com"
    sender_password = userdata.get('email_password')

    subject = "Your News Update"

    # Prepare email content
    text_parts = []
    html_parts = []
    for story in stories:
        text_parts.append(
            f"{story['feed']} - {story['title']}\n{story['published']}\n{story['summary']}\nRead more: {story['link']}\n\n"
        )
        # The link comes from the feed and lands inside a quoted attribute;
        # escape it so a quote character cannot break out of href='...'.
        link = escape(story['link'], quote=True)
        html_parts.append(
            f"<h3>{story['feed']} - <a href='{link}'>{story['title']}</a></h3>"
            f"<p><em>{story['published']}</em></p>"
            f"<p>{story['summary']}</p><hr>"
        )
    text_content = "".join(text_parts)
    html_content = "".join(html_parts)

    message = MIMEMultipart("alternative")
    message["From"] = sender_email
    message["To"] = ", ".join(emails)
    message["Subject"] = subject

    message.attach(MIMEText(text_content, "plain"))
    message.attach(MIMEText(html_content, "html"))

    # The context manager closes the connection even when login or sending
    # raises; the timeout keeps a stalled server from blocking the caller
    # (possibly the scheduler thread) indefinitely.
    with smtplib.SMTP("smtp.gmail.com", 587, timeout=30) as server:
        server.starttls()
        server.login(sender_email, sender_password)
        server.sendmail(sender_email, emails, message.as_string())

    print(f"Email sent to {', '.join(emails)}")

# Function to get the latest story from a feed
def get_latest_story(feed_url):
    """Return the first entry of the feed at ``feed_url`` as a story dict.

    Args:
        feed_url: URL handed to ``feedparser.parse``.

    Returns:
        A dict with 'title', 'link', 'summary', 'published' and 'feed' keys,
        or ``None`` when the parsed feed has no entries. Fields the feed does
        not provide get placeholder values instead of raising.
    """
    feed = feedparser.parse(feed_url)
    if not feed.entries:
        return None

    # The first entry is taken as the latest one (presumably feeds list
    # newest first; this function does not sort).
    entry = feed.entries[0]
    return {
        'title': entry.get('title', ''),
        'link': entry.get('link', ''),
        'summary': entry.get('summary', ''),
        'published': entry.get('published', 'No date'),
        # Same placeholder display_stories uses for a missing source.
        'feed': feed.feed.get('title', 'Unknown Source'),
    }

# Function to send initial stories
def send_initial_stories():
    """Email the first story of each configured feed and record them for viewing.

    Returns without doing anything when preferences have not been saved, or
    when there is no story or no recipient to send to.
    """
    record = load_user_data()
    if not record.get('preferences_saved', False):
        return  # Don't send anything if preferences aren't saved

    configured_feeds = record.get('feeds', {})
    recipients = record.get('emails', [])

    # One lookup per configured feed; feeds that yield nothing are dropped.
    fetched = (get_latest_story(info['url']) for info in configured_feeds.values())
    latest = [story for story in fetched if story]

    if not (latest and recipients):
        return

    send_email(latest, recipients)

    # Add these stories to the user's story list, keeping only the latest 50
    # to limit the size.
    history = record.setdefault('stories', [])
    history.extend(latest)
    record['stories'] = history[-50:]
    save_user_data(record)

    print(f"Sent initial stories from {len(latest)} feeds")

# Function to display matching stories
def display_stories():
    """Render the ten most recently stored stories as HTML, newest first.

    Returns:
        An HTML string, or the plain message "No matching stories found yet."
        when nothing is stored.
    """
    # Local import so the function does not depend on a new file-level import.
    from html import escape

    data = load_user_data()
    stories = data.get('stories', [])
    if not stories:
        return "No matching stories found yet."

    blocks = []
    for story in reversed(stories[-10:]):  # Show the last 10 stories
        # Use .get() throughout so a record written with a missing key does
        # not break the whole view.
        feed = story.get('feed', 'Unknown Source')
        # The link originates from the feed and sits inside a quoted
        # attribute; escape it so a quote cannot break out of href='...'.
        link = escape(story.get('link', ''), quote=True)
        blocks.append(
            f"<h3>{feed} - <a href='{link}' target='_blank'>{story.get('title', '')}</a></h3>"
            f"<p><em>{story.get('published', 'No date')}</em></p>"
            f"<p>{story.get('summary', '')}</p><hr>"
        )
    return "".join(blocks)

# Function to clear preferences
def clear_preferences():
    """Delete the stored user record (if any) and cancel every scheduled job.

    Returns:
        A confirmation message for the UI.
    """
    database_present = os.path.exists(db_file)
    if database_present:
        os.remove(db_file)

    # Drop all jobs so nothing keeps polling for the removed preferences.
    schedule.clear()

    confirmation = "Preferences cleared successfully. JSON file removed and scheduler reset."
    return confirmation

# Gradio interface
def gradio_app():
    """Build the two-tab Gradio UI and launch it.

    Tab "Setup Preferences" collects emails, feeds, frequency and keywords and
    wires the buttons to ``setup_preferences`` / ``clear_preferences``.
    Tab "View Stories" shows the HTML produced by ``display_stories``.
    """
    with gr.Blocks() as demo:
        gr.Markdown("# 📰 Simplified News Aggregator")

        with gr.Tab("Setup Preferences"):
            gr.Markdown("Enter your email(s), select feeds, set frequency, and optional keywords.")
            emails_input = gr.Textbox(label="Email(s) (comma-separated):")
            # Choices are the display names (keys) of available_feeds.
            feed_selection = gr.CheckboxGroup(list(available_feeds.keys()), label="Select Feeds:")
            frequency = gr.Radio(['Every Story', 'Hourly', 'Daily'], label="Email Frequency:")
            keywords_input = gr.Textbox(label="Keywords (optional, comma-separated):")
            setup_button = gr.Button("Save Preferences")
            status = gr.Textbox(label="Status", interactive=False)
            # Inputs are passed positionally, in this order, to setup_preferences;
            # its returned message is shown in the status box.
            setup_button.click(fn=setup_preferences, inputs=[emails_input, feed_selection, frequency, keywords_input], outputs=status)

            # Button that deletes the stored preferences and resets the scheduler
            clear_button = gr.Button("Clear Preferences")
            clear_status = gr.Textbox(label="Clear Status", interactive=False)
            clear_button.click(fn=clear_preferences, outputs=clear_status)

        with gr.Tab("View Stories"):
            stories_output = gr.HTML()
            # Stories are only re-read from disk when this button is pressed.
            refresh_button = gr.Button("Refresh Stories")
            refresh_button.click(fn=display_stories, outputs=stories_output)

    # NOTE(review): share=True presumably requests a publicly reachable link;
    # anyone with it could trigger emails from the configured sender account.
    # Confirm against the Gradio launch() docs.
    demo.launch(debug=True, share=True)

# Start the scheduler in a separate thread
def run_scheduler():
    """Run due ``schedule`` jobs forever, polling once per second.

    Intended as the target of a background thread; never returns. A job that
    raises is reported and the loop keeps going, so one failed feed download
    or email does not silently stop all future runs.
    """
    while True:
        try:
            schedule.run_pending()
        except Exception as exc:
            # Thread boundary: an uncaught exception here would end the thread
            # and with it every scheduled job.
            print(f"Scheduled job failed: {exc!r}")
        time.sleep(1)

import threading
# Run the scheduler loop in the background. It is a daemon thread so it does
# not keep the interpreter alive on its own, and it is started before the UI
# because gradio_app() is called last and launches the interface.
scheduler_thread = threading.Thread(target=run_scheduler, daemon=True)
scheduler_thread.start()

# Build and launch the Gradio interface.
gradio_app()

ModuleNotFoundError: No module named 'feedparser'