# News search and summarize
## Harry Allison
July 2023, this is my first program. 
It reads multiple RSS news feeds and filters them by keywords. The results are then sent to an email address. 

There are two main functions: 
1. filter_rss will handle the RSS feeds and build the message to be sent
2. send_email will then handle the message sending 


In [4]:
my_openAI_key = input(str("API key:"))

# feedparser will help break up the rss feed, so we can search for keywords 
import feedparser
# Textblob excluded becuse it was only 50% accurate
# from textblob import TextBlob

# process_feeds is a new function we can call to import an RSS feed, filter it through keywords, and perform sentiment analysis
def process_feeds(feed_urls, keywords=None):
    message = "This is your daily automated news feed on food safety\n"
    message += "We are searching for the following keywords:\n" + str(keywords) + "\n\n"
    message += "Today's results ...\n\n"
    counter = 0
    for feed_url in feed_urls:
        # for every url in our list ..
        feed = feedparser.parse(feed_url)
        print("Processing feed: ", feed.feed.title)
        # if the status of the website is healthy (ie 200), continue, else skip the feed
        if feed.status == 200:
            # then we read in the title etc from the feed
#            print(f"Title: {feed.feed.get('title', '')}")
#            print(f"Description: {feed.feed.get('description', '')}\n")

            # looking for key words to section of different websites, and present them 
            # 'entry' means each story in the feed; the for loop goes through each entry (story) in turn to search against the keywords 
            for entry in feed.entries:
                if any(item.lower() in entry.title.lower() for item in keywords):
                    counter = counter +1 
                    message += entry.title + "\n"
                    message += entry.link + "\n"
                    message += "Sentiment: " + ai_sentiment(entry.title) + "\n" + "\n"
                    message += ai_summarize(entry.link) + "\n"
                    message += "---\n"
        # if website does not work print an error message 
        else:
            print("Error fetching feed")
    if counter == 0:
        message += "No matching searches found today"
    message += "\nEnd of message"
    print("\n\n")
    return message 
# **** END OF FUNCTION (process_feeds) ****

def send_email(receiver_email, subject, message):
    import smtplib
    from email.mime.text import MIMEText
    from email.mime.multipart import MIMEMultipart

    # Email configuration
    sender_email = 'mfsnewsfeed@hotmail.com'

    # SMTP server configuration
    smtp_server = 'smtp-mail.outlook.com'
    smtp_port = 587
    smtp_username = input(str("Username:"))
    smtp_password = input(str("Password:"))
    # Note that we're using a hotmail account to send the email. Many email services, like gmail will not allow a connection from SMTPLIB 


    # Create a multipart message
    msg = MIMEMultipart()
    msg['From'] = sender_email
    msg['To'] = receiver_email
    msg['Subject'] = subject


    # Attach the message to the email
    msg.attach(MIMEText(message, 'plain'))


    # Create a secure connection to the SMTP server
    with smtplib.SMTP(smtp_server, smtp_port) as server:
        server.starttls()
        server.login(smtp_username, smtp_password)
        server.send_message(msg)
        print(msg)
    print("sent")
    
# **** END OF FUNCTION (send_email) ****

def ai_sentiment(title_in):
    import openai
    openai.api_key = my_openAI_key
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {
                "role": "system",
                "content": "You will be provided with a news headline, and your task is to classify its sentiment as positive, neutral, or negative."
            },
            {
                "role": "user",
                "content": title_in 
            }
        ],
        temperature=0,
        max_tokens=64,
        top_p=1.0,
        frequency_penalty=0.0,
        presence_penalty=0.0
    )
    return response.choices[0].message['content'].strip()

# **** END OF FUNCTION (ai_sentiment)

import requests
from bs4 import BeautifulSoup
import openai

def scrape_article_text(url):
    response = requests.get(url)
    soup = BeautifulSoup(response.content, "html.parser")

    article_body = soup.find("article")

    if article_body is None:
        return None  # or return an empty string: ""

    article_text = article_body.get_text(separator="\n")
    return article_text
# **** END OF FUNCTION (scrape_article_text)


def ai_summarize(article_in):
    openai.api_key = my_openAI_key

    scraped_text = scrape_article_text(article_in)

    if scraped_text is None or scraped_text == "":
        return " "

    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {
                "role": "system",
                "content": "You will be provided with a news article, and your task is to summarize it."
            },
            {
                "role": "user",
                "content": scraped_text
            }
        ],
        temperature=0,
        max_tokens=250,
        top_p=1.0,
        frequency_penalty=0.0,
        presence_penalty=0.0
    )
    return response.choices[0].message['content'].strip()
# **** END OF FUNCTION (ai_summarize)

# URLs and keywords to search
rss_feed_urls = [
    'https://www.fda.gov/about-fda/contact-fda/stay-informed/rss-feeds/food-safety-recalls/rss.xml',
    'https://www.fda.gov/about-fda/contact-fda/stay-informed/rss-feeds/food-allergies/rss.xml',
    'https://recalls-rappels.canada.ca/en/feed/cfia-alerts-recalls',
    'https://www.food.gov.uk/rss-feed/news',
#    'https://www.food-safety.com/rss/topic/303-food-defense',
#    'https://www.food-safety.com/rss/topic/305-recall-crisis-management',
    'http://feeds.bbci.co.uk/news/world/rss.xml',
    'http://feeds.bbci.co.uk/news/health/rss.xml',
    'http://rss.cnn.com/rss/cnn_us.rss',
    'http://rss.cnn.com/rss/cnn_world.rss',
    'http://rss.cnn.com/rss/cnn_health.rss',
    'https://www.cnbc.com/id/100727362/device/rss/rss.html',
    'http://rssfeeds.usatoday.com/UsatodaycomNation-TopStories',
    'https://www.healthshots.com/rss-feeds/health-news/',
    'https://news.google.com/rss/topics/CAAqJQgKIh9DQkFTRVFvSUwyMHZNR3QwTlRFU0JXVnVMVWRDS0FBUAE?hl=en-US&gl=US&ceid=US%3Aen&oc=11',
    'https://news.google.com/rss/topics/CAAqKggKIiRDQkFTRlFvSUwyMHZNRGx1YlY4U0JXVnVMVWRDR2dKSFFpZ0FQAQ?hl=en-US&gl=US&ceid=US%3Aen&oc=11'
    # Add more feed URLs here
]

# Top 40 food safety feeds: 
# https://rss.feedspot.com/food_safety_rss_feeds/

search_keywords = [
    "salmonella",
    "coli",
    "Toxoplasma",
    "Listeria",
    "Norovirus",
    "food safety",
    "food poisening",
    "bacteria",
    "contamination",
    "contaminated",
    "foodborne"
]

# calling the filter_rss fuction and printing it
# print("Calling process_feeds")
message_body = process_feeds(rss_feed_urls, search_keywords)
print(message_body)

# the message that is being sent to receiver_email
receiver_email = input(str("Target email:"))
subject = 'Daily Food Safety News Summary'
message = message_body

# calling the send_email function 
# send_email(receiver_email, subject, message)  








Processing feed:  FDA Food Safety Recalls RSS Feed
Processing feed:  Food Allergies
Processing feed:  Health and safety recalls and alerts
Processing feed:  RSS news
Processing feed:  BBC News - World
Processing feed:  BBC News - Health
Processing feed:  CNN.com - RSS Channel - US
Processing feed:  CNN.com - RSS Channel - World
Processing feed:  CNN.com - RSS Channel - Health
Processing feed:  International: Top News And Analysis
Processing feed:  GANNETT Syndication Service
Processing feed:  Healthshots - health-news
Processing feed:  Health - Latest - Google News
Processing feed:  World - Latest - Google News



This is your daily automated news feed on food safety
We are searching for the following keywords:
['salmonella', 'coli', 'Toxoplasma', 'Listeria', 'Norovirus', 'food safety', 'food poisening', 'bacteria', 'contamination', 'contaminated', 'foodborne']

Today's results ...

Estancia Holdings Recalls Tome Corse Sheep Ottavi Due to Potential Listeria monocytogenes Contamination


## Test a URL to scrape the clean article

In [41]:
url = "https://www.food-safety.com/articles/7759-jif-products-recalled-14-people-sickened-by-salmonella"

print(scrape_article_text(url))


None


In [None]:
                for item in keywords:
                    if item.lower() in entry.title.lower():
                        counter = counter +1 
                        message += entry.title + "\n"
                        message += entry.link + "\n"
                        message += "Sentiment: " + ai_sentiment(entry.title) + "\n" + "\n"
                        message += ai_summarize(entry.link) + "\n"
                        message += "---\n"