In [23]:
import feedparser
import pandas as pd
from dateutil.parser import parse
from bs4 import BeautifulSoup
import datetime

# RSS feed URLs to poll. The parsing loop further down fetches each URL with
# feedparser and keeps only that feed's newest entry, so the order of this
# list is the order in which rows appear in the resulting DataFrame.
# Judging by the hosts and paths these look like Florida real-estate news
# sources -- confirm before adding unrelated feeds.
# NOTE(review): the three rss.bizjournals.com URLs differ only in the feed id
# (10985/10986/10987) and the selectorvalue; the captured output shows two of
# them returning the same article, so they may overlap -- verify whether all
# three are needed.
feeds = [
    "https://sfbwmag.com/category/industries/real-estate/feed/",
    "https://www.orlandosentinel.com/business/real-estate/feed/",
    "https://rss.bizjournals.com/feed/c81a9492d93a35a31af37cb5a395de22318b609a/10985?market=southflorida&selectortype=channel&selectorvalue=11",
    "https://www.floridarealtors.org/news-media/rss.xml",
    "https://rss.bizjournals.com/feed/c81a9492d93a35a31af37cb5a395de22318b609a/10986?market=southflorida&selectortype=channel&selectorvalue=2,11",
    "https://www.sarasotamagazine.com/home-and-real-estate/real-estate/feed",
    "https://naples.floridaweekly.com/category/business/real-estate-news/feed/",
    "https://www.sun-sentinel.com/business/real-estate/feed/",
    "https://www.gulfshorebusiness.com/category/commercial-real-estate/feed/",
    "https://www.gulfshorebusiness.com/category/residential-real-estate/feed/",
    "https://rss.bizjournals.com/feed/c81a9492d93a35a31af37cb5a395de22318b609a/10987?market=southflorida&selectortype=channel&selectorvalue=2,11",
    "https://tbbwmag.com/category/industries/real-estate/feed/",
    "https://spacecoastdaily.com/category/scd-real-estate/feed/",
    "https://www.yoursun.com/search/?f=rss&t=article&c=homeandstyle&l=50&s=start_time&sd=desc",
    "https://www.yoursun.com/search/?f=rss&t=article&c=venice&l=50&s=start_time&sd=desc",
    "https://www.miamitodaynews.com/real-estate/feed/",
    "https://www.miaminewtimes.com/miami/Rss.xml",
    "https://southfloridareporter.com/real-estate/feed/",
    "https://www.villages-news.com/category/real-estate/feed/",
    "https://www.midfloridanewspapers.com/search/?f=rss&t=article&c=highlands_news-sun/highlands_homes&l=50&s=start_time&sd=desc",
    "https://commercialobserver.com/market/south-florida/feed/",
    "https://www.yourobserver.com/rss/headlines/east-county/real-estate/"
]

# Column-oriented accumulator for the final table: every key is a column
# header and its list receives one value per selected article. Each column
# gets its own fresh list.
data = {
    column: []
    for column in ('Feed Title', 'Article Title', 'Published Date', 'Content')
}

# Newest entry seen so far for each feed, keyed by the feed's URL
most_recent_entries = dict()

def _entry_timestamp(entry):
    """Return the entry's publication time as an aware datetime, or None.

    Reads the entry's ``published`` string, falling back to ``updated``.
    A missing value, or one that dateutil cannot parse, yields None instead
    of aborting the whole run. A parsed value without a UTC offset is assumed
    to be UTC so that every timestamp returned here can be compared with
    every other one (comparing naive and aware datetimes raises TypeError).
    """
    raw_date = entry.get('published') or entry.get('updated')
    if not raw_date:
        return None
    try:
        stamp = parse(raw_date)
    except (ValueError, OverflowError):
        # dateutil signals an unparseable string with ValueError (its
        # ParserError is a subclass) and an out-of-range date with
        # OverflowError.
        return None
    if stamp.utcoffset() is None:
        stamp = stamp.replace(tzinfo=datetime.timezone.utc)
    return stamp


def _entry_text(entry):
    """Return the entry's body with HTML tags removed.

    Uses the first ``content`` item when the entry has one, otherwise the
    ``summary``; an entry with neither yields an empty string.
    """
    contents = entry.get('content')
    if contents:
        content_html = contents[0].get('value') or ''
    else:
        content_html = entry.get('summary') or ''
    soup = BeautifulSoup(content_html, 'html.parser')
    return soup.get_text(separator=' ', strip=True)


# Parse each feed and remember only its newest entry
for url in feeds:
    feed = feedparser.parse(url)
    feed_title = feed.feed.title if hasattr(feed.feed, 'title') else "Unknown feed"

    newest_entry = None
    newest_date = None
    for entry in feed.entries:
        entry_date = _entry_timestamp(entry)
        if newest_entry is None:
            # First entry of the feed is always the initial candidate.
            is_newer = True
        elif entry_date is None:
            # An undated entry never displaces an existing candidate; it used
            # to be stamped with "now" and therefore always won.
            is_newer = False
        elif newest_date is None:
            # Any dated entry beats an undated candidate.
            is_newer = True
        else:
            is_newer = entry_date > newest_date
        if is_newer:
            newest_entry, newest_date = entry, entry_date

    # A feed that yielded no entries contributes no row.
    if newest_entry is None:
        continue

    # Strip HTML once, for the winning entry only. 'date' is None when the
    # feed supplied no usable timestamp; 'title' is '' when it is missing.
    most_recent_entries[url] = {
        'date': newest_date,
        'title': newest_entry.get('title', ''),
        'content': _entry_text(newest_entry),
        'feed_title': feed_title
    }

# Copy each feed's newest entry into the column lists, one row per feed,
# in the order the feeds were recorded.
for latest in most_recent_entries.values():
    for column, field in (
        ('Feed Title', 'feed_title'),
        ('Article Title', 'title'),
        ('Published Date', 'date'),
        ('Content', 'content'),
    ):
        data[column].append(latest[field])

# Build the table from the column lists
df = pd.DataFrame(data)

# Leave the DataFrame as the last expression so the notebook renders it
df

# Optionally save the DataFrame to a CSV file
# df.to_csv('rss_feed_data.csv', index=False)

Unnamed: 0,Feed Title,Article Title,Published Date,Content
0,Real Estate Archives - S. Florida Business & W...,The Rider at Wynwood Sales Gallery Includes Mo...,2024-05-09 15:17:42+00:00,Miami-based Rilea Group has launched its new s...
1,Orlando real estate and housing news: Orlando ...,Property values in Seminole County continue to...,2024-05-29 22:08:06+00:00,Despite high interest rates and rising insuran...
2,Bizjournals.com Feed (2024-05-09 02:52:16),Lennar plans 800 new home lots around this Tex...,2024-05-31 21:12:33+00:00,Miami-based Lennar retained the No. 1 spot as ...
3,FloridaRealtors,CFPB Opens Inquiry into Mortgage Closing Costs,2024-05-31 12:05:40+05:00,The CFPB is seeking public input into mortgage...
4,Bizjournals.com Feed (2024-05-09 03:03:37),Lennar plans 800 new home lots around this Tex...,2024-05-31 21:12:33+00:00,Miami-based Lennar retained the No. 1 spot as ...
5,Real Estate - Sarasota Magazine,This Siesta Key Home Just Sold for $9.25 Milli...,2024-05-31 14:41:28-04:00,The sale of the waterfront home made for one o...
6,Real Estate News - Naples Florida Weekly,The Claridge in Pelican Bay,2024-05-30 04:00:00+00:00,"This lovely, renovated residence at 7515 Pelic..."
7,South Florida Real Estate News,Ask a real estate pro: How can I circumvent ne...,2024-05-30 10:00:18+00:00,"Q: I want to install a pool in my backyard, bu..."
8,Commercial Real Estate Archives - Gulfshore Bu...,"Collier rejects $23M price tag for 2,247 acres",2024-05-20 17:35:28+00:00,Collier County c ommissioners unanimously reje...
9,Residential Real Estate Archives - Gulfshore B...,"Charlotte County single-family home, condo pri...",2024-05-23 17:55:20+00:00,There was an uptick in April median and averag...
