In [1]:
import feedparser
import pandas as pd

# Major Pipeline Functions

def acquire(url, rdata):
    """Parse one feed and add its entries to the running collection.

    Parameters
    ----------
    url
        Feed location, passed straight to ``feedparser.parse``.
    rdata : list
        Accumulator. The parsed feed's ``entries`` object is appended to it
        as a single element, so the list is mutated in place.

    Returns
    -------
    list
        The same ``rdata`` object that was passed in.
    """
    parsed_feed = feedparser.parse(url)
    feed_entries = parsed_feed.entries
    rdata.append(feed_entries)
    return rdata

def _entry_time(entry):
    """Return the timestamp of a single feed entry, or None if it has none.

    'updated' is preferred and 'published' is the fallback. Feeds are not
    guaranteed to carry 'updated' (assumed to be why HNEWS needed special
    handling -- TODO confirm against the live feed).
    """
    for key in ('updated', 'published'):
        if key in entry:
            return entry[key]
    return None


def wrangle(rdata):
    """Combine the entries of several feeds into one DataFrame of posts.

    Parameters
    ----------
    rdata : list
        One element per feed; each element is a sequence of dict-like
        entries (as collected by ``acquire``).

    Returns
    -------
    pandas.DataFrame
        All posts stacked together, with two added columns: 'source'
        (set by ``add_source``) and 'time' (a per-post timestamp). The
        previous per-feed row number is kept in an 'index' column by
        ``reset_index()``. If no feed yielded any entry, an empty frame
        with the columns 'source', 'title', 'link' and 'time' is returned.
    """
    # One DataFrame per feed that actually produced posts
    frames = []
    for entries in rdata:
        # A failed or empty feed has nothing to tabulate; skip it instead of
        # crashing on the first-row lookups below.
        if not entries:
            continue
        df = pd.DataFrame(entries)
        # Add source column for the platform that a given post originated from
        df = add_source(df)
        # Take the timestamp entry by entry. Assigning a single entry's value
        # to the whole column would stamp every post of the feed with the
        # first post's time.
        df['time'] = [_entry_time(item) for item in entries]
        frames.append(df)
    if not frames:
        # pd.concat raises on an empty list; hand back an empty table that
        # still has the columns downstream formatting selects.
        return pd.DataFrame(columns=['source', 'title', 'link', 'time'])
    # Merge list of DataFrames together with concat
    return pd.concat(frames, axis=0, sort=False).reset_index()

def format_data(post_data):
    """Reduce the posts table to its display columns.

    Returns a DataFrame holding only 'source', 'title', 'link' and 'time',
    in that order. A KeyError is raised if any of them is missing.
    """
    display_columns = ['source', 'title', 'link', 'time']
    return post_data[display_columns]

def visualize(df):
    """Return ``df`` unchanged (currently a pass-through)."""
    return df

#def export(df):
    #return df

In [2]:
# Secondary Functions

# Add a column "SOURCE" of constant strings to a DataFrame
def add_source(df):
    """Tag every row of ``df`` with the platform its feed came from.

    The platform is decided from the 'id' value of the first row only, so
    all rows are assumed to come from the same feed.

    Parameters
    ----------
    df : pandas.DataFrame
        Posts of a single feed. Modified in place: a 'source' column is
        added (or overwritten).

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with 'source' set to "REDDIT", "HNEWS", or
        the placeholder "INSERT NEW SOURCE HERE" for an unrecognised feed.
    """
    # (id prefix, label) pairs; add a row here to support another platform
    known_sources = (
        ("https://www.reddit", "REDDIT"),
        ("https://news.ycomb", "HNEWS"),
    )
    # Read the first row by position: a label lookup ([0]) fails when the
    # index does not contain 0. An empty frame, or one without an 'id'
    # column, falls through to the placeholder label.
    if 'id' in df.columns and len(df) > 0:
        first_id = str(df['id'].iloc[0])
    else:
        first_id = ""
    label = "INSERT NEW SOURCE HERE"
    for prefix, name in known_sources:
        if first_id.startswith(prefix):
            label = name
            break
    df['source'] = label
    return df

In [3]:
def reader(rss_urls):
    """Run the pipeline for a list of feed URLs and return the posts table.

    Each URL is passed to ``acquire``; the collected entries then go through
    ``wrangle`` and ``format_data``. The number of collected feeds is
    printed along the way.
    """
    # One element is added per URL by acquire()
    collected = []
    for feed_url in rss_urls:
        collected = acquire(feed_url, collected)
    print('Number of RSS_Data Sources:', len(collected))
    # Flatten all entries into one DataFrame, then keep the readable columns
    return format_data(wrangle(collected))

# Feeds to read; one URL per line
urls = [
    'https://www.reddit.com/r/movies/new.rss',
    'https://hnrss.org/newest',
]
# Last expression of the cell, so the resulting table is displayed
reader(urls)

# TODO: Add a column of unformatted time strings to the DataFrame

Number of RSS_Data Sources: 2


Unnamed: 0,source,title,link,time
0,REDDIT,[Serious] Has there ever been a movie featurin...,https://www.reddit.com/r/movies/comments/c6qq1...,2019-06-28T22:28:00+00:00
1,REDDIT,Chuck Norris Fight Scenes,https://www.reddit.com/r/movies/comments/c6qp7...,2019-06-28T22:25:58+00:00
2,REDDIT,"Please recommend movies like ""The Butterfly Ef...",https://www.reddit.com/r/movies/comments/c6qlb...,2019-06-28T22:17:00+00:00
3,REDDIT,The Texas Chain Saw Massacre (1974) is Great Art,https://www.reddit.com/r/movies/comments/c6qj6...,2019-06-28T22:12:03+00:00
4,REDDIT,Venom and Spider-Man crossover movie will be ‘...,https://www.reddit.com/r/movies/comments/c6qbg...,2019-06-28T21:54:32+00:00
5,REDDIT,Fox's 'Alien Nation' remake has been paused/ca...,https://www.reddit.com/r/movies/comments/c6q5u...,2019-06-28T21:41:19+00:00
6,REDDIT,Whats The name of this movie,https://www.reddit.com/r/movies/comments/c6q51...,2019-06-28T21:39:25+00:00
7,REDDIT,The complete theatrical release schedule for J...,https://www.reddit.com/r/movies/comments/c6q43...,2019-06-28T21:37:10+00:00
8,REDDIT,New and Improved,https://www.reddit.com/r/movies/comments/c6q2x...,2019-06-28T21:34:45+00:00
9,REDDIT,Cool spanish horror movies?,https://www.reddit.com/r/movies/comments/c6q1o...,2019-06-28T21:31:42+00:00
