### HCI 574 - HW 10

Please make sure to first read the HW10 instructions on Canvas! The steps below are just recaps of the more detailed instructions, primarily to give you an in-code roadmap.

In [None]:
# Steps for the TKinter version

# 1) Search term entry
# Look at the text editor code at the end of lecture 31
# re-use the Label and the (text) Entry widget 
# add a Run search button that will run your processing method
# and/or bind() the Return key into the Entry widget to run your processing method  
# use the grid geometry manager to put them into your first row
# re-use the code for creating a ScrolledText widget 
# you will use insert() in your processing method later to display the results

# 2) Scrape google news for the RSS feed
# This has to be done in your processing method 
# Look at lecture 38 for how to: 
# a) assemble the URL for your search
# b) use requests to download the RSS document 
# c) use feedparser to create a list of entries (RSS item tags)

# 3) output the results
# loop over your list of entries, optionally keep a counter for the running number
# pull out date/time, title and link URL 
# starting with the optional running number, glue all that together into a single string with a \n at the end
# in your Text widget use insert(END, <your string>) to print out each line


In [1]:
# Steps for the Web app

# 1) Search term entry
# look at lecture 36 to generate a HTML file with a form tag that contains 
#   a text input (for the search term) 
#   a submit input (to trigger the search)
# You can do this via constructing a string with the HTML inside or via a template 
# if you use a template, you won't need any of the {{ }} Jinja inlining stuff I used in the lecture
# Write a Flask server with two routes
# a) a main route, which will serve up your HTML (string or file) that contains the form tag
# b) a results route that will display your running number, date/time, title and link for the requested RSS feed
# Ensure that action property in your form tag points to this route 
# E.g. if the route is registered as "/result/" in Python, use action="/result/" in HTML

# 2) Scrape google news for the RSS feed
# This has to be done in your results route
# you will get the search term with request.args["URL"], assuming you used name="URL" in your text input tag
# Look at lecture 38 for how to: 
# a) assemble the URL for your search
# b) use requests to download the RSS document 
# c) use feedparser to create a list of entries (RSS item tags)

# 3) output the results
# loop over your list of entries, optionally keep a counter for the running number
# pull out date/time, title and link URL 
# from these create an anchor tag
# html_code_for_item = "{:s} <a href={:s}> {:s} </a><br>".format(date, URL, title)
# or "{:d}: {:s} <a href={:s}> {:s} </a><br>".format(n, date, URL, title) if you use a running number n
# glue all html_code_for_item strings together into a single string
# In your results route, return this single string to show it in the browser
# Optional: you could create a proper HTML page (see beginning of lecture 35) and use your single string inside the body tag
# or you could create another template html file and use {{ my_result_string }} to inline it in the body tag 

In [None]:
'''This app is hosted on pythonanywhere, feel free to visit it at this link: https://dtang.pythonanywhere.com/'''

In [None]:
''' Package installs and imports, uncomment to run installs if needed'''
'''
%pip install requests
%pip install bs4
%pip install Flask
%pip install feedparser
'''

from urllib.parse import urlencode
from flask import Flask, request, render_template
import random
import string
import requests
import feedparser
import requests 
import bs4 


In [51]:
# Flask application object; the route and error-handler decorators below register on it.
# NOTE(review): Flask's first argument is the import name (conventionally __name__);
# a display string is passed here -- confirm template/static lookup resolves as intended.
app = Flask("RSS Search Engine")

@app.route("/")
def main_page():
    """Render index.html for the root URL and hand the result to the browser."""
    # `title` is passed into the template context of index.html
    return render_template("index.html", title="RSS Search Engine")


def get_links(anchors_tags):
    """Collect the external link targets from a sequence of anchor tags.

    Args:
        anchors_tags: iterable of objects exposing ``.get('href')`` (for example
            parsed ``<a>`` tags). ``None`` is treated as an empty sequence.

    Returns:
        list[str]: every ``href`` value that starts with "http", in input order.
        Tags without an ``href`` and links that do not start with "http"
        (relative/internal ones) are skipped.
    """
    if anchors_tags is None:
        return []

    hrefs = (tag.get('href') for tag in anchors_tags)
    # identity test against None before calling str methods on the value
    return [str(href) for href in hrefs if href is not None and href.startswith("http")]

# Results route (GET), triggered by the search form, plus its private helpers.

def _random_user_agent():
    """Return a string of 5-12 random ASCII letters, sent as the User-agent header.

    (Original author's intent: a jumbled name to spoof Google News.)
    """
    return "".join(random.choices(string.ascii_letters, k=random.randint(5, 12)))


def _fetch_rss_entries(rss_feed_url):
    """Download the RSS document at ``rss_feed_url`` and return feedparser's entry list.

    Raises:
        requests.HTTPError: on an error status (via ``raise_for_status``).
        requests.Timeout: if the server does not answer within the timeout.
    """
    # a timeout keeps a stalled upstream request from blocking the worker forever
    r = requests.get(rss_feed_url, headers={'User-agent': _random_user_agent()}, timeout=10)
    r.raise_for_status()
    return feedparser.parse(r.text).entries


def _entries_to_html(entries):
    """Turn feed entries into one HTML string: '<n> <date> <a href="link"> title </a><br>' each.

    All feed-supplied text is HTML-escaped because it comes from an external source.
    Missing fields are rendered as empty strings instead of raising KeyError.
    """
    from html import escape  # stdlib; imported locally so this block is self-contained

    lines = []
    for n, item in enumerate(entries, start=1):
        # RSS <pubDate> is exposed by feedparser as "published"; "updated" is a fallback
        date = item.get("published") or item.get("updated") or ""
        link = escape(item.get("link", ""))  # quote=True by default -> safe inside href="..."
        title = escape(item.get("title", ""))
        lines.append(f'{n} {escape(date)} <a href="{link}"> {title} </a><br>')
    # join once instead of growing a string inside the loop
    return "".join(lines)


@app.route('/result/', methods=["GET"])
def result_page():
    """Search Google News RSS for the submitted term and return the results page.

    Reads the ``search_term`` query parameter (indexing ``request.args`` makes a
    missing parameter a client error rather than a server error), builds the
    Google News RSS search URL, downloads and parses the feed, and returns a
    complete HTML page listing running number, date, and linked title per entry.

    Returns:
        str: the finished HTML page.

    Raises:
        requests.RequestException: if the feed cannot be downloaded.
    """
    from html import escape  # stdlib; the page below is assembled by hand

    search_term = request.args["search_term"]
    print("Searching RSS news based on", search_term)

    # put base url and url-encoded search term together
    base_url = "https://news.google.com/rss/search"
    rss_feed_url = f"{base_url}?{urlencode({'q': search_term})}"
    print("query:", rss_feed_url)

    entries_string = _entries_to_html(_fetch_rss_entries(rss_feed_url))

    # HTML structure for the results; the user-supplied term is escaped before inlining
    page = """
        <html>
          <head>
            <link rel="stylesheet" href="/static/styles.css"/>
          </head>  
          <body>
            <h1>RSS news results for """ + escape(search_term) + """</h1>
            <div class="search-form">
            <form action="/" method="get">
              <input type="submit" value="Back to Main Page"> 
            </form></div><h3>""" + entries_string + """</h3>
            
          </body>
        </html>"""
    return page  # show finished web page

# Error handling for unhandled server errors (HTTP 500)
@app.errorhandler(500)
def page_not_found(error):
    """Render a minimal error page for HTTP 500 errors.

    NOTE: despite its name this handler is registered for 500 (internal server
    error), not 404; the name is kept so existing references keep working.

    Args:
        error: the error object Flask passes to the handler.

    Returns:
        tuple[str, int]: an HTML snippet and the 500 status code, so the browser
        sees an error status instead of 200.
    """
    from html import escape  # stdlib; imported locally so this block is self-contained

    print(error)
    # The form field is "search_term" (as read in result_page). Indexing the
    # non-existent "URL" key raised inside the error handler itself, so use
    # .get() with the real field name and a safe default.
    search_term = request.args.get("search_term", "")
    # escape both pieces: they are echoed into HTML and may contain user input
    s = "Error with " + escape(str(search_term)) + "<br>" + escape(str(error))
    s = s + "<br>Hit the Back button and try something else ...)"
    return s, 500

# Start Flask's built-in server, but only when running on a local machine.
from socket import gethostname

# all pythonanywhere servers have 'liveweb' in their host name; skip app.run() there
if gethostname().count('liveweb') == 0:
    app.run(debug=False, port=8080)
# http://127.0.0.1:8080/

 * Serving Flask app 'RSS Search Engine'
 * Debug mode: off


 * Running on http://127.0.0.1:8080
Press CTRL+C to quit
127.0.0.1 - - [06/May/2024 12:31:15] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [06/May/2024 12:31:15] "GET /static/styles.css HTTP/1.1" 200 -


Searching RSS news based on monty python
query: https://news.google.com/rss/search?q=monty+python


127.0.0.1 - - [06/May/2024 12:31:20] "GET /result/?search_term=monty+python HTTP/1.1" 200 -
127.0.0.1 - - [06/May/2024 12:31:20] "GET /static/styles.css HTTP/1.1" 304 -
