Global setup

In [None]:
# Run the shared setup script found at ../global_setup.py (relative to the
# current working directory). exec() evaluates its contents in this cell's
# namespace, so anything it defines or changes takes effect for the notebook.
try:
    with open("../global_setup.py") as setupfile:
        exec(setupfile.read())
except FileNotFoundError:
    # The script was not found at the relative path. The message treats this as
    # "setup already ran" — presumably the setup changes the working directory,
    # so the relative path stops resolving on a second run (TODO confirm).
    # Note: a FileNotFoundError raised by the executed script itself is caught
    # here as well and reported with the same message.
    print('Setup already completed')

In [None]:
import feedparser # Parsing the RSS feed
import pandas as pd # Displaying results in a data-frame
from afinn import Afinn # Sentiment analysis package
from IPython.core.display import display#, HTML # HTML displayer
from ipywidgets.widgets import Accordion, FloatText, Layout, Label, VBox, HTML, Dropdown

from notebooks.exercises.src.text.rsspedia import Rsspedia # Searching in Wiki for text matches using Okapi BM25
from src.text.document_retrieval.wikipedia import Wikipedia # Generic Wikipedia class

In [None]:
# (display name, RSS URL) for every selectable news source. The position of a
# pair in this list is the value the dropdown reports for that source.
RSS_feeds = [('Politiken.dk', 'http://politiken.dk/rss/senestenyt.rss'), 
             ('DR.dk', 'http://www.dr.dk/Forms/Published/rssNewsFeed.aspx?config=6b82610d-b898-49b2-80ef-85c5642519c3&rss=Yes&rssTitle=DR+Nyheder+Online&overskrift=Politik+-+seneste+20&Url=%2fnyheder%2f'), 
             ('BT.dk', 'https://www.bt.dk/bt/seneste/rss'),
             ('Information.dk', 'https://www.information.dk/feed'),
             ('Børsen.dk', 'https://borsen.dk/rss/'),
             ('Ekstrabladet.dk', 'http://ekstrabladet.dk/seneste10.rss')
]

# News-source selector. The (label, index) options are generated from RSS_feeds
# so that labels and indices cannot drift apart when a feed is added or removed.
w = Dropdown(
    options=[(name, index) for index, (name, _url) in enumerate(RSS_feeds)],
    value=1,  # index 1 = 'DR.dk'
    description='Vælg nyhedskilde:',
    disabled=False,
)

# Dropdown on top, an initially empty HTML pane underneath it.
containter = VBox((w, HTML(value = "",)),)

def on_change(change):
    """Rebuild the sentiment table for the news source picked in the dropdown.

    The feed selected by ``change['new']`` is fetched, every entry title is
    scored with AFINN (``language="da"``), and the colour-coded table is
    written into the HTML widget that sits below the dropdown inside the
    module-level ``containter`` box. Because that box is already displayed,
    assigning the HTML widget's ``value`` updates the table in place.

    Parameters
    ----------
    change : dict
        Notification delivered by the widget's ``observe`` mechanism. Only
        ``type == 'change'`` notifications for the ``value`` trait are acted
        on; ``change['new']`` is used as an index into ``RSS_feeds``.
    """
    if change['type'] != 'change' or change['name'] != 'value':
        return

    # `containter` (sic) is the name the VBox is bound to at module level;
    # children[1] is the HTML pane created next to the dropdown.
    html_pane = containter.children[1]

    feed = feedparser.parse(RSS_feeds[change['new']][1])

    # Collect the titles from the RSS feed and score each of them.
    data_titles = [entry["title"] for entry in feed["entries"]]
    if not data_titles:
        # Nothing to tabulate (empty or unreachable feed).
        html_pane.value = "Ingen nyheder fundet."
        return

    afinn = Afinn(language = "da")
    data_scores = [afinn.score(title) for title in data_titles]
    df = pd.DataFrame({"Title": data_titles, "Score": data_scores})

    # Highlight the positive and negative sentiments.
    def highlight(s):
        """Return one background-colour declaration per cell of row ``s``."""
        if s.Score > 0:
            colour = '#AAFFAA'
        elif s.Score < 0:
            colour = '#FFAAAA'
        else:
            colour = '#FFFFFF'
        return ['background-color: {}'.format(colour)] * len(s)

    styled = df.style.apply(highlight, axis=1)

    # An HTML widget needs markup, not a Styler object. Styler.to_html() is the
    # current API; fall back to Styler.render() on pandas versions without it.
    to_markup = getattr(styled, "to_html", None) or styled.render
    html_pane.value = to_markup()

# Subscribe only to changes of the dropdown's `value` trait — the one kind of
# notification on_change acts on — instead of to every trait of the widget.
w.observe(on_change, names='value')

# Show the dropdown together with the (initially empty) result pane.
display(containter)

In [None]:
# Packaged version of the RSS sentiment dashboard (project-local module).
from notebooks.exercises.src.text.news_sentiment_1 import RSSDashboard
dashboard = RSSDashboard()
# NOTE(review): `start` is referenced without being called. If it is a property
# this displays whatever it returns; if it is a method, this only echoes the
# bound method and `dashboard.start()` was probably intended — confirm against
# RSSDashboard.
dashboard.start

In [None]:
# Fetch a single RSS feed directly (no widget involved). To score a different
# news source, swap the URL below for one of these alternatives:
#   http://politiken.dk/rss/senestenyt.rss
#   http://www.dr.dk/Forms/Published/rssNewsFeed.aspx?config=6b82610d-b898-49b2-80ef-85c5642519c3&rss=Yes&rssTitle=DR+Nyheder+Online&overskrift=Politik+-+seneste+20&Url=%2fnyheder%2f
#   https://www.bt.dk/bt/seneste/rss
#   https://www.information.dk/feed
#   https://borsen.dk/rss/
feed = feedparser.parse("http://ekstrabladet.dk/seneste10.rss")

# Get the titles from the RSS feed and store a sentiment score for each one.
afinn = Afinn(language = "da")
data_titles = [entry["title"] for entry in feed["entries"]]
data_scores = [afinn.score(title) for title in data_titles]

# Dataframe
# Display whole titles (non-truncated). None means "no limit"; the former -1
# sentinel is deprecated and rejected by current pandas releases.
pd.set_option('display.max_colwidth', None)
df = pd.DataFrame({"Title": data_titles, "Score": data_scores}) # One row per headline

# Highlight the positive and negative sentiments
def highlight(s):
    """Return one CSS background declaration per cell of row ``s``.

    Intended for ``Styler.apply(..., axis=1)``: the whole row is coloured by
    the sign of its ``Score`` field.

    Parameters
    ----------
    s : row-like
        Object exposing a numeric ``Score`` attribute and a length equal to
        the number of cells in the row (e.g. a DataFrame row).

    Returns
    -------
    list of str
        ``len(s)`` identical declarations: green for a positive score, red for
        a negative score, white otherwise.
    """
    if s.Score > 0:
        colour = '#AAFFAA'
    elif s.Score < 0:
        colour = '#FFAAAA'
    else:
        colour = '#FFFFFF'
    # Sized from the row itself so the styling keeps working if columns are
    # added to the frame.
    return ['background-color: {}'.format(colour)] * len(s)

# Colour each row via highlight(). Note that `df` is rebound here: from this
# line on it is the Styler returned by .style.apply(), not the DataFrame.
df = df.style.apply(highlight, axis=1)

# Bare last expression so the notebook renders the styled table.
df

In [None]:
# Create the generic Wikipedia helper for Danish. Per the original note this
# step also loads — and may download — the vocabulary.
# NOTE(review): cache_directory_url=None presumably selects a default cache
# location; confirm against the Wikipedia class.
wikipedia = Wikipedia(language="danish", cache_directory_url=None)
# Wrap it in Rsspedia, the class used below to search Wikipedia for text
# matching the RSS titles.
rsspedia = Rsspedia(wikipedia)

In [None]:
# Search Wikipedia for every headline collected in `data_titles`.
# NOTE(review): a later cell reads `rsspedia.search_results`, so this call is
# presumably what fills it — confirm. As the last expression of the cell, its
# return value is also echoed as the cell output.
rsspedia.search_wikipedia(data_titles)

In [None]:
#print(data_titles[1])
#import pprint
#pprint.pprint(rsspedia.search_results)
#display(HTML(rsspedia.search_results))

In [None]:
# One HTML pane per feed entry, holding the Wikipedia search result stored at
# the same position in rsspedia.search_results.
list_labels = [
    HTML(value=rsspedia.search_results[position])
    for position in range(len(feed["entries"]))
]

accordion = Accordion(children=list_labels)

# Caption every pane with its 1-based position followed by the headline.
for i in range(len(feed["entries"])):
    caption = "{}. {}".format(i + 1, data_titles[i])
    accordion.set_title(i, caption)

display(accordion)

In [None]:
from dasem.wikipedia import ExplicitSemanticAnalysis
#esa.related(u"Skal Nobelprisen i litteratur reddes fra den synkende svenske skude, så flyt den til Alexandria".lower())

In [None]:
# Instantiate dasem's ExplicitSemanticAnalysis with its default arguments; the
# cells below query it through esa.related().
# NOTE(review): the cost of construction is not visible here — presumably it
# loads or builds a Wikipedia-based model; confirm before re-running casually.
esa = ExplicitSemanticAnalysis()

In [None]:
#esa.related(u"Skal Nobelprisen i litteratur reddes fra den synkende svenske skude, så flyt den til Alexandria".lower())

In [None]:
#import nltk
#nltk.download('punkt')

# One HTML pane per feed entry, showing the text form of the 3 results that
# esa.related() returns for the lower-cased headline.
list_labels = [
    HTML(value=str(esa.related(data_titles[position].lower(), n=3)))
    for position in range(len(feed["entries"]))
]

accordion = Accordion(children=list_labels)

# Caption every pane with its 1-based position followed by the headline.
for i in range(len(feed["entries"])):
    caption = "{}. {}".format(i + 1, data_titles[i])
    accordion.set_title(i, caption)

display(accordion)

In [None]:
# Chain the two lookups for the first headline: ask ESA for 3 related results,
# take element [1] of the first result, and search Wikipedia for that value.
# NOTE(review): [0][1] is presumably the related article's title — confirm the
# layout of what esa.related() returns.
r = rsspedia.search_wikipedia([esa.related(data_titles[0].lower(), n = 3)[0][1]])
print(r)

In [None]:
# Print (as a one-element list) the value that the previous cell feeds into the
# Wikipedia search: element [1] of the first of 3 ESA results for the first
# headline. NOTE(review): presumably the related article's title — confirm.
print([esa.related(data_titles[0].lower(), n = 3)[0][1]])