# Tekstanalyse-demo med kunstig intelligens

**Global setup**

In [None]:
# Run the shared setup script that lives one directory above this notebook.
try:
    with open("../global_setup.py") as setup_script:
        setup_source = setup_script.read()
    # Executed in the notebook's own namespace, exactly like an inline cell.
    exec(setup_source)
except FileNotFoundError:
    # NOTE(review): a missing script is reported as "already completed";
    # presumably an earlier run changed the working directory -- confirm.
    print('Setup already completed')

**Importér alle moduler**

In [None]:
import io # Read the prime-minister's speech file with UTF-8 encoding
import pandas as pd # Displaying results in a data-frame
import requests # Used to search wikipedia for the articles
import urllib.parse # Used to URL-encode the query strings

from afinn import Afinn # Sentiment analysis package
from IPython.core.display import display#, HTML # HTML displayer
from ipywidgets.widgets import Accordion, HTML
from notebooks.exercises.src.text.rsspedia import Rsspedia # Searching in Wiki for text matches using Okapi BM25
from notebooks.exercises.src.text.news_sentiment_1 import RSSDashboard
from src.text.document_retrieval.wikipedia import Wikipedia # Generic Wikipedia class

## Nyhedsanalyse 1: Sentiment

Du kan vælge mellem forskellige danske nyhedskilder og se de seneste nyheder med deres sentiment-scores.

In [None]:
# Create the RSS dashboard and render its widget container in the notebook.
# The instance is kept in `RSSdashboard` because later cells read its
# `data_titles` attribute.
RSSdashboard = RSSDashboard()
dashboard_box = RSSdashboard.widget_box
display(dashboard_box)

## Nyhedsanalyse 2: relevante Wikipedia-sider (Okapi BM25-søgning)

In [None]:
# Set up the Danish Wikipedia helper (this also (down)loads its vocabulary)
wikipedia = Wikipedia(language="danish", cache_directory_url=None)
# Wrap it in the RSS-to-Wikipedia searcher and look up every news headline
rsspedia = Rsspedia(wikipedia)
rsspedia.search_wikipedia(RSSdashboard.data_titles)

# One HTML widget per headline, holding the search result stored at the
# same position in `rsspedia.search_results`
list_labels = [
    HTML(value=rsspedia.search_results[position])
    for position in range(len(RSSdashboard.data_titles))
]

accordion = Accordion(children=list_labels)

# Each accordion panel is titled "<1-based number>. <headline>"
for position, headline in enumerate(RSSdashboard.data_titles):
    accordion.set_title(position, "{}. {}".format(position + 1, headline))

display(accordion)

## Nyhedsanalyse 3: relevante Wikipedia-sider (Explicit Semantic Analysis)

In [None]:
from dasem.wikipedia import ExplicitSemanticAnalysis

# Explicit Semantic Analysis model used to find Wikipedia titles related to
# each news headline.
esa = ExplicitSemanticAnalysis()
# NOTE(review): the original cell carried a commented-out hint that nltk's
# 'punkt' data may have to be downloaded first (nltk.download('punkt')) --
# confirm whether dasem still needs it.
content_items = []
n_wiki_results = 3
# Seconds to wait for the Wikipedia API. requests has no default timeout, so
# without this a stalled connection would hang the notebook indefinitely.
request_timeout = 10

for news_title in RSSdashboard.data_titles:
    urls = []
    titles = []
    abstracts = []
    list_labels = esa.related(news_title.lower(), n=n_wiki_results)
    # Walk the titles that actually came back instead of indexing a fixed
    # range: fewer than n_wiki_results hits used to raise IndexError.
    for wiki_title in list_labels[0][:n_wiki_results]:
        url = "https://da.wikipedia.org/w/api.php?action=query&prop=extracts&exintro&titles={}&format=json&redirects".format(urllib.parse.quote_plus(wiki_title.replace(" ", "_")))
        json_content = requests.get(url, timeout=request_timeout).json()
        # The response maps page ids to page records; only one title was
        # requested, so the first record is the one we want.
        content_item = next(iter(json_content["query"]["pages"].values()))
        urls.append(url)
        titles.append(content_item.get("title", wiki_title))
        # Pages without an intro extract (e.g. missing pages) have no
        # "extract" key; show an empty abstract rather than raising KeyError.
        abstracts.append(content_item.get("extract", ""))
    content_items.append(HTML(value="{}{}".format(list_labels[0], rsspedia.display_beautifully(titles, abstracts, urls))))

accordion = Accordion(children=content_items)

# Each accordion panel is titled "<1-based number>. <headline>"
for i, news_title in enumerate(RSSdashboard.data_titles):
    accordion.set_title(i, "{}. {}".format(i + 1, news_title))

display(accordion)

# Sentimentanalyse af statsministerens tale
Statsministerens tale ved Folketingets åbning, 2. oktober 2018

In [None]:
# Read the speech line by line (newline stripped). The context manager closes
# the file handle, which a bare open() inside the comprehension never did.
with open("data/statsminister/2018-1.txt", mode="r", encoding="utf-8") as speech_file:
    lines = [line.rstrip('\n') for line in speech_file]

# Score every line with AFINN configured for language "da"
afinn = Afinn(language="da")
scores = [afinn.score(line) for line in lines]

# Dataframe
# Display whole lines (non-truncated). None is the supported "no limit" value
# since pandas 1.0; the old -1 spelling is deprecated and rejected by newer
# releases, so it is only used as a fallback for old pandas versions that
# accept integers only.
try:
    pd.set_option('display.max_colwidth', None)
except ValueError:
    pd.set_option('display.max_colwidth', -1)
df = pd.DataFrame({"Line": lines, "Score": scores})  # One row per line of the speech

# Highlight the positive and negative sentiments
def highlight(s):
    """Return one background-colour CSS declaration per cell of row *s*.

    Args:
        s: A row with a ``Score`` attribute (e.g. a pandas Series that has a
            "Score" entry).

    Returns:
        A list with ``len(s)`` identical CSS strings: green for a positive
        score, red for a negative score, white otherwise.
    """
    if s.Score > 0:
        colour = '#AAFFAA'
    elif s.Score < 0:
        colour = '#FFAAAA'
    else:
        colour = '#FFFFFF'
    # One entry per cell, so the function keeps working if columns are added
    # (the previous hard-coded "* 2" only fitted a two-column frame).
    return ['background-color: {}'.format(colour)] * len(s)

# Colour every row through `highlight` (axis=1 passes one row at a time).
# Note that `df` is rebound from the DataFrame to the styled result before
# it is rendered.
df = df.style.apply(func=highlight, axis=1)
display(df)