# Tekstanalyse-demo med kunstig intelligens

**Global setup**

In [None]:
# Run the shared project setup script once per kernel.
# Only the open/read of the script is guarded: a missing file is treated as
# "setup already ran" (presumably because the setup script changes the working
# directory so the relative path no longer resolves on a re-run -- TODO confirm).
# Errors raised *by* the setup script itself are deliberately not caught here, so
# a FileNotFoundError inside the script is no longer misreported as success.
try:
    with open("../global_setup.py") as setupfile:
        setup_source = setupfile.read()
except FileNotFoundError:
    print('Setup already completed')
else:
    # Executes the script in the notebook's global namespace (trusted project file).
    exec(setup_source)

In [None]:
%%html
<style>
/* Let output containers size to their content (capped at 10000px). */
.output_wrapper, .output {
    height:auto !important;
    max-height: 10000px;
}
/* Remove the box shadow on scrolling outputs; the vendor-prefixed property
   needs its leading hyphen to be valid CSS. */
.output_scroll {
    box-shadow:none !important;
    -webkit-box-shadow:none !important;
}
</style>

**Importér alle moduler**

In [None]:
import glob # Read the prime minister's speech files in a directory
import io # Read the prime-minister's speech file with UTF-8 encoding
import os # Derive speech names from the speech file paths
import urllib.parse # Used to URL-encode the query strings

import matplotlib # Plotting
import matplotlib.pyplot as plt # Plotting
import numpy as np # Plotting
import pandas as pd # Displaying results in a data-frame
import requests # Used to search wikipedia for the articles
from afinn import Afinn # Sentiment analysis package
from IPython.core.display import display#, HTML # HTML displayer
from ipywidgets.widgets import Accordion, HTML, interact_manual
from scipy.interpolate import spline # Smoothing matplotlib graphs

from notebooks.exercises.src.text.rsspedia import Rsspedia # Searching in Wiki for text matches using Okapi BM25
from notebooks.exercises.src.text.news_sentiment_1 import RSSDashboard
from notebooks.exercises.src.text.news_sentiment_2 import PrimeMinisterSpeechDashboard
from src.text.document_retrieval.wikipedia import Wikipedia # Generic Wikipedia class

## Nyhedsanalyse 1: Sentiment

Du kan vælge mellem forskellige danske nyhedskilder og se de seneste nyheder med deres sentiment-scores.

In [None]:
# Dashboard object backing the news-source selector and the sentiment output.
RSSdb = RSSDashboard()


def ff(i):
    """Forward the chosen value ``i`` to the dashboard's ``_do_sentiment_analysis``."""
    RSSdb._do_sentiment_analysis(selected_value=i)


# Bind ``RSSdb.select`` to ``ff``; the analysis runs when the user triggers it manually.
# The trailing semicolon suppresses the echo of the call's return value.
interact_manual(ff, i=RSSdb.select);

## Nyhedsanalyse 2: relevante Wikipedia-sider (Okapi BM25)

In [None]:
# Create the Danish Wikipedia helper; per the original note this step also
# (down)loads the vocabulary. No cache directory URL is supplied.
wikipedia = Wikipedia(
    language="danish",
    cache_directory_url=None,
)


In [None]:
# Search Wikipedia for every RSS title, then present one accordion section per title.
rsspedia = Rsspedia(wikipedia)
rsspedia.search_wikipedia(RSSdb.data_titles)

# One HTML widget per title, filled with the matching entry of `search_results`.
list_labels = [
    HTML(value=rsspedia.search_results[i])
    for i in range(len(RSSdb.data_titles))
]

accordion = Accordion(children=list_labels)

# Section headers are numbered from 1 and show the RSS title.
for i, rss_title in enumerate(RSSdb.data_titles):
    accordion.set_title(i, "{}. {}".format(i + 1, rss_title))

display(accordion)

## Nyhedsanalyse 3: relevante Wikipedia-sider (Explicit Semantic Analysis)

In [None]:
# For every RSS title, ask dasem's Explicit Semantic Analysis for related Danish
# Wikipedia articles, fetch each article's intro extract through the MediaWiki API,
# and show the results in an accordion with one section per title.
from dasem.wikipedia import ExplicitSemanticAnalysis
esa = ExplicitSemanticAnalysis()
# NOTE(review): the original cell carried a commented-out `nltk.download('punkt')`;
# presumably the NLTK "punkt" data is needed by dasem -- confirm before relying on it.
content_items = []
n_wiki_results = 3
wiki_request_timeout = 10  # seconds; without a timeout a stalled request blocks the cell forever

for rss_title in RSSdb.data_titles:
    urls = []
    titles = []
    abstracts = []
    # Indexed below as list_labels[0][j]; assumes element 0 holds the related
    # article titles for the single query text -- TODO confirm against dasem.
    list_labels = esa.related(rss_title.lower(), n = n_wiki_results)
    # Iterate only over results that were actually returned (may be fewer than requested).
    for j in range(min(n_wiki_results, len(list_labels[0]))):
        url = "https://da.wikipedia.org/w/api.php?action=query&prop=extracts&exintro&titles={}&format=json&redirects" \
              .format(urllib.parse.quote_plus(list_labels[0][j].replace(" ","_")))
        json_content = requests.get(url, timeout=wiki_request_timeout).json()
        # The response maps page ids to page objects; a single title yields a single page.
        content_item = next(iter(json_content["query"]["pages"].values()))
        urls.append(url)
        # Fall back gracefully when a page object lacks a field (e.g. a missing page
        # has no "extract"), keeping the three lists the same length.
        titles.append(content_item.get("title", list_labels[0][j]))
        abstracts.append(content_item.get("extract", ""))
    content_items.append(HTML(value = "{}{}".format(list_labels[0], rsspedia.display_beautifully(titles, abstracts, urls))))

accordion = Accordion(children = content_items)

# Section headers are numbered from 1 and show the RSS title.
for i, rss_title in enumerate(RSSdb.data_titles):
    accordion.set_title(i, "{}. {}".format(i + 1, rss_title))

display(accordion)

# Sentimentanalyse af statsministerens tale
Statsministerens tale ved Folketingets åbning gennem årene

In [None]:
# Load every speech under data/statsminister/ and compute its total AFINN score.
# `speeches` maps speech name -> list of lines; `speeches_sentiments` maps
# speech name -> sum of the per-line AFINN scores.
afinn = Afinn(language = "da")
speeches = {}
speeches_sentiments = {}
for filepath in glob.iglob('data/statsminister/*.txt'):
    # The file name without ".txt" is used as the speech's key.
    speech_name = os.path.basename(filepath).replace(".txt","")
    # Context manager so the file handle is closed as soon as the lines are read.
    with open(filepath, mode="r", encoding="utf-8") as speech_file:
        speeches[speech_name] = [line.rstrip('\n') for line in speech_file]
    speeches_sentiments[speech_name] = sum(afinn.score(line) for line in speeches[speech_name])

# Fail with a readable message instead of a cryptic unpacking error when no file matched.
if not speeches_sentiments:
    raise FileNotFoundError("No speech files matched 'data/statsminister/*.txt'; check the working directory")

lists = sorted(speeches_sentiments.items()) # sorted by key, return a list of tuples
x, y = zip(*lists) # unpack a list of pairs into two tuples



In [None]:
# Plot the total sentiment per speech (`x`, `y` come from the sentiment cell) with
# dashed vertical separators and a rotated name label for each prime minister.
fig, ax = plt.subplots(figsize=(20, 10))
ax.set_ylim(bottom = -50, top = 300)

# x positions of the dashed separators; presumably the points where the
# prime minister changes -- TODO confirm against the speech files.
term_boundaries = [5, 12, 14, 18]
for boundary in term_boundaries:
    ax.axvline(x = boundary, color='k', linestyle='--')

ax.tick_params(labelsize = 13)
ax.plot(x, y, color="black", linewidth = 4)
ax.set_xlabel('År', fontsize=16)
ax.set_ylabel('Sentiment', fontsize=15)
ax.set_title('Statsministeren\'s tale sentiment', fontsize=18)

# (x position, name, box edge colour, box face colour) for each label.
term_labels = [
    (19, "Lars Løkke Rasmussen", (0.5, 0.5, 1), (0.8, 0.8, 1)),
    (15, "Helle Thorning-Schmidt", (0.5, 1, 0.5), (0.8, 1, 0.8)),
    (13, "Lars Løkke Rasmussen", (0.5, 0.5, 1), (0.8, 0.8, 1)),
    (6, "Anders Fogh Rasmussen", (1, 0.5, 0.5), (1, 0.8, 0.8)),
    (0, "Poul Nyrup Rasmussen", (0.5, 0.5, 0.5), (0.8, 0.8, 0.8)),
]
for label_x, minister_name, edge_color, face_color in term_labels:
    # Labels are anchored at y = -40, just above the bottom of the y-range.
    ax.text(label_x, -40, minister_name, size=15, rotation=90., ha="center", va="bottom",
            bbox=dict(boxstyle="round", ec=edge_color, fc=face_color))

ax.grid()
plt.show()

In [None]:
# Create the prime-minister speech dashboard and load its speeches.
pmSpeechDashboard = PrimeMinisterSpeechDashboard()
pmSpeechDashboard.load_speeches()


def f(i):
    """Call the dashboard's ``_do_sentiment_analysis`` for selection ``i`` with exponential smoothing off."""
    pmSpeechDashboard._do_sentiment_analysis(
        speech_number=i,
        use_exp_smoothing=False,
    )


# Bind ``pmSpeechDashboard.select`` to ``f``; the analysis runs when triggered manually.
# The trailing semicolon suppresses the echo of the call's return value.
interact_manual(f, i=pmSpeechDashboard.select);