In [1]:
from fastai.text.all import *
from fastai.data.external import *
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

In [2]:
class FwdHook():
    """Context manager that records the output seen by a forward hook.

    Construction registers ``hook_fn`` on ``m`` through
    ``m.register_forward_hook``.  Whenever the hook is invoked, a detached
    clone of the output is kept in ``self.stored``.  Leaving the ``with``
    block removes the hook through the handle returned at registration.
    """

    def __init__(self, m):
        # Keep the registration handle so __exit__ can unregister the hook.
        handle = m.register_forward_hook(self.hook_fn)
        self.hook = handle

    def hook_fn(self, m, i, o):
        """Hook callback: store a detached clone of the output ``o``."""
        detached = o.detach()
        self.stored = detached.clone()

    def __enter__(self, *args):
        return self

    def __exit__(self, *args):
        handle = self.hook
        handle.remove()
        
class BwdHook():
    """Context manager that records the first output-gradient seen by a backward hook.

    Construction registers ``hook_fn`` on ``m`` through
    ``m.register_backward_hook``.  Whenever the hook is invoked, a detached
    clone of ``go[0]`` is kept in ``self.stored``.  Leaving the ``with``
    block removes the hook through the handle returned at registration.
    """

    def __init__(self, m):
        # Keep the registration handle so __exit__ can unregister the hook.
        handle = m.register_backward_hook(self.hook_fn)
        self.hook = handle

    def hook_fn(self, m, gi, go):
        """Hook callback: store a detached clone of the first entry of ``go``."""
        first_grad = go[0]
        self.stored = first_grad.detach().clone()

    def __enter__(self, *args):
        return self

    def __exit__(self, *args):
        handle = self.hook
        handle.remove()
        

In [26]:
# When True, analyzeReview prints how long an analysis took.
VERBOSE = False

# User-facing captions: first key is the text id, second key the language code.
TEXTS = {
    "plot_title": dict(en="Movie Review Analyzer", de="Filmkritik Analyse"),
    "dpd_lang": dict(en="Language", de="Sprache"),
    "btn_doc": dict(en="Show Documentation", de="Dokumentation Anzeigen"),
    "btn_sample_review": dict(en="Sample Review", de="Beispiel Filmkritik"),
    "btn_free_text": dict(en="Free Text Review", de="Freitext Filmkritik"),
    "btn_header": dict(en="Movie Review Analyzer", de="Filmkritik Analyse"),
    "btn_submit": dict(en="Analyze Review", de="Filmkritik analysieren"),
    "btn_status_init": dict(en="", de=""),
    "btn_status_progress": dict(en="Processing - ", de="Verarbeitung läuft - "),
    "btn_status_default": dict(en="Please Wait", de="Bitte Warten"),
    "btn_status_ready": dict(en="Ready For User Input", de="Bereit für Benutzereingabe"),
}

# English documentation page (raw HTML): a short description of the app followed
# by a table explaining the plot toolbar icons. Registered under "en" in DOC.
HTML_EN = """<div class="jp-RenderedHTMLCommon jp-RenderedMarkdown jp-MarkdownOutput " data-mime-type="text/markdown">
<h1 id="movie-review-analyzer">Movie Review Analyzer</h1>
<p><a href="https://github.com/Jack-Byte/Movie-Review-Analyzer">Movie Review Analyzer</a> is a web application that processes a movie review, and then determines whether it is a positive or negative review. A line graph shows how the particular word contributed to the final classification. If the word has a large diameter and is located higher, it was one of the deciding factors for the classification.</p>
<table>
<thead><tr>
<th>Icon</th>
<th>Name</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr>
<td><img src="https://docs.bokeh.org/en/latest/_images/Pan.png" alt=""></td>
<td>Pan</td>
<td>The pan tool allows the user to pan the plot by left-dragging a mouse or dragging a finger across the plot region.</td>
</tr>
<tr>
<td><img src="https://docs.bokeh.org/en/latest/_images/BoxZoom.png" alt=""></td>
<td>BoxZoom</td>
<td>The box zoom tool allows the user to define a rectangular region to zoom the plot bounds to. This is done by left-dragging a mouse, or dragging a finger across the plot area.</td>
</tr>
<tr>
<td><img src="https://docs.bokeh.org/en/latest/_images/WheelZoom.png" alt=""></td>
<td>WheelZoom</td>
<td>The wheel zoom tool will zoom the plot in and out, centered on the current mouse location. It will respect any min and max values and ranges, preventing zooming in and out beyond these values.</td>
</tr>
<tr>
<td><img src="https://docs.bokeh.org/en/latest/_images/Save.png" alt=""></td>
<td>Save</td>
<td>The save tool pops up a modal dialog that allows the user to save a PNG image of the plot.</td>
</tr>
<tr>
<td><img src="https://docs.bokeh.org/en/latest/_images/Reset.png" alt=""></td>
<td>Reset</td>
<td>The reset tool will restore the plot ranges to their original values.</td>
</tr>
</tbody>
</table>
</div>"""

# German documentation page (raw HTML): a short description of the app followed
# by a table explaining the plot toolbar icons. Registered under "de" in DOC.
# Text fixes relative to the previous wording: the product name is no longer
# repeated right after the link, "den das Bild" -> "das Bild", and the
# possessive now agrees with "das Diagramm" ("seine" instead of "ihre").
HTML_DE = """<div class="jp-RenderedHTMLCommon jp-RenderedMarkdown jp-MarkdownOutput " data-mime-type="text/markdown">
<h1 id="movie-review-analyzer">Movie Review Analyzer</h1>
<p>Der <a href="https://github.com/Jack-Byte/Movie-Review-Analyzer">Movie Review Analyzer</a> ist eine Webanwendung, die eine Filmkritik verarbeitet, und dann ermittelt ob es sich um eine positive oder negative Kritik handelt. Ein Liniendiagramm zeigt wie das jeweilige Wort zu der letztlichen Einordnung beigetragen hat. Wenn das Wort einen großen Durchmesser hat und höher gelegen ist, war es mit ausschlaggebend für die Klassifizierung.</p>
<table>
<thead><tr>
<th>Icon</th>
<th>Name</th>
<th>Beschreibung</th>
</tr>
</thead>
<tbody>
<tr>
<td><img src="https://docs.bokeh.org/en/latest/_images/Pan.png" alt=""></td>
<td>Verschieben</td>
<td>Mit dem Verschieben-Werkzeug kann der Benutzer das Bild verschieben.</td>
</tr>
<tr>
<td><img src="https://docs.bokeh.org/en/latest/_images/BoxZoom.png" alt=""></td>
<td>Box-Zoom</td>
<td>Mit dem Box-Zoom-Werkzeug kann der Benutzer einen rechteckigen Bereich definieren, auf den die Diagrammgrenzen gezoomt werden. Dies geschieht durch Ziehen mit der linken Maustaste oder durch Ziehen mit dem Finger über den Diagrammbereich.</td>
</tr>
<tr>
<td><img src="https://docs.bokeh.org/en/latest/_images/WheelZoom.png" alt=""></td>
<td>Rad-Zoom</td>
<td>Mit dem Rad-Zoom-Werkzeug wird die Darstellung ein- und ausgezoomt, zentriert auf die aktuelle Mausposition.</td>
</tr>
<tr>
<td><img src="https://docs.bokeh.org/en/latest/_images/Save.png" alt=""></td>
<td>Speichern</td>
<td>Das Speicherwerkzeug öffnet einen Dialog, der es dem Benutzer ermöglicht, ein PNG-Bild des Diagramms zu speichern.</td>
</tr>
<tr>
<td><img src="https://docs.bokeh.org/en/latest/_images/Reset.png" alt=""></td>
<td>Zurücksetzen</td>
<td>Das Rücksetzwerkzeug stellt das Diagramm auf seine ursprünglichen Werte zurück.</td>
</tr>
</tbody>
</table>
</div>"""

# Documentation HTML per language code.
DOC = dict(en=HTML_EN, de=HTML_DE)

# Currently selected UI language code; used as the second-level key into TEXTS and DOC.
lang = "en"

# data viz stuff
# Hover tooltip rows for the plot: (label shown, data-source column reference).
TOOLTIPS = [("Word", "@word")]

In [42]:
def init():
    """Fetch the sample data set and build the learner, reporting status in the GUI.

    Sets the module-level globals ``df`` (the table read from ``texts.csv``)
    and ``learn`` (the learner returned by ``text_classifier_learner``).
    Everything runs inside the ``output`` context; the status button is
    switched to "wait" at the start and to "ready" at the end.

    NOTE(review): no training or weight loading of the classifier is visible
    in this section - confirm whether that happens elsewhere.
    """
    global df, learn
    with output:
        displayWaitMessage()
        print('Downloading IMDB Review data set')
        data_dir = untar_data(URLs.IMDB_SAMPLE)
        df = pd.read_csv(data_dir/'texts.csv')
        print('Downloading model (AWD_LSTM for movie review sentiment from fastai)')
        loaders = TextDataLoaders.from_df(
            df,
            path=data_dir,
            text_col='text',
            label_col='label',
            valid_col='is_valid',
            seq_len=1,
        )
        learn = text_classifier_learner(loaders, AWD_LSTM)
        displayReadyness()

def predictSentiment():
    """Classify the text in ``text_area`` and score each word prefix.

    The text is split on whitespace and turned into cumulative prefixes
    (first word, first two words, ...).  All prefixes are run through
    ``learn.model`` as one batch while a forward and a backward hook sit on
    ``learn.model[1].layers[0][2]``.  For every batch row the logit of the
    chosen class is back-propagated and the hooked activation of that row is
    multiplied with the hooked gradient and summed to a single score.

    Returns:
        tuple ``(cat, cas, splits)``: ``cat`` is the argmax of
        ``output[0][0]``, ``cas`` is a numpy array with one score per batch
        row, and ``splits`` is the list of whitespace-separated words.

    NOTE(review): an empty text gives an empty prefix list and ``bs=0``;
    that case is not handled here - confirm how ``test_dl`` reacts.
    """
    splits = text_area.value.split()
    # text word by word: element i holds the first i+1 words joined by spaces
    text_wbw = [' '.join(splits[:i+1]) for i in range(len(splits))]
    
    cas = []
    learn.model.reset()
    # Both hooks are attached to the same layer; the backward hook stays active
    # for the gradient loop below, the forward hook only for the forward pass.
    with BwdHook(learn.model[1].layers[0][2]) as bhook:
        with FwdHook(learn.model[1].layers[0][2]) as fhook:
            # A single batch containing every prefix.
            tdl = learn.dls.test_dl(text_wbw, bs=len(text_wbw))
            b = tdl.one_batch()
            # `output` is a local here and shadows the module-level Output widget.
            output = learn.model.eval()(b[0])
            acs = fhook.stored
        # determine predicted classification for the complete text
        # NOTE(review): text_wbw[0] is only the first word, so row 0 is the
        # complete text only if test_dl reorders the items (e.g. longest
        # first) - TODO confirm.  If it does reorder, also verify that the
        # order of `cas` still matches `splits`.
        cat = output[0][0].argmax()
        # calc grad for each batch item
        for i in range(output[0].shape[0]):
            # retain_graph=True because the same graph is back-propagated once per row.
            output[0][i,cat].backward(retain_graph=True)
            # The whole stored gradient is used, not only row i; presumably the
            # other rows are zero for this backward call - verify.
            grad = bhook.stored
            cas.append( (acs[i]*grad).sum().detach() )

    cas = np.array(cas)
    
    return cat, cas, splits

def plotClassActivation(cat, cas, splits):
    """Build the figure showing one class-activation value per word.

    Args:
        cat: predicted class; truthy is labelled "positive" and drawn green,
            falsy is labelled "negative" and drawn red.
        cas: one class-activation value per word.
        splits: the words, in the same order as ``cas``.

    Returns:
        The figure with a line and a circle glyph over the word positions.
        Circle sizes are the class-activation values multiplied by 100.
    """
    sentiment, clr = ('positive', 'green') if cat else ('negative', 'red')

    frame = pd.DataFrame(cas, columns=['class activation'])
    frame = frame.assign(word=splits, x_values=frame.index.values)
    frame['size'] = frame['class activation'] * 100
    source = ColumnDataSource(data=frame)

    title = TEXTS["plot_title"][lang] + f' - {sentiment} sentiment'
    p = figure(
        sizing_mode='stretch_both',
        title=title,
        output_backend="webgl",
        tooltips=TOOLTIPS,
    )
    glyph_args = dict(x='x_values', y='class activation', color=clr, source=source)
    p.line(**glyph_args)
    p.circle(size='size', **glyph_args)

    return p

In [32]:
from bokeh.plotting import ColumnDataSource, figure, output_notebook, show
from bokeh.models import Text, Rect, HoverTool, Range1d
from ipywidgets import Label, Button, Output, VBox, AppLayout, Layout, Dropdown, Textarea
import panel as pn
from IPython.core.display import HTML

# GUI
pn.extension()


def _auto_layout():
    """Return a new Layout with automatic height and width (one per widget)."""
    return Layout(height='auto', width='auto')


def _caption_button(text_key, **button_kwargs):
    """Create a Button captioned with TEXTS[text_key] in the current language."""
    return Button(description=TEXTS[text_key][lang], layout=_auto_layout(), **button_kwargs)


# defining widgets
dpd_lang = Dropdown(
    options=['en', 'de'],
    value='en',
    description=TEXTS["dpd_lang"][lang],
    layout=_auto_layout(),
)
btn_doc = _caption_button("btn_doc")
btn_sample_review = _caption_button("btn_sample_review")
# The clear-text button carries the "btn_free_text" caption.
btn_clear_text = _caption_button("btn_free_text")
btn_submit = _caption_button("btn_submit")
# Header and status are disabled buttons used purely as labels.
btn_header = _caption_button("btn_header", disabled=True)
btn_status = _caption_button("btn_status_init", disabled=True)
# NOTE(review): `clear_output` does not look like a constructor option of Output
# (it is a method name) - confirm whether this keyword has any effect.
output = Output(clear_output=True)
text_area = Textarea(layout=_auto_layout())
btn_submit.style.button_color = 'green'

# defining event functions
def displayWaitMessage(message=None):
    """Show a "processing" caption on the status button and tint it yellow.

    Args:
        message: text appended after the localized progress prefix.  When
            omitted (``None``), the localized default wait text is used.

    The default text is looked up at call time.  A default argument would be
    evaluated only once, when the function is defined, and would therefore
    stay in the language that was active at that moment.
    """
    if message is None:
        message = TEXTS["btn_status_default"][lang]
    btn_status.description = f'{TEXTS["btn_status_progress"][lang]} {message}'
    btn_status.style.button_color = 'yellow'

def displayReadyness():
    """Show the localized "ready" caption on the status button and tint it light green."""
    ready_caption = TEXTS["btn_status_ready"][lang]
    btn_status.description = ready_caption
    btn_status.style.button_color = 'lightgreen'


def analyzeReview():
    """Run the sentiment analysis on the current text and show the plot.

    Clears the output area, switches the status button to "wait", then,
    inside the ``output`` context, predicts the sentiment, builds the plot
    and displays it.  The status button is always switched back to "ready",
    even when prediction or plotting raises.  When ``VERBOSE`` is true the
    elapsed time is printed.
    """
    import time  # local import so the timing does not depend on a star-import

    # The timer was previously never started, so VERBOSE=True raised NameError.
    start = time.time()
    emptyPlot()
    displayWaitMessage()
    with output:
        try:
            cat, cas, splits = predictSentiment()
            p = plotClassActivation(cat, cas, splits)
            display(pn.pane.Bokeh(p))
        finally:
            # Reset the status even on failure; otherwise the button would stay
            # on "Processing" after an error.
            displayReadyness()
        if VERBOSE:
            end = time.time()
            print('took', end-start, 'for displaying the preds')
        
    
def on_btn_sample_review(b):
    """Click handler: put one randomly sampled review from ``df`` into the text area.

    The output area is cleared afterwards.  ``b`` is the clicked button and
    is not used.
    """
    picked = df.text.sample(1)
    text_area.value = picked.values[0]
    emptyPlot()

def on_btn_clear_text(b):
    """Click handler: empty the text area and clear the output area.

    ``b`` is the clicked button and is not used.
    """
    text_area.value = ''
    emptyPlot()
    
def on_btn_submit(b):
    """Click handler for the submit button: run the review analysis."""
    analyzeReview()

def on_btn_doc_clicked(b):
    """Click handler: replace the output area content with the documentation page.

    The page is the ``DOC`` entry for the current language.  ``b`` is the
    clicked button and is not used.
    """
    output.clear_output()
    with output:
        page = DOC[lang]
        display(HTML(page))
    
def on_lang_select(change):
    """Observer for the language dropdown: switch the UI language.

    Updates the global ``lang`` from ``dpd_lang.value`` and re-captions every
    widget from ``TEXTS``.  The status button is reset to its initial text.

    Args:
        change: the change notification passed by the observer; not used,
            the current value is read from the dropdown directly.
    """
    global lang
    lang = dpd_lang.value
    # (widget, TEXTS key) pairs.  The clear-text button is `btn_clear_text` and
    # uses the "btn_free_text" caption; the previous code referenced a
    # non-existent `btn_free_text` widget here and failed with NameError.
    captions = (
        (dpd_lang, "dpd_lang"),
        (btn_clear_text, "btn_free_text"),
        (btn_sample_review, "btn_sample_review"),
        (btn_submit, "btn_submit"),
        (btn_header, "btn_header"),
        (btn_status, "btn_status_init"),
        (btn_doc, "btn_doc"),
    )
    for widget, text_key in captions:
        widget.description = TEXTS[text_key][lang]

# adding events
btn_doc.on_click(on_btn_doc_clicked)
btn_sample_review.on_click(on_btn_sample_review)
btn_submit.on_click(on_btn_submit)
btn_clear_text.on_click(on_btn_clear_text)
# Observe only the dropdown's `value` trait.  Without `names`, the handler is
# called for every trait change of the dropdown, including the description
# change the handler itself performs.
dpd_lang.observe(on_lang_select, names='value')

# Layout and Style
# Top row: documentation button on the left, language dropdown on the right.
applayout1 = AppLayout(left_sidebar=btn_doc,
                       right_sidebar=dpd_lang)
# Action row: sample review, submit, clear text.
applayout2 = AppLayout(left_sidebar=btn_sample_review,
                       center=btn_submit,
                       right_sidebar=btn_clear_text)

def emptyPlot():
    """Clear whatever is currently shown in the shared ``output`` area."""
    output.clear_output()

# Load the data and build the learner first, then render the assembled GUI.
init()
gui_rows = [btn_header, applayout1, btn_status, applayout2, text_area, output]
display(VBox(gui_rows))
