In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import pickle

from IPython.display import HTML
from keras.models import Model, load_model
from matplotlib.colors import rgb2hex
import matplotlib.pyplot as plt
import numpy as np

from masterthesis.features.build_features import file_to_sequence
from masterthesis.utils import DATA_DIR, ATTENTION_LAYER, MODEL_DIR, RESULTS_DIR, load_split

Using TensorFlow backend.


In [3]:
def weight2color(cmap, w):
    """Translate the weight ``w`` into a hex colour string using ``cmap``.

    ``cmap`` is called with ``w`` and its result is handed to ``rgb2hex``.
    """
    colour = cmap(w)
    return rgb2hex(colour)

In [4]:
def show_weighted(tokens, weights):
    """Display ``tokens`` as one line of HTML, each coloured by its weight.

    Tokens and weights are paired positionally with ``zip``, so the output
    stops at the shorter of the two. Each weight is mapped to a colour with
    the ``coolwarm`` colormap through ``weight2color``.

    Args:
        tokens: iterable of tokens to show (converted with ``str``).
        weights: iterable of weights, one per token.
    """
    # Local import keeps this cell self-contained.
    from html import escape

    cmap = plt.cm.coolwarm
    # Tokens come from raw text files, so they must be HTML-escaped: a token
    # containing '<', '>' or '&' would otherwise corrupt the rendered markup.
    spans = [
        '<span style="color: %s">%s</span>' % (weight2color(cmap, weight), escape(str(token)))
        for token, weight in zip(tokens, weights)
    ]
    display(HTML(' '.join(spans)))

In [5]:
def tokens_in_file(filepath):
    """Lazily yield every whitespace-separated token of the file at ``filepath``.

    ``filepath`` must provide an ``open()`` method (e.g. a ``pathlib.Path``);
    the file is opened with that method's default arguments and read line by
    line. Blank lines contribute no tokens.
    """
    with filepath.open() as handle:
        for row in handle:
            # split() without arguments already ignores leading/trailing whitespace.
            yield from row.split()

In [6]:
def file_with_attention(filepath, attention_model, w2i, maxlen=None, seq_len=700):
    """Show the tokens of ``filepath`` coloured by the model's attention weights.

    The file is converted to an index sequence with ``file_to_sequence``, fed
    to ``attention_model`` as a batch of one, and the resulting weights are
    min-max scaled to [0, 1] before being passed to ``show_weighted``.
    Tokens whose index equals ``w2i['__UNK__']`` are displayed as '❓'.

    Args:
        filepath: path of the text file; must be accepted by both
            ``file_to_sequence`` and ``tokens_in_file``.
        attention_model: object whose ``predict`` returns the attention
            weights for a batch of index sequences.
        w2i: token-to-index mapping containing the key ``'__UNK__'``.
        maxlen: if given, only the first ``maxlen`` weights (and hence
            tokens) are shown.
        seq_len: sequence length passed to ``file_to_sequence``. Defaults to
            700 — presumably the input length the model was trained with;
            confirm before changing.

    NOTE(review): tokens are paired with ``x[0]`` by position, which assumes
    ``file_to_sequence`` keeps tokens in file order starting at index 0
    (i.e. no padding in front) — confirm against its implementation.
    """
    x = file_to_sequence(seq_len, filepath, w2i)[np.newaxis, :]
    weights = attention_model.predict(x)[0]

    # Min-max normalise. When every weight is identical the range is zero;
    # skip the division so the weights stay at 0 instead of becoming NaN.
    weights = weights - weights.min()
    peak = weights.max()
    if peak > 0:
        weights = weights / peak

    if maxlen is not None:
        weights = weights[:maxlen]

    unk_idx = w2i['__UNK__']
    # Lazily replace out-of-vocabulary tokens with a marker; show_weighted's
    # zip with `weights` bounds how many tokens are actually consumed.
    tokens = (
        token if idx != unk_idx else '❓'
        for token, idx in zip(tokens_in_file(filepath), x[0])
    )
    show_weighted(tokens, weights)

In [7]:
# Name of the trained model run; the vocabulary, model and results file names
# used in the following cells are all derived from it.
model_name = 'rnn_nli-25717032'

In [8]:
# Load the token-to-index mapping saved alongside the model. The file is opened
# in a `with` block so the handle is closed again after loading.
# NOTE: pickle.load can execute arbitrary code — only load files you trust.
with (MODEL_DIR / (model_name + '_model_w2i.pkl')).open('rb') as w2i_file:
    w2i = pickle.load(w2i_file)
model = load_model(str(MODEL_DIR / (model_name + '_model.h5')))

# Sub-model sharing `model`'s input but returning the output of the layer
# named by ATTENTION_LAYER instead of the final prediction.
attention_model = Model(inputs=model.input, outputs=model.get_layer(ATTENTION_LAYER).output)

In [9]:
dev = load_split('dev')

# Load the stored results for this model; the `with` block closes the file.
# NOTE: pickle.load can execute arbitrary code — only load files you trust.
with (RESULTS_DIR / (model_name + '.pkl')).open('rb') as results_file:
    results = pickle.load(results_file)

# Pick three dev rows where the prediction equals the true label. A fixed
# random_state makes the same rows come back on Restart & Run All.
# NOTE(review): positional indexing assumes `results` is ordered like `dev` — confirm.
samples = dev.iloc[np.where(results.true == results.predictions)].sample(3, random_state=0)
display(samples)
filenames = [DATA_DIR / 'txt' / (fn + '.txt') for fn in samples.filename]

Unnamed: 0,age,cefr,filename,gender,lang,num_tokens,split,testlevel,title,topic
878,36,B1,s0043,kvinne,spansk,230,dev,Språkprøven,Fremmedspråkslæring,utdannelse språk
667,27,B1/B2,h0672,kvinne,spansk,408,dev,Høyere nivå,Diktet 'Ingen ting' av Paulus Utsi om språk,litteratur dikt språk
130,28,B1/B2,h0131,kvinne,polsk,753,dev,Høyere nivå,Borgerlønn,økonomi


In [10]:
# Attention colouring for the first sampled file, limited to the first 300 weights.
file_with_attention(filenames[0], attention_model, w2i, maxlen=300)

In [11]:
# Attention colouring for the second sampled file, limited to the first 300 weights.
file_with_attention(filenames[1], attention_model, w2i, maxlen=300)

In [12]:
# Attention colouring for the third sampled file, limited to the first 300 weights.
file_with_attention(filenames[2], attention_model, w2i, maxlen=300)