In [1]:
%%html
<style>
/* Remove the height cap on output areas (auto height, 10000px maximum). */
.output_wrapper, .output {
    height:auto !important;
    max-height:10000px;
}
/* Remove the box shadow on scrolling outputs; vendor-prefixed properties
   need the leading hyphen to be recognised. */
.output_scroll {
    box-shadow:none !important;
    -webkit-box-shadow:none !important;
}
</style>

In [7]:
# This cell needs to be executed first for the initial
# visualization to work. All important functions used here
# are explained later.

from IPython.display import HTML
import sys
from collections import defaultdict
import numpy as np
from prettytable import PrettyTable
import networkx as nx
from networkx.algorithms import bipartite
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
from matplotlib.collections import LineCollection
import pandas as pd
import seaborn as sns
from copy import deepcopy, copy
%matplotlib inline

# Corpus location and training configuration shared by the cells below.
data_file = "data/dev-test-train.pl-en"
# Used as a slice bound in get_bitext, so a huge value means "read every
# sentence pair". sys.maxsize exists on Python 2.6+ and Python 3, whereas
# sys.maxint is Python 2 only.
num_sent = sys.maxsize
num_iter = 10

def get_bitext(file_name, num_sent):
    """Read a parallel corpus and return its first num_sent sentence pairs.

    Every line of file_name holds the sides of one pair separated by
    ' ||| '. Each side is stripped and split on whitespace, so the result
    is a list of pairs, each pair being a list of token lists.
    """
    # The context manager closes the file even if parsing raises.
    with open(file_name) as corpus:
        pairs = [[sentence.strip().split() for sentence in pair.split(' ||| ')]
                 for pair in corpus]
    return pairs[:num_sent]

# Load the corpus once at module level; show_sent_data and the animation
# cells below read this global `bitext`.
bitext = get_bitext(data_file, num_sent)

def initialize_theta(bitext):
    """Build the uniform starting translation table.

    bitext is an iterable of (f, e) pairs of token lists. The result is a
    dict keyed by English word; each value is a defaultdict(float) that maps
    every Foreign vocabulary word to 1/(size of the Foreign vocabulary).
    """
    foreign_words, english_words = set(), set()
    for foreign_sent, english_sent in bitext:
        foreign_words.update(foreign_sent)
        english_words.update(english_sent)
    uniform = 1.0/len(foreign_words)
    return {english: defaultdict(float, ((foreign, uniform) for foreign in foreign_words))
            for english in english_words}

def expectation(theta, bitext):
    """E-step: collect expected counts under the current table theta.

    For every Foreign token, its translation probabilities given each
    English word of the paired sentence are normalised to sum to one and
    added as fractional counts. Returns (e_count, fe_count): expected
    totals per English word and per (Foreign word, English word) pair,
    both as defaultdict(float).
    """
    english_totals = defaultdict(float)
    pair_totals = defaultdict(float)
    for foreign_sent, english_sent in bitext:
        for foreign in foreign_sent:
            scores = [theta[english][foreign] for english in english_sent]
            normalizer = sum(scores)
            for english, score in zip(english_sent, scores):
                share = score / normalizer
                pair_totals[(foreign, english)] += share
                english_totals[english] += share
    return english_totals, pair_totals

def maximization(theta, e_count, fe_count):
    """M-step: turn expected counts into a new translation table.

    Works on a deep copy of theta, so the input table is left untouched.
    Each entry becomes fe_count[(f, e)] / e_count[e] when the pair has an
    expected count, and 0 otherwise.
    """
    updated = deepcopy(theta)
    for english, row in updated.items():
        for foreign in row:
            pair = (foreign, english)
            row[foreign] = fe_count[pair]/e_count[english] if pair in fe_count else 0
    return updated

def iteration(theta, bitext):
    """Run one EM round (expectation, then maximization) and return the new table."""
    counts = expectation(theta, bitext)
    return maximization(theta, *counts)

def align(f_sent, e_sent, theta):
    """Link each Foreign position to its most probable English position.

    Returns a list of (i, j) pairs, one per Foreign index i. j is the first
    English index with the highest theta[e][f] above 0; it stays 0 when no
    probability exceeds 0.
    """
    links = []
    for i, foreign in enumerate(f_sent):
        best_j, best_prob = 0, 0
        for j, english in enumerate(e_sent):
            candidate = theta[english][foreign]
            if candidate > best_prob:
                best_j, best_prob = j, candidate
        links.append((i, best_j))
    return links

#######################################
# STATIC ILLUSTRATIONS
#######################################
def show_alignments():
    """Re-read the corpus and train for num_iter iterations, drawing alignments at every step."""
    corpus = get_bitext(data_file, num_sent)
    train(corpus, num_iter, draw_a=True)

def show_translation_tables():
    """Re-read the corpus and train for num_iter iterations, drawing the translation table at every step."""
    corpus = get_bitext(data_file, num_sent)
    train(corpus, num_iter, draw_p=True)
    
def show_sent_data(at_iter, sent_index=0, word_index=0):
    """Plot one Foreign word's translation and alignment probabilities.

    Trains on the module-level bitext for at_iter iterations, then draws
    sentence pair sent_index with an edge from Foreign word word_index to
    every English word. Edge width is proportional to the alignment
    probability, and both probabilities are printed above each English word.
    Indices past the end are clamped to the last sentence / last word.
    """
    if sent_index > len(bitext)-1:
        sent_index = len(bitext)-1
    if word_index > len(bitext[sent_index][0])-1:
        word_index = len(bitext[sent_index][0])-1
    theta = train(bitext, at_iter)
    f, e = bitext[sent_index]
    coords = get_coordinates(bitext, one_sent=True, sent_index=sent_index, word_index=word_index)
    # Two annotation rows above the English words; the extra first element
    # of each row is the row label placed at x=0.
    annot1_y = [y+0.5 for y in coords['y_e']]
    annot1_y.append(annot1_y[0])
    annot2_y = [y+1 for y in coords['y_e']]
    annot2_y.append(annot2_y[0])
    annot_x = [0]
    annot_x.extend(coords['x_e'])
    annot_color = ['#1a75ff']
    annot_color.extend(['#000000']*len(coords['x_e']))
    alignment_p = ['{:.2f}'.format(p) for p in
                   get_alignment_probabilities(f, e, theta, one_word=True, word_index=word_index)[0]]
    annot2_words = ['alignment\nprobability']
    annot2_words.extend(alignment_p)
    translation_p = ['{:.2f}'.format(p) for p in
                     get_translation_probabilities(f, e, theta, one_word=True, word_index=word_index)[word_index]]
    annot1_words = ['translation\nprobability']
    annot1_words.extend(translation_p)
    plt.figure(figsize=(10, 8))
    ax = plt.axes()
    plt.axis('off')
    ax.scatter(coords['x_f']+coords['x_e']+annot_x*2,
               coords['y_f']+coords['y_e']+annot1_y+annot2_y,
               s=30, c='white', marker='o', lw=0, alpha=1)
    plot_words(ax, coords['x_f'], coords['y_f'], coords['w_f'], 'top')
    plot_words(ax, coords['x_e'], coords['y_e'], coords['w_e'], 'bottom')
    plot_words(ax, annot_x, annot1_y, annot1_words, 'bottom', weight='normal', color=annot_color)
    plot_words(ax, annot_x, annot2_y, annot2_words, 'bottom', weight='normal', color=annot_color)

    # get_line_weights flattens a sentence pair word by word: each Foreign
    # word contributes len(e) consecutive weights. The block for word_index
    # therefore starts at len(e)*word_index and is len(e) long, matching the
    # len(e) edges returned by get_coordinates.
    w_start = len(e)*word_index
    raw_line_weights = get_line_weights([bitext[sent_index]], at_iter, thetas=[theta])[0]
    line_weights = [w*10 for w in raw_line_weights][w_start:(w_start+len(e))]
    for xy, w in zip(coords['edges'], line_weights):
        ax.plot(xy[0], xy[1], alpha=0.9, lw=w, linestyle='-', color='#1a75ff')
    plt.show()

def draw_iteration(at_iter, bitext):
    """Show one EM update as three side-by-side panels.

    Left: translation table after at_iter-1 iterations. Middle: alignment
    graph whose line widths come from get_line_weights(bitext, at_iter-1).
    Right: translation table after at_iter iterations. Each table is
    obtained by training from scratch with train().
    """
    theta_before = train(bitext, at_iter-1)
    theta_after = train(bitext, at_iter)
    fig = plt.figure(1, figsize=(20, 5))
    
    # first translation table
    # sns.heatmap is not given an axes argument, so it draws on the axes
    # made current by plt.subplot just above.
    ax1 = plt.subplot(131)
    theta2frame = pd.DataFrame.from_dict(theta_before, orient="index")
    data = theta2frame.round(2)
    sns.set(font_scale=1.2)
    sns.set_style({"savefig.dpi": 100})
    sns.heatmap(data, cmap=plt.cm.Blues, linewidths=.1, annot=True, fmt="2g")
    ax1.xaxis.tick_top()
    
    # alignment graph
    ax2 = plt.subplot(132)
    # One weight list per stored theta, scaled by 10/3 to serve as line widths.
    line_weights = [[w*(10.0/3) for w in sublist] for sublist in get_line_weights(bitext, at_iter-1)]
    coordinates = get_coordinates(bitext)
    ax2.axis('off')
    ax2.scatter(coordinates['x_f']+coordinates['x_e'], coordinates['y_f']+coordinates['y_e'],
           s=30, c='white', marker='o', lw=0,alpha=1)
    plot_words(ax2, coordinates['x_f'], coordinates['y_f'], coordinates['w_f'], 'top')
    plot_words(ax2, coordinates['x_e'], coordinates['y_e'], coordinates['w_e'], 'bottom')
    # Index at_iter-1 picks the weights of the table after at_iter-1 iterations.
    lines = [ax2.plot(xy[0], xy[1],alpha=0.9,linestyle='-',lw=w,color='#1a75ff')[0]
             for xy,w in zip(coordinates['edges'],line_weights[at_iter-1])]
    
    # second translation table
    ax3 = plt.subplot(133)
    theta2frame = pd.DataFrame.from_dict(theta_after, orient="index")
    data = theta2frame.round(2)
    sns.set(font_scale=1.2)
    sns.set_style({"savefig.dpi": 100})
    sns.heatmap(data, cmap=plt.cm.Blues, linewidths=.1, annot=True, fmt="2g")
    ax3.xaxis.tick_top()
    
    plt.show()
    
def train(bitext, num_iter, draw_a=False, draw_p=False):
    """Train the translation table with EM for num_iter iterations.

    Starts from initialize_theta(bitext) and applies iteration() num_iter
    times. When draw_a and/or draw_p is set, a heading is printed and draw()
    renders the requested views after initialization and after every
    iteration. Returns the final table.
    """
    verbose = draw_a or draw_p
    theta = initialize_theta(bitext)
    if verbose:
        print("\nInitialization:\n")
    draw(bitext, theta, draw_a, draw_p)
    completed = 0
    while completed < num_iter:
        completed += 1
        theta = iteration(theta, bitext)
        if verbose:
            print("\nIteration {}:\n".format(str(completed)))
        draw(bitext, theta, draw_a, draw_p)
    return theta

def draw(bitext, theta, draw_a, draw_p):
    """Render the requested views: the translation table first, then the alignments."""
    if not (draw_p or draw_a):
        return
    if draw_p:
        draw_translation_table(bitext, theta)
    if draw_a:
        draw_alignments(bitext, theta)
    
def draw_alignments(bitext, theta, fig=None, scale=None):
    """Draw every sentence pair as a bipartite word graph with networkx.

    Each Foreign word is connected to each English word of its sentence
    pair; edges are coloured by alignment probability through the Blues
    colormap. When fig is falsy a new figure is created and shown here;
    otherwise drawing goes to the current figure and plt.show() is left to
    the caller. scale, when given, multiplies the node size.

    NOTE(review): a later cell of this notebook defines another
    draw_alignments(bitext, theta) using plotly; once that cell has run,
    this definition is shadowed.
    """
    if not fig:
        plt.figure(figsize=(10, 15))
#     font_size = 18 if not scale else 18.0*scale
    node_size = 3000 if not scale else 3000.0*scale
    for (n, (f, e)) in enumerate(bitext):
        # A fresh graph per sentence pair. Nodes are the word strings
        # themselves, so a word repeated within the pair presumably maps to
        # a single node -- TODO confirm this is acceptable for the data.
        a_graph = nx.Graph()
        a_graph.add_nodes_from(e, bipartite=0)
        a_graph.add_nodes_from(f, bipartite=1)
        a_probs = get_alignment_probabilities(f, e, theta)
        pos = {}
        # Foreign words sit on row 2n-1 and English words on row 2n.
        for j in range(0, len(f)):
            for i in range(0, len(e)):
                a_graph.add_edge(f[j], e[i], weight=a_probs[j][i])
            pos[f[j]] = (j+1, (2*n) - 1)
        for i in range(0, len(e)):
            pos[e[i]] = (i+1, 2*n)
        nodes = nx.draw_networkx_nodes(a_graph, pos, node_size=node_size, alpha=1, node_color='white')
        nodes.set_edgecolor('white')
        nx.draw_networkx_labels(a_graph, pos, font_size=18, font_family='sans-serif',
                                font_weight='bold', font_color='black')
        # Probabilities are converted to integer percentages for the colormap.
        edge_weights = [int(attr['weight']*100) for (_,_, attr) in a_graph.edges(data=True)]
        nx.draw_networkx_edges(a_graph, pos, width=3, alpha=0.9, edge_color=edge_weights,
                               edge_cmap=plt.cm.Blues)
    plt.axis('off')
    if not fig:
        plt.show()

def draw_translation_table(bitext, theta, fig=None):
    """Render theta as an annotated heatmap, one row per key of theta.

    Values are rounded to two decimals. When fig is falsy a new 7x5 figure
    is created and shown here; otherwise the heatmap goes to the current
    figure and showing it is left to the caller. bitext is not used.
    """
    owns_figure = not fig
    if owns_figure:
        plt.figure(figsize=(7, 5))
    table = pd.DataFrame.from_dict(theta, orient="index").round(2)
    sns.set(font_scale=1.2)
    sns.set_style({"savefig.dpi": 100})
    heatmap_axes = sns.heatmap(table, cmap=plt.cm.Blues, linewidths=.1, annot=True, fmt="2g")
    heatmap_axes.xaxis.tick_top()
    plt.yticks(rotation=0)
    if owns_figure:
        plt.show()
    
def get_translation_probabilities(f, e, theta, one_word=False, word_index=0):
    """Return one row per Foreign word holding theta[e_i][f_j] for each English word.

    With one_word set, only the row at word_index is filled in; every other
    row is an empty list, so row positions still match positions in f.
    """
    rows = []
    for position, foreign in enumerate(f):
        if one_word and position != word_index:
            rows.append([])
        else:
            rows.append([theta[english][foreign] for english in e])
    return rows

def get_alignment_probabilities(f, e, theta, one_word=False, word_index=0):
    """Normalise each Foreign word's translation probabilities to sum to one.

    Empty rows returned by get_translation_probabilities are dropped, so
    with one_word set the result holds a single row.
    """
    rows = get_translation_probabilities(f, e, theta, one_word, word_index)
    normalized = []
    for row in rows:
        if not row:
            continue
        total = sum(row)
        normalized.append([p/total for p in row])
    return normalized

#######################################
# PRINTING
#######################################

def print_iteration(k, bitext, theta):
    """Print the data log likelihood and one translation table per sentence pair.

    Each table has a column per English token and a row per Foreign token,
    with probabilities formatted to two decimals. k is not used.
    """
    print("Data log likelihood is {}\n".format(str(get_data_log_likelihood(bitext, theta))))
    print("Translation probabilities:")
    for f, e in bitext:
        t_probs = get_translation_probabilities(f, e, theta)
        headings = [""] + ["e"+str(n)+": "+word for (n, word) in enumerate(e)]
        table = PrettyTable(headings, hrules=True)
        for j, foreign in enumerate(f):
            table.add_row(["f"+str(j)+": "+foreign] + ["%.2f" %p for p in t_probs[j]])
        table.align[""] = "l"
        print(table)

def print_translation_tables(k, theta):
    """Print the whole translation table: one row per English word, one column per Foreign word.

    A Foreign word missing from an English word's row is shown as the
    uniform probability when k == 0 and as 0.00 otherwise.
    """
    e_vocab = theta.keys()
    f_vocab = set([])
    for translations in theta.values():
        f_vocab.update(set(translations.keys()))
    f_vocab = list(f_vocab)
    table = PrettyTable([""] + f_vocab, hrules=True)
    for e in e_vocab:
        e_trans = theta[e]
        row = [e]
        for f in f_vocab:
            if f in e_trans:
                cell = "%.2f" %e_trans[f]
            elif k != 0:
                cell = "0.00"
            else:
                cell = "%.2f" %(1.0/len(f_vocab))
            row.append(cell)
        table.add_row(row)
    table.align[""] = "l"
    print(table)

#######################################
# ANIMATIONS
#######################################

def init_alignments():
    """FuncAnimation init callback: draw the words and set the first frame's line widths.

    Reads the module-level ax, coordinates, lines and line_weights.
    """
    ax.scatter(coordinates['x_f']+coordinates['x_e'], coordinates['y_f']+coordinates['y_e'],
               s=30, c='white', marker='o', lw=0, alpha=1)
    for side, placement in (('f', 'top'), ('e', 'bottom')):
        plot_words(ax, coordinates['x_'+side], coordinates['y_'+side], coordinates['w_'+side], placement)
    initial_widths = line_weights[0]
    for idx, line in enumerate(lines):
        line.set_linewidth(initial_widths[idx])
    return lines

def animate_alignments(i):
    """FuncAnimation frame callback: apply the line widths stored for frame i.

    Reads the module-level lines and line_weights and returns the line artists.
    """
    frame_widths = line_weights[i]
    for idx, line in enumerate(lines):
        line.set_linewidth(frame_widths[idx])
    return lines

def get_thetas(bitext, num_iter):
    """Return the translation table at every training step.

    The list starts with the initial table and gains one entry per EM
    iteration, so it holds num_iter + 1 tables for a non-negative num_iter.
    """
    history = [initialize_theta(bitext)]
    done = 0
    while done < num_iter:
        done += 1
        history.append(iteration(history[-1], bitext))
    return history

def get_line_weights(bitext, num_iter, thetas=None):
    """Return, for each table in thetas, the flat list of alignment probabilities.

    Weights are ordered sentence by sentence, then Foreign word by Foreign
    word, then English word by English word. When thetas is empty or None,
    the tables are produced by get_thetas(bitext, num_iter).
    """
    if not thetas:
        thetas = get_thetas(bitext, num_iter)
    weights = []
    for theta in thetas:
        per_iteration = []
        for f, e in bitext:
            a_probs = get_alignment_probabilities(f, e, theta)
            per_iteration.extend(a_probs[j][i]
                                 for j in range(len(f))
                                 for i in range(len(e)))
        weights.append(per_iteration)
    return weights

def get_coordinates(bitext, one_sent=False, sent_index=0, word_index=0):
    """Compute plot positions for the words and edges of the sentence pairs.

    Sentence n places its Foreign words at y = 3n-2 and its English words at
    y = 3n-1, with x running from 1 along each sentence. Every edge is
    [[x_f, x_e], [3n-1.9, 3n-1.1]]. With one_sent set, only
    bitext[sent_index] is laid out (as sentence 0) and edges are emitted
    only for the Foreign word at word_index.

    Returns a dict with keys 'x_f', 'y_f', 'w_f', 'x_e', 'y_e', 'w_e' and
    'edges'.
    """
    selected = [bitext[sent_index]] if one_sent else bitext
    layout = {'x_f': [], 'x_e': [], 'y_f': [], 'y_e': [],
              'edges': [], 'w_f': [], 'w_e': []}
    for n, (f, e) in enumerate(selected):
        base = 3*n
        for j, foreign in enumerate(f):
            layout['x_f'].append(j+1)
            layout['y_f'].append(base-2)
            layout['w_f'].append(foreign)
            if not one_sent or word_index == j:
                layout['edges'].extend([[j+1, i+1], [base-1.9, base-1.1]]
                                       for i in range(len(e)))
        for i, english in enumerate(e):
            layout['x_e'].append(i+1)
            layout['y_e'].append(base-1)
            layout['w_e'].append(english)
    return layout

def plot_words(axes, xs, ys, words, vertical_position, weight='bold', color='black'):
    """Write each word onto axes at (xs[n], ys[n]), horizontally centred.

    vertical_position is passed on as the text's vertical alignment. color
    is either one colour used for every word or a list holding one colour
    per word.
    """
    per_word_color = isinstance(color, list)
    for n, word in enumerate(words):
        axes.text(xs[n], ys[n], word, size=15, family='sans-serif', weight=weight,
                  color=color[n] if per_word_color else color,
                  horizontalalignment='center',
                  verticalalignment=vertical_position)


# Hide this cell's input. The injected script defines code_toggle(), which
# hides or shows the input area of the selected code cell, runs it once when
# the document is ready, and is followed by a link that calls it on click.
# As the last expression of the cell, the HTML object is what gets displayed.
HTML('''<script>
code_show=true; 
function code_toggle() {
    if (code_show){
        $('div.cell.code_cell.rendered.selected div.input').hide();
    } else {
        $('div.cell.code_cell.rendered.selected div.input').show();
    }
    code_show = !code_show
} 

$( document ).ready(code_toggle);
</script>

To show/hide code in this cell, click <a href="javascript:code_toggle()">here</a>.''')


Machine Translation lab 1
======

A step-by-step explanation of IBM Model 1

We will implement IBM Model 1 and, working on a toy dataset, step through it to get a feeling for how Expectation-Maximization works.

Our aim is to model the conditional probability of a Foreign sentence F given an English sentence E - $p_\theta(F|E)$, where $\theta$ denotes model parameters. We're going to make the assumption that each word $f \in F$ is a translation of one word $e \in E$. Those links between source and target words are alignments - a latent variable of IBM Model 1. They are stipulated, not given in the data. The only data we have available are paired sentences, one being the translation of the other.

If we knew the correct alignment, $p_\theta(F|E)$ would be the product over $f \in F$ of the probability of $f$ being the translation of the aligned word $e$. Translation probabilities are parameters of our model which we want to discover.

Were the alignments observed rather than hidden, parameter setting could proceed through simple Maximum Likelihood Estimation. We would collect co-occurrence counts between aligned words and calculate translation probabilities. With latent alignments, however, we're going to make use of the Expectation Maximization approach. We'll make a guess about translation probabilities, and use that guess to establish expected alignments. Using those alignments we'll improve our parameters. So, in each iteration we take our current guess about translation probabilities, and update it by making use of the model's latent variable.

Let's visualize how alignment probabilities change as we train the model on a toy dataset (the heavier the line, the higher the probability):

In [5]:
%%capture
# Module-level state for the alignment animation: init_alignments and
# animate_alignments read line_weights, coordinates, ax and lines as globals.
# One list of alignment probabilities per stored theta, scaled by 10 to
# serve as line widths.
line_weights = [[w*10 for w in sublist] for sublist in get_line_weights(bitext, num_iter)]
coordinates = get_coordinates(bitext)
fig = plt.figure(figsize=(8, 12))
ax = plt.axes()
plt.axis('off')
# One line artist per edge; the animation callbacks set the widths later.
lines = [ax.plot(xy[0], xy[1],alpha=0.9,linestyle='-',color='#1a75ff')[0] for xy in coordinates['edges']]

In [8]:
# line_weights holds one entry per stored theta: the initial table plus one
# per EM iteration, i.e. num_iter + 1 entries. frames=num_iter would stop one
# step short of the final alignment, so every stored entry is rendered.
anim = FuncAnimation(fig, animate_alignments, init_func=init_alignments, frames=len(line_weights), interval=1000, blit=True, repeat_delay=10000)
HTML(anim.to_html5_video())

# anim.save('alignment.gif', writer='imagemagick', fps=2)

We can also observe how translation probabilities change over iterations. Each row represents the translation probability distribution over the vocabulary of F for one word from the vocabulary of E.

TODO: same, should be an evolving picture

In [None]:
# thetas = get_thetas(bitext, num_iter)
# fig = plt.figure(figsize=(7, 5))
# sns.set(font_scale=1.2)
# sns.set_style({"savefig.dpi": 100})
# theta2frame = pd.DataFrame.from_dict(thetas[0], orient="index")
# data = theta2frame.round(2)
# # plot = sns.heatmap(data, cmap=plt.cm.Blues, linewidths=.1, annot=True, fmt="2g")
# ax = sns.heatmap(data, cmap=plt.cm.Blues, linewidths=.1)
# ax.xaxis.tick_top()
# plt.yticks(rotation=0)
    
# def init():
#     ax.data = data
#     return ax,

# def animate(i):
#     theta2frame = pd.DataFrame.from_dict(thetas[i], orient="index")
#     data = theta2frame.round(2)
#     ax.data = data
#     return ax,

# anim = FuncAnimation(fig, animate, init_func=init, frames=num_iter, interval=1000, repeat_delay=10000)
# HTML(anim.to_html5_video())
# anim.save('translation.gif', writer='imagemagick', fps=2) 

# Static stand-in for the disabled animation draft above: draws one
# translation-table heatmap after initialization and after every iteration.
show_translation_tables()

Now, let us go back to iteration 0, when we know nothing about the actual translation probabilities and correct alignments. In absence of reasons to think otherwise, we will guess that translation probability distributions are uniform.

In [None]:
def initialize_theta(bitext):
    """Build the uniform starting translation table.

    bitext is an iterable of (f, e) pairs of token lists. The result is a
    dict keyed by English word; each value is a defaultdict(float) that maps
    every Foreign vocabulary word to 1/(size of the Foreign vocabulary).
    """
    foreign_words, english_words = set(), set()
    for foreign_sent, english_sent in bitext:
        foreign_words.update(foreign_sent)
        english_words.update(english_sent)
    uniform = 1.0/len(foreign_words)
    return {english: defaultdict(float, ((foreign, uniform) for foreign in foreign_words))
            for english in english_words}

Having made an initial guess, we can now execute the Expectation step of our EM learning algorithm. Given the translation probability table, we can say what our expectation about the alignments is. The intuition is that for words *f* and *e* co-occurring in a sentence pair, if word *f* is a highly probable translation of word *e*, it is likely that *f* is aligned to *e*. In fact, the probability of alignment is proportional to the translation probability. You can inspect translation and alignment probabilities for any Foreign word token by changing the iteration, sentence, and word index arguments in the function call below:

In [None]:
# Untrained table (0 iterations), sentence pair at index 1, Foreign word at index 0.
show_sent_data(at_iter=0, sent_index=1, word_index=0)

In the Expectation step we use alignment probabilities as weights on word co-occurrences. In the example above the expected co-occurrence count between *buty* and each of the four *e* words is 0.25.

We go through the whole corpus and collect all the expected co-occurrence counts, as well as the occurrence counts for each word in the English vocabulary.

In [None]:
def expectation(theta, bitext):
    """E-step: collect expected counts under the current table theta.

    For every Foreign token, its translation probabilities given each
    English word of the paired sentence are normalised to sum to one and
    added as fractional counts. Returns (e_count, fe_count): expected
    totals per English word and per (Foreign word, English word) pair,
    both as defaultdict(float).
    """
    english_totals = defaultdict(float)
    pair_totals = defaultdict(float)
    for foreign_sent, english_sent in bitext:
        for foreign in foreign_sent:
            scores = [theta[english][foreign] for english in english_sent]
            normalizer = sum(scores)
            for english, score in zip(english_sent, scores):
                share = score / normalizer
                pair_totals[(foreign, english)] += share
                english_totals[english] += share
    return english_totals, pair_totals

In the Maximization step we use the expected counts as we would use actual counts in MLE and generate a new translation probability table. In other words, we change our initial belief about the parameters based on newly acquired belief about alignments.

For instance, *shoes* occurs once in our corpus, and co-occurs 0.25 times with *buty, nie, są*, and *smaczne*. We will re-evaluate translation probability $p_{t}(f | e)$ as 0.25 for the above four *f* words and 0 for the rest of the Foreign vocabulary.

In [None]:
def maximization(theta, e_count, fe_count):
    """M-step: turn expected counts into a new translation table.

    Works on a deep copy of theta, so the input table is left untouched.
    Each entry becomes fe_count[(f, e)] / e_count[e] when the pair has an
    expected count, and 0 otherwise.
    """
    updated = deepcopy(theta)
    for english, row in updated.items():
        for foreign in row:
            pair = (foreign, english)
            row[foreign] = fe_count[pair]/e_count[english] if pair in fe_count else 0
    return updated

Those two steps together constitute one iteration of the training procedure.

In [None]:
def iteration(theta, bitext):
    """Run one EM round (expectation, then maximization) and return the new table."""
    counts = expectation(theta, bitext)
    return maximization(theta, *counts)

To illustrate the parameter updating which happens during an iteration, we can look at the translation probabilities from the previous iteration, the expected alignments derived on their basis, and the translation probabilities for this iteration, derived from the expected alignments:

In [None]:
# Show the update made by iteration 5: table after 4 iterations, expected
# alignments, table after 5 iterations.
draw_iteration(5, bitext)

In [None]:
# TODO maybe:
# interactive visualisation with
#     translation table from previous iteration
#     bitext with alignment probabilities
#     table of expected co-occurence counts
#     new translation table
# can step through iterations

We can execute a set number of iterations, or continue until our model stops improving. A reasonable measure of improvement for our model would be data likelihood.

In [None]:
def train1(bitext, num_iter):
    """Train for a fixed number of EM iterations, drawing the table at every step.

    Prints a heading and draws the translation table after initialization
    and after each of the num_iter iterations. Returns the final table.
    """
    k = 0
    theta = initialize_theta(bitext)
    print("\nInitialization:\n")
    # draw_translation_table takes (bitext, theta, fig=None); passing the
    # iteration counter first would shift theta into the fig slot.
    draw_translation_table(bitext, theta)
    while k < num_iter:
        k += 1
        theta = iteration(theta, bitext)
        print("\nIteration {}:\n".format(str(k)))
        draw_translation_table(bitext, theta)
    return theta
        
def train2(bitext, previous_likelihood=float('-inf')):
    """Train with EM until the data log likelihood stops improving.

    Iterates while the log likelihood exceeds the likelihood of the previous
    step by more than 0.01. previous_likelihood is the baseline for the
    first comparison; the default of minus infinity guarantees at least one
    iteration for any finite likelihood. Returns the final table.
    """
    threshold = 0.01
    theta = initialize_theta(bitext)
    likelihood = get_data_log_likelihood(bitext, theta)
    while (likelihood - previous_likelihood) > threshold:
        print('next iteration\n')
        # Remember the likelihood just compared; without this update the
        # gain is always measured against the initial baseline and the loop
        # never ends.
        previous_likelihood = likelihood
        theta = iteration(theta, bitext)
        likelihood = get_data_log_likelihood(bitext, theta)
    return theta

In [None]:
def get_data_log_likelihood(bitext, theta):
    """Return the log likelihood of the whole corpus under theta.

    Sums get_pair_likelihood over every (f, e) pair; an empty corpus gives 0.
    """
    data_log_likelihood = 0
    for (f, e) in bitext:
        data_log_likelihood += get_pair_likelihood(e,f,theta)
    # Return only after the loop, so every pair contributes rather than
    # just the first one.
    return data_log_likelihood
    
def get_pair_likelihood(e_sent,f_sent,theta):
    """Return the log likelihood of f_sent given e_sent under theta.

    Works in log space: for each Foreign word, the translation probabilities
    given every English word are summed with list_log_add, and the running
    total from the previous Foreign words is carried into that sum.
    """
    running = list_log_add([np.log(theta[e_j][f_sent[0]]) for e_j in e_sent])
    for f_word in f_sent[1:]:
        shifted = [(np.log(theta[e_j][f_word]) + running) for e_j in e_sent]
        running = list_log_add(shifted)
    return running

def list_log_add(l):
    """Given a list of logs ln(x_i), return ln(sum of x_i).

    Values are combined left to right with np.logaddexp, which copes with
    -inf entries and large magnitudes. An empty list yields -inf, the log
    of an empty sum.
    """
    total = -np.inf
    for value in l:
        # logaddexp(-inf, v) is exactly v, so a single-element list is
        # returned unchanged.
        total = np.logaddexp(total, value)
    return total

def log_add(x,y):
    """Given x=ln(x') and y=ln(y'), return ln(x'+y').

    np.logaddexp is used because the direct formula
    x + log(1 + exp(y - x)) returns nan when x is -inf and overflows when
    y is much larger than x.
    """
    return np.logaddexp(x, y)

We can inspect how the log likelihood changes over iterations.

In [None]:
def get_likelihoods(bitext, theta, previous_likelihood=float('-inf'), l=None):
    """Return the data log likelihood at each EM step until it stops improving.

    Starting from theta, the likelihood is recorded and one more iteration
    is run for as long as the gain over the previous step exceeds 0.01.
    previous_likelihood is the baseline for the first comparison. l, when
    given, is the list the likelihoods are appended to; by default a fresh
    list is created on every call, so results never leak between calls.
    """
    if l is None:
        l = []
    threshold = 0.01
    while True:
        likelihood = get_data_log_likelihood(bitext, theta)
        l.append(likelihood)
        # Same stopping test as `gain > threshold` being false, written this
        # way so that a nan gain also stops the loop.
        if not (likelihood - previous_likelihood) > threshold:
            return l
        theta = iteration(theta, bitext)
        previous_likelihood = likelihood

def plot_likelihoods():
    """Re-read the corpus and plot the data log likelihood against the iteration number."""
    corpus = get_bitext(data_file, num_sent)
    likelihoods = get_likelihoods(corpus, initialize_theta(corpus))
    iterations = range(0, len(likelihoods))
    plt.figure(figsize=(6, 4.5))
    plt.plot(iterations, likelihoods, marker='o', markersize=5,
             color='cornflowerblue', linestyle='-', linewidth=2)
    plt.xlabel('iteration')
    plt.ylabel('log likelihood')
    plt.show()

# Draw the likelihood curve for the configured corpus.
plot_likelihoods()

*************************************************
           PLOTLY ANIMATION
*************************************************         

In [None]:
from plotly.graph_objs import *
from plotly.offline import init_notebook_mode, iplot
from IPython.display import display, HTML

init_notebook_mode(connected=True)

In [None]:
def draw_iterations(bitext, num_iter):
    """Draw the alignments after initialization and after each of num_iter EM iterations."""
    theta = initialize_theta(bitext)
    print("\nInitialization:\n")
    draw_alignments(bitext, theta)
    completed = 0
    while completed < num_iter:
        completed += 1
        theta = iteration(theta, bitext)
        print("\nIteration {}:\n".format(str(completed)))
        draw_alignments(bitext, theta)


def draw_alignments(bitext, theta):
    """Draw every sentence pair with plotly: words as text markers, alignments as lines.

    Foreign words of sentence n sit on row 2n-1 and English words on row
    2n. Each (Foreign word, English word) edge is its own line trace whose
    width is int(alignment probability * 10).

    NOTE(review): this reuses the name of the matplotlib/networkx
    draw_alignments(bitext, theta, fig=None, scale=None) defined in the
    first code cell; after this cell runs, train(..., draw_a=True) calls
    this version instead.
    """
    Xv=[]
    Yv=[]
    textv=[]
    positions=[]
    Xed=[]
    Yed=[]
    weights=[]
    for (n, (f, e)) in enumerate(bitext):
        a_probs = get_alignment_probabilities(f, e, theta)
        for j in range(0, len(f)):
            Xv.append(j+1)
            Yv.append((2*n)-1)
            textv.append(f[j])
            positions.append('bottom')
            for i in range(0, len(e)):
                # Each edge is stored as [start, end, None] coordinates.
                Xed.append([j+1, i+1, None])
                Yed.append([(2*n)-1, 2*n, None])
                weights.append(a_probs[j][i])
        for i in range(0, len(e)):
            Xv.append(i+1)
            Yv.append(2*n)
            textv.append(e[i])
            positions.append('top')
    
    traces=[]
    # Single trace holding all word labels.
    trace1=Scatter(x=Xv,
           y=Yv,
           mode='markers+text',
           text=textv,
           textposition=positions,
           name='net',
           marker=Marker(symbol='dot',
                         size=5, 
                         color='rgb(210,210,210)',
                         line=Line(color='rgb(210,210,210)', width=0.5)
                        ),
           hoverinfo='text'
           )
    traces.append(trace1)
    # One line trace per edge; int() truncates, so probabilities below 0.1
    # produce a width of 0.
    for x_coords, y_coords, w in zip(Xed, Yed, weights):
        trace=Scatter(x=x_coords,
                     y=y_coords,
                     mode='lines',
                     line=Line(color='rgb(210,210,210)', width=int(w*10)),
                     hoverinfo='none'
                     )
        traces.append(trace)
        
    # hide axis line, grid, ticklabels and  title
    axis=dict(showline=False,
              zeroline=False,
              showgrid=False,
              showticklabels=False,
              title='' 
              )

    width=800
    height=800
    layout=Layout(title= "Alignment probability", 
        font= Font(size=12),
        showlegend=False,
        autosize=False,
        width=width,
        height=height,
        xaxis=XAxis(axis),
        yaxis=YAxis(axis),          
        margin=Margin(
            l=40,
            r=40,
            b=85,
            t=100,
        ),
        hovermode='closest',          
        )
    data1=Data(traces)
    fig1=Figure(data=data1, layout=layout)
    iplot(fig1, filename='Alignment probabilities')

In [None]:
# Re-read the corpus (rebinding the module-level bitext) and draw the
# alignments at every training step.
bitext = get_bitext(data_file, num_sent)
draw_iterations(bitext, num_iter)

In [None]:
def animate_alignments(bitext,thetas):
    """Show the alignment probabilities of each table in thetas as a plotly animation.

    Words are drawn once as a text trace; every (Foreign word, English word)
    edge is a line trace whose width is int(alignment probability * 10).
    The first table provides the initial figure data and each later table
    becomes one animation frame, started with the 'Play' button.

    NOTE(review): this reuses the name of the matplotlib callback
    animate_alignments(i) defined in the first code cell; after this cell
    runs, the FuncAnimation cell no longer finds that callback.
    """
    Xv=[]
    Yv=[]
    textv=[]
    positions=[]
    Xed=[]
    Yed=[]
    # Foreign words of sentence n sit on row 2n-1, English words on row 2n.
    for (n, (f, e)) in enumerate(bitext):
        for j in range(0, len(f)):
            Xv.append(j+1)
            Yv.append((2*n)-1)
            textv.append(f[j])
            positions.append('bottom')
            for i in range(0, len(e)):
                Xed.append([j+1, i+1, None])
                Yed.append([(2*n)-1, 2*n, None])
        for i in range(0, len(e)):
            Xv.append(i+1)
            Yv.append(2*n)
            textv.append(e[i])
            positions.append('top')

    trace1=Scatter(x=Xv,
           y=Yv,
           mode='markers+text',
           text=textv,
           textposition=positions,
           name='net',
           marker=Marker(symbol='dot',
                         size=5,
                         color='rgb(210,210,210)',
                         line=Line(color='rgb(210,210,210)', width=0.5)
                        ),
           hoverinfo='text'
           )

    # One trace list per table: the shared word trace followed by one line
    # trace per edge, in the same order as Xed/Yed.
    graph_frames = []
    for theta in thetas:
        weights=[]
        for f, e in bitext:
            a_probs = get_alignment_probabilities(f, e, theta)
            for j in range(0, len(f)):
                for i in range(0, len(e)):
                    weights.append(a_probs[j][i])
        traces=[trace1]
        for x_coords, y_coords, w in zip(Xed, Yed, weights):
            traces.append(Scatter(x=x_coords,
                                  y=y_coords,
                                  mode='lines',
                                  line=Line(color='rgb(210,210,210)', width=int(w*10)),
                                  hoverinfo='none'
                                  ))
        graph_frames.append(Data(traces))

    axis=dict(showline=False,
              zeroline=False,
              showgrid=False,
              showticklabels=False,
              title=''
              )

    width=800
    height=800
    layout=Layout(title= "Alignment probability",
        font= Font(size=12),
        showlegend=False,
        autosize=False,
        width=width,
        height=height,
        xaxis=XAxis(axis),
        yaxis=YAxis(axis),
        margin=Margin(
            l=40,
            r=40,
            b=85,
            t=100,
        ),
        updatemenus= [{'type': 'buttons',
                           'buttons': [{'label': 'Play',
                                        'method': 'animate',
                                        'args': [None]}]}]
    )
    # A plotly frame is a dict whose 'data' entry holds the traces for that
    # step; a bare trace list is not a valid frame.
    frames = [dict(data=frame) for frame in graph_frames[1:]]
    figure = Figure(data=graph_frames[0], layout=layout, frames=frames)
    iplot(figure, filename='Alignment probabilities')

In [None]:
def get_thetas(bitext, num_iter):
    """Return the translation table at every training step.

    The list starts with the initial table and gains one entry per EM
    iteration, so it holds num_iter + 1 tables for a non-negative num_iter.
    """
    history = [initialize_theta(bitext)]
    done = 0
    while done < num_iter:
        done += 1
        history.append(iteration(history[-1], bitext))
    return history

In [None]:
# Re-read the corpus and animate the alignments across all stored tables.
bitext = get_bitext(data_file, num_sent)
animate_alignments(bitext, get_thetas(bitext, num_iter))

In [None]:
from plotly.offline import init_notebook_mode, iplot
from IPython.display import display, HTML
import numpy as np

init_notebook_mode(connected=True)

# Stand-alone plotly animation example: a red marker moving along the curve
# (t + t^2, t - t^2). It does not use the translation model.

# Curve sampled at 100 points, plus axis ranges padded by 1.5 on each side.
t=np.linspace(-1,1,100)
x=t+t**2
y=t-t**2
xm=np.min(x)-1.5
xM=np.max(x)+1.5
ym=np.min(y)-1.5
yM=np.max(y)+1.5
# Marker positions: the same curve sampled at N points, one per frame.
N=50
s=np.linspace(-1,1,N)
xx=s+s**2
yy=s-s**2


# The curve appears twice in the initial data; each frame below supplies a
# single trace. NOTE(review): presumably frames replace the first trace and
# the second copy keeps the curve visible -- confirm against plotly docs.
data=[dict(x=x, y=y, 
           mode='lines', 
           line=dict(width=2, color='blue')
          ),
      dict(x=x, y=y, 
           mode='lines', 
           line=dict(width=2, color='blue')
          )
    ]

# Fixed axis ranges and a 'Play' button that triggers the animation.
layout=dict(xaxis=dict(range=[xm, xM], autorange=False, zeroline=False),
            yaxis=dict(range=[ym, yM], autorange=False, zeroline=False),
            title='Kinematic Generation of a Planar Curve', hovermode='closest',
            updatemenus= [{'type': 'buttons',
                           'buttons': [{'label': 'Play',
                                        'method': 'animate',
                                        'args': [None]}]}])

# One frame per sample: a dict whose 'data' entry holds the marker trace.
frames=[dict(data=[dict(x=[xx[k]], 
                        y=[yy[k]], 
                        mode='markers', 
                        marker=dict(color='red', size=10)
                        )
                  ]) for k in range(N)]    
          
figure1=dict(data=data, layout=layout, frames=frames)          
iplot(figure1)

In [None]:
%matplotlib inline
import sys
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
from IPython.display import HTML

# Stand-alone matplotlib animation example: a red line shifted upwards over
# a noisy scatter. Running this cell rebinds the module-level fig and ax
# used by the alignment animation.
fig, ax = plt.subplots()
x = np.arange(0, 20, 0.1)
line, = ax.plot([], [], 'r-', linewidth=2)

def init():
    """Draw the noisy scatter and place the line at y = x - 5."""
    # No random seed is set, so the scatter differs on every run.
    ax.scatter(x, x + np.random.normal(0, 3.0, len(x)))
    line.set_data(x, x-5)
    return (line,)

def update(i):
    """Move the line to y = x - 5 + i for frame i."""
    line.set_ydata(x - 5 + i)
    return line, ax

# Ten frames (i = 0..9), 200 ms apart, rendered as an HTML5 video.
anim = FuncAnimation(fig, update, init_func=init, frames=np.arange(0, 10), interval=200)
HTML(anim.to_html5_video())