# A Look At WOVEncoder & WOVEncodable
This notebook looks at some simple examples of the word-order variation encoder.

In [1]:
from encoder import WOVEncoder
import pandas as pd
import numpy as np
from IPython.display import display, HTML

In [2]:
def display_encoding(original_str, encodable):
    """Print a sentence with its tokenisations and render its encoding as a table.

    The encoding is laid out as a DataFrame with one row per output token and
    one column per input token, cast to 32-bit integers, and shown as HTML.

    Args:
        original_str: The sentence that was encoded; printed verbatim.
        encodable: Object exposing ``t_inp`` (input tokens), ``t_out`` (output
            tokens) and ``get_encoding()`` (the values that fill the table).
    """
    # Remove '▁' marker characters from both ends of every input token so the
    # column headers are plain words.
    input_tokens = [tok.strip('▁') for tok in encodable.t_inp]
    output_tokens = encodable.t_out
    matrix = encodable.get_encoding()

    def _print_section(title, body):
        # Header, content, then a blank gap (print("\n") emits two newlines).
        print(title)
        print(body)
        print("\n")

    _print_section("--Example Sentence--", original_str)
    _print_section("--Tokenised Input--", input_tokens)
    _print_section("--Tokenised Output--", output_tokens)

    print("--WOVEncoding--")
    table = pd.DataFrame(
        matrix,
        index=output_tokens,
        columns=input_tokens,
        dtype=np.int32,
    )

    # Hide uninteresting labels (the empty token and the full stop) on both
    # axes; labels that are not present are simply skipped.
    boring_labels = ['', '.']
    table = table.drop(index=boring_labels, columns=boring_labels, errors='ignore')

    display(HTML(table.to_html()))
    
def test_encodable(example, encoder):
    """Encode a single sentence and show the result with ``display_encoding``.

    Args:
        example: The sentence to encode; passed to the encoder wrapped in a
            one-element list.
        encoder: Object whose ``encode`` method accepts that list and returns
            the encodable to display.
    """
    sentences = [example]
    display_encoding(example, encoder.encode(sentences))

## Create the Encoder

In [3]:
# Construct the encoder once with its default arguments; every example cell
# below reuses this same instance.
encoder = WOVEncoder()

loaded_name model
loaded_name model
loaded_name tokenizer
loaded_name similarity_model
loaded_name similarity_tokenizer
loaded_name batch_size
loaded_name device
loaded_name masker
loaded_name masker
loaded_name tokenizer
loaded_name mask_token
loaded_name collapse_mask_token
loaded_name output_type
loaded_name model
loaded_name model
loaded_name tokenizer
loaded_name device
loaded_name link


## Example 1: This is a test.
An example with no word-order variation.

In [4]:
# Baseline case: in the recorded output each input token lines up with exactly
# one output token, in the same order.
example = "This is a test."
test_encodable(example, encoder)

  0%|          | 0/30 [00:00<?, ?it/s]

Partition explainer: 2it [00:10, 10.42s/it]                                                                                                                                              

--Example Sentence--
This is a test.


--Tokenised Input--
['This', 'is', 'a', 'test', '.', '']


--Tokenised Output--
['Das', 'ist', 'ein', 'Test', '.']


--WOVEncoding--





Unnamed: 0,This,is,a,test
Das,1,0,0,0
ist,0,1,0,0
ein,0,0,1,0
Test,0,0,0,1


## Example 2: I hire him on Monday.
An example with separable verbs in German.<br>
In German, "hire" is "einstellen", but "ein" should move to the very end of the sentence.<br>
For instance, "I <b>hire</b> someone" = "Ich <b>stelle</b> jemanden <b>ein</b>".<br>

In [5]:
# Separable-verb case: in the recorded output "hire" is marked against both
# "stelle" and the sentence-final "ein".
example = "I hire him on Monday."
test_encodable(example, encoder)

  0%|          | 0/42 [00:00<?, ?it/s]

--Example Sentence--
I hire him on Monday.


--Tokenised Input--
['I', 'hire', 'him', 'on', 'Monday', '.', '']


--Tokenised Output--
['Ich', 'stelle', 'ihn', 'am', 'Montag', 'ein', '.']


--WOVEncoding--


Unnamed: 0,I,hire,him,on,Monday
Ich,1,0,0,0,0
stelle,0,1,0,0,0
ihn,0,0,1,0,0
am,0,0,0,1,0
Montag,0,0,0,0,1
ein,0,1,0,0,0


## Example 3: I ate it yesterday.
An example where the verb moves to the end of the sentence.<br>
There is also a tense change here from <i>simple past</i> to <i>present perfect simple</i>, equivalent to "I have eaten it".<br>
"<b>I ate</b> it yesterday" <=> "<b>Ich habe</b> es gestern <b>gegessen</b>"

In [6]:
# Verb-final case: note in the recorded output that "ate" is tokenised as the
# two pieces "at" and "e", both marked against "habe" and "gegessen".
example = "I ate it yesterday."
test_encodable(example, encoder)

--Example Sentence--
I ate it yesterday.


--Tokenised Input--
['I', 'at', 'e', 'it', 'yesterday', '.', '']


--Tokenised Output--
['Ich', 'habe', 'es', 'gestern', 'gegessen', '.']


--WOVEncoding--


Unnamed: 0,I,at,e,it,yesterday
Ich,1,0,0,0,0
habe,1,1,1,0,0
es,0,0,0,1,0
gestern,0,0,0,0,1
gegessen,0,1,1,0,0
