In [105]:
import numpy as np
from scipy.spatial.distance import cosine

In [106]:
import re
import os
import tempfile
import sys
import time
import argparse
import numpy as np
from collections import namedtuple

import torch
import torch.nn as nn

In [107]:
#!/usr/bin/python
# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
#
# LASER  Language-Agnostic SEntence Representations
# is a toolkit to calculate multilingual sentence embeddings
# and to use them for document classification, bitext filtering
# and mining
#
# --------------------------------------------------------
#
# Helper functions for tokenization and BPE

import os
import sys
import tempfile
import numpy as np
from subprocess import run, check_output, DEVNULL

# get environment
LASER = "~/Desktop/multi-embedding/LASER-master"


FASTBPE = LASER + '/tools-external/fastBPE/fast'
MOSES_BDIR = LASER + '/tools-external/moses-tokenizer/tokenizer/'
MOSES_TOKENIZER = MOSES_BDIR + 'tokenizer.perl -q -no-escape -threads 20 -l '
MOSES_LC = MOSES_BDIR + 'lowercase.perl'
NORM_PUNC = MOSES_BDIR + 'normalize-punctuation.perl -l '
DESCAPE = MOSES_BDIR + 'deescape-special-chars.perl'
REM_NON_PRINT_CHAR = MOSES_BDIR + 'remove-non-printing-char.perl'

# Romanization (Greek only)
ROMAN_LC = 'python3 ' + LASER + '/source/lib/romanize_lc.py -l '

# Mecab tokenizer for Japanese
MECAB = LASER + '/tools-external/mecab'


###############################################################################
#
# Tokenize a line
#
###############################################################################

def TokenLine(line, lang='en', lower_case=True, romanize=False):
    assert lower_case, 'lower case is needed by all the models'
    roman = lang if romanize else 'none'
    tok = check_output(
            REM_NON_PRINT_CHAR
            + '|' + NORM_PUNC + lang
            + '|' + DESCAPE
            + '|' + MOSES_TOKENIZER + lang
            + ('| python3 -m jieba -d ' if lang == 'zh' else '')
            + ('|' + MECAB + '/bin/mecab -O wakati -b 50000 ' if lang == 'ja' else '')
            + '|' + ROMAN_LC + roman,
            input=line,
            encoding='UTF-8',
            shell=True)
    return tok.strip()


###############################################################################
#
# Tokenize a file
#
###############################################################################

def Token(inp_fname, out_fname, lang='en',
          lower_case=True, romanize=False, descape=False,
          verbose=False, over_write=False, gzip=False):
    assert lower_case, 'lower case is needed by all the models'
    assert not over_write, 'over-write is not yet implemented'
    if not os.path.isfile(out_fname):
        cat = 'zcat ' if gzip else 'cat '
        roman = lang if romanize else 'none'
        # handle some iso3 langauge codes
        if lang in ('cmn', 'wuu', 'yue'):
            lang = 'zh'
        if lang in ('jpn'):
            lang = 'ja'
        if verbose:
            print(' - Tokenizer: {} in language {} {} {}'
                  .format(os.path.basename(inp_fname), lang,
                          '(gzip)' if gzip else '',
                          '(de-escaped)' if descape else '',
                          '(romanized)' if romanize else ''))
        run(cat + inp_fname
            + '|' + REM_NON_PRINT_CHAR
            + '|' + NORM_PUNC + lang
            + ('|' + DESCAPE if descape else '')
            + '|' + MOSES_TOKENIZER + lang
            + ('| python3 -m jieba -d ' if lang == 'zh' else '')
            + ('|' + MECAB + '/bin/mecab -O wakati -b 50000 ' if lang == 'ja' else '')
            + '|' + ROMAN_LC + roman
            + '>' + out_fname,
            env=dict(os.environ, LD_LIBRARY_PATH=MECAB + '/lib'),
            shell=True)
    elif not over_write and verbose:
        print(' - Tokenizer: {} exists already'
              .format(os.path.basename(out_fname), lang))


###############################################################################
#
# Apply FastBPE for one line
# This implementation is highly suboptimal since we have to spawn a new
# process and load the BPE codes for each line !!
#
###############################################################################

def BPEfastApplyLine(line, bpe_codes):
    bpe_vocab = bpe_codes.replace('fcodes', 'fvocab')
    if not os.path.isfile(bpe_vocab):
        print(' - fast BPE: focab file not found {}'.format(bpe_vocab))
        bpe_vocab = ''
    with tempfile.TemporaryDirectory() as tmpdir:
        ifn = os.path.join(tmpdir, 'tok')
        ofn = os.path.join(tmpdir, 'bpe')
        with open(ifn, 'w') as f:
            f.write('{}\n'.format(line))
        run(FASTBPE + ' applybpe ' + ofn + ' ' + ifn
            + ' ' + bpe_codes + ' ' + bpe_vocab,
            shell=True, stderr=DEVNULL)
        with open(ofn, 'r') as f:
            bpe = f.readlines()
        assert len(bpe) == 1, 'ERROR: unexpected BPE output'
        return bpe[0]


def BPEfastApply(inp_fname, out_fname, bpe_codes,
                 verbose=False, over_write=False):
    if not os.path.isfile(out_fname):
        if verbose:
            print(' - fast BPE: processing {}'
                  .format(os.path.basename(inp_fname)))
        bpe_vocab = bpe_codes.replace('fcodes', 'fvocab')
        if not os.path.isfile(bpe_vocab):
            print(' - fast BPE: focab file not found {}'.format(bpe_vocab))
            bpe_vocab = ''
        run(FASTBPE + ' applybpe '
            + out_fname + ' ' + inp_fname
            + ' ' + bpe_codes
            + ' ' + bpe_vocab, shell=True, stderr=DEVNULL)
    elif not over_write and verbose:
        print(' - fast BPE: {} exists already'
              .format(os.path.basename(out_fname)))


###############################################################################
#
# Split long lines into multiple sentences at "."
#
###############################################################################

def SplitLines(ifname, of_txt, of_sid):
    if os.path.isfile(of_txt):
        print(' - SplitLines: {} already exists'.format(of_txt))
        return
    nl = 0
    nl_sp = 0
    maxw = 0
    maxw_sp = 0
    fp_sid = open(of_sid, 'w')
    fp_txt = open(of_txt, 'w')
    with open(ifname, 'r') as ifp:
        for line in ifp:
            print('{:d}'.format(nl), file=fp_sid)  # store current sentence ID
            nw = 0
            words = line.strip().split()
            maxw = max(maxw, len(words))
            for i, word in enumerate(words):
                if word == '.' and i != len(words)-1:
                    if nw > 0:
                        print(' {}'.format(word), file=fp_txt)
                    else:
                        print('{}'.format(word), file=fp_txt)
                    # store current sentence ID
                    print('{:d}'.format(nl), file=fp_sid)
                    nl_sp += 1
                    maxw_sp = max(maxw_sp, nw+1)
                    nw = 0
                else:
                    if nw > 0:
                        print(' {}'.format(word), end='', file=fp_txt)
                    else:
                        print('{}'.format(word), end='', file=fp_txt)
                    nw += 1
            if nw > 0:
                # handle remainder of sentence
                print('', file=fp_txt)
                nl_sp += 1
                maxw_sp = max(maxw_sp, nw+1)
            nl += 1
    print(' - Split sentences: {}'.format(ifname))
    print(' -                  lines/max words: {:d}/{:d} -> {:d}/{:d}'
          .format(nl, maxw, nl_sp, maxw_sp))
    fp_sid.close()
    fp_txt.close()


###############################################################################
#
# Join embeddings of previously split lines (average)
#
###############################################################################

def JoinEmbed(if_embed, sid_fname, of_embed, dim=1024):
    if os.path.isfile(of_embed):
        print(' - JoinEmbed: {} already exists'.format(of_embed))
        return
    # read the input embeddings
    em_in = np.fromfile(if_embed, dtype=np.float32, count=-1).reshape(-1, dim)
    ninp = em_in.shape[0]
    print(' - Combine embeddings:')
    print('                input: {:s} {:d} sentences'.format(if_embed, ninp))

    # get all sentence IDs
    sid = np.empty(ninp, dtype=np.int32)
    i = 0
    with open(sid_fname, 'r') as fp_sid:
        for line in fp_sid:
            sid[i] = int(line)
            i += 1
    nout = sid.max() + 1
    print('                IDs: {:s}, {:d} sentences'.format(sid_fname, nout))

    # combining
    em_out = np.zeros((nout, dim), dtype=np.float32)
    cnt = np.zeros(nout, dtype=np.int32)
    for i in range(ninp):
        idx = sid[i]
        em_out[idx] += em_in[i]  # cumulate sentence vectors
        cnt[idx] += 1

    if (cnt == 0).astype(int).sum() > 0:
        print('ERROR: missing lines')
        sys.exit(1)

    # normalize
    for i in range(nout):
        em_out[i] /= cnt[i]

    print('                output: {:s}'.format(of_embed))
    em_out.tofile(of_embed)

In [108]:
SPACE_NORMALIZER = re.compile("\s+")
Batch = namedtuple('Batch', 'srcs tokens lengths')

In [109]:
def buffered_read(fp, buffer_size):
    buffer = []
    for src_str in fp:
        buffer.append(src_str.strip())
        if len(buffer) >= buffer_size:
            yield buffer
            buffer = []

    if len(buffer) > 0:
        yield buffer


def buffered_arange(max):
    if not hasattr(buffered_arange, 'buf'):
        buffered_arange.buf = torch.LongTensor()
    if max > buffered_arange.buf.numel():
        torch.arange(max, out=buffered_arange.buf)
    return buffered_arange.buf[:max]


# TODO Do proper padding from the beginning
def convert_padding_direction(src_tokens, padding_idx, right_to_left=False, left_to_right=False):
    assert right_to_left ^ left_to_right
    pad_mask = src_tokens.eq(padding_idx)
    if not pad_mask.any():
        # no padding, return early
        return src_tokens
    if left_to_right and not pad_mask[:, 0].any():
        # already right padded
        return src_tokens
    if right_to_left and not pad_mask[:, -1].any():
        # already left padded
        return src_tokens
    max_len = src_tokens.size(1)
    range = buffered_arange(max_len).type_as(src_tokens).expand_as(src_tokens)
    num_pads = pad_mask.long().sum(dim=1, keepdim=True)
    if right_to_left:
        index = torch.remainder(range - num_pads, max_len)
    else:
        index = torch.remainder(range + num_pads, max_len)
    return src_tokens.gather(1, index)

In [110]:
class SentenceEncoder:

    def __init__(self, model_path, max_sentences=None, max_tokens=None, cpu=False, fp16=False, sort_kind='quicksort'):
        self.use_cuda = torch.cuda.is_available() and not cpu
        self.max_sentences = max_sentences
        self.max_tokens = max_tokens
        if self.max_tokens is None and self.max_sentences is None:
            self.max_sentences = 1

        state_dict = torch.load(model_path)
        self.encoder = Encoder(**state_dict['params'])
        self.encoder.load_state_dict(state_dict['model'])
        self.dictionary = state_dict['dictionary']
        self.pad_index = self.dictionary['<pad>']
        self.eos_index = self.dictionary['</s>']
        self.unk_index = self.dictionary['<unk>']
        if fp16:
            self.encoder.half()
        if self.use_cuda:
            print(' - transfer encoder to GPU')
            self.encoder.cuda()
        self.sort_kind = sort_kind

    def _process_batch(self, batch):
        tokens = batch.tokens
        lengths = batch.lengths
        if self.use_cuda:
            tokens = tokens.cuda()
            lengths = lengths.cuda()
        self.encoder.eval()
        embeddings = self.encoder(tokens, lengths)['sentemb']
        return embeddings.detach().cpu().numpy()

    def _tokenize(self, line):
        tokens = SPACE_NORMALIZER.sub(" ", line).strip().split()
        ntokens = len(tokens)
        ids = torch.LongTensor(ntokens + 1)
        for i, token in enumerate(tokens):
            ids[i] = self.dictionary.get(token, self.unk_index)
        ids[ntokens] = self.eos_index
        return ids

    def _make_batches(self, lines):
        tokens = [self._tokenize(line) for line in lines]
        lengths = np.array([t.numel() for t in tokens])
        indices = np.argsort(-lengths, kind=self.sort_kind)

        def batch(tokens, lengths, indices):
            toks = tokens[0].new_full((len(tokens), tokens[0].shape[0]), self.pad_index)
            for i in range(len(tokens)):
                toks[i, -tokens[i].shape[0]:] = tokens[i]
            return Batch(
                srcs=None,
                tokens=toks,
                lengths=torch.LongTensor(lengths)
            ), indices

        batch_tokens, batch_lengths, batch_indices = [], [], []
        ntokens = nsentences = 0
        for i in indices:
            if nsentences > 0 and ((self.max_tokens is not None and ntokens + lengths[i] > self.max_tokens) or 
                                   (self.max_sentences is not None and nsentences == self.max_sentences)):
                yield batch(batch_tokens, batch_lengths, batch_indices)
                ntokens = nsentences = 0
                batch_tokens, batch_lengths, batch_indices = [], [], []
            batch_tokens.append(tokens[i])
            batch_lengths.append(lengths[i])
            batch_indices.append(i)
            ntokens += tokens[i].shape[0]
            nsentences += 1
        if nsentences > 0:
            yield batch(batch_tokens, batch_lengths, batch_indices)

    def encode_sentences(self, sentences):
        indices = []
        results = []
        for batch, batch_indices in self._make_batches(sentences):
            indices.extend(batch_indices)
            results.append(self._process_batch(batch))
        return np.vstack(results)[np.argsort(indices, kind=self.sort_kind)]

In [111]:
class Encoder(nn.Module):
    def __init__(
        self, num_embeddings, padding_idx, embed_dim=320, hidden_size=512, num_layers=1, bidirectional=False,
        left_pad=True, padding_value=0.
    ):
        super().__init__()

        self.num_layers = num_layers
        self.bidirectional = bidirectional
        self.hidden_size = hidden_size

        self.padding_idx = padding_idx
        self.embed_tokens = nn.Embedding(num_embeddings, embed_dim, padding_idx=self.padding_idx)

        self.lstm = nn.LSTM(
            input_size=embed_dim,
            hidden_size=hidden_size,
            num_layers=num_layers,
            bidirectional=bidirectional,
        )
        self.left_pad = left_pad
        self.padding_value = padding_value

        self.output_units = hidden_size
        if bidirectional:
            self.output_units *= 2

    def forward(self, src_tokens, src_lengths):
        if self.left_pad:
            # convert left-padding to right-padding
            src_tokens = convert_padding_direction(
                src_tokens,
                self.padding_idx,
                left_to_right=True,
            )

        bsz, seqlen = src_tokens.size()

        # embed tokens
        x = self.embed_tokens(src_tokens)

        # B x T x C -> T x B x C
        x = x.transpose(0, 1)

        # pack embedded source tokens into a PackedSequence
        packed_x = nn.utils.rnn.pack_padded_sequence(x, src_lengths.data.tolist())

        # apply LSTM
        if self.bidirectional:
            state_size = 2 * self.num_layers, bsz, self.hidden_size
        else:
            state_size = self.num_layers, bsz, self.hidden_size
        h0 = x.data.new(*state_size).zero_()
        c0 = x.data.new(*state_size).zero_()
        packed_outs, (final_hiddens, final_cells) = self.lstm(packed_x, (h0, c0))

        # unpack outputs and apply dropout
        x, _ = nn.utils.rnn.pad_packed_sequence(packed_outs, padding_value=self.padding_value)
        assert list(x.size()) == [seqlen, bsz, self.output_units]

        if self.bidirectional:

            def combine_bidir(outs):
                return torch.cat([
                    torch.cat([outs[2 * i], outs[2 * i + 1]], dim=0).view(1, bsz, self.output_units)
                    for i in range(self.num_layers)
                ], dim=0)

            final_hiddens = combine_bidir(final_hiddens)
            final_cells = combine_bidir(final_cells)

        encoder_padding_mask = src_tokens.eq(self.padding_idx).t()

        # Set padded outputs to -inf so they are not selected by max-pooling
        padding_mask = src_tokens.eq(self.padding_idx).t().unsqueeze(-1)
        if padding_mask.any():
            x = x.float().masked_fill_(padding_mask, float('-inf')).type_as(x)

        # Build the sentence embedding by max-pooling over the encoder outputs
        sentemb = x.max(dim=0)[0]

        return {
            'sentemb': sentemb,
            'encoder_out': (x, final_hiddens, final_cells),
            'encoder_padding_mask': encoder_padding_mask if encoder_padding_mask.any() else None
        }

In [112]:
encoder = SentenceEncoder("LASER-master/models/bilstm.eparl21.2018-11-19.pt",
                          max_sentences=None,
                          max_tokens=12000,
                          sort_kind='mergesort',
                          cpu=False)

 - transfer encoder to GPU


In [113]:
def EncodeFilep(encoder, inp_file, buffer_size=10000, verbose=True):
    embeddings = []
    n = 0
    t = time.time()
    for sentences in tqdm(buffered_read(inp_file, buffer_size)):
        embeddings.extend(encoder.encode_sentences(sentences))
        n += len(sentences)
        if verbose and n % 10000 == 0:
            print('\r - Encoder: {:d} sentences'.format(n), end='')
    if verbose:
        print('\r - Encoder: {:d} sentences'.format(n), end='')
    return np.array(embeddings)

In [114]:
def EncodeFile(encoder, inp_fname,
              buffer_size=10000, verbose=False, over_write=False,
              inp_encoding='utf-8'):
    # TODO :handle over write
    fin = open(inp_fname, 'r', encoding=inp_encoding, errors='surrogateescape') if len(inp_fname) > 0 else sys.stdin
    embeddings = EncodeFilep(encoder, fin, buffer_size=buffer_size, verbose=verbose)
    fin.close()
    return embeddings

In [128]:
def get_embeddings(token_lang, file_name, text_list):
    token_lang = token_lang
    bpe_codes = 'LASER-master/models/eparl21.fcodes'

    with open(file_name, 'w') as f:
        for item in text_list:
            f.write("%s\n" % item)

    with tempfile.TemporaryDirectory() as tmpdir:
        ifname =  file_name# stdin will be used
        if token_lang != '--':
            tok_fname = os.path.join('', 'tok')
            Token(ifname,
                  tok_fname,
                  lang=token_lang,
                  romanize=True if token_lang == 'el' else False,
                  lower_case=True, gzip=False,
                  verbose=True, over_write=False)
            ifname = tok_fname

        if bpe_codes:
            bpe_fname = os.path.join('', 'bpe')
            BPEfastApply(ifname,
                         bpe_fname,
                         bpe_codes,
                         verbose=True, over_write=False)
            ifname = bpe_fname    

        embedings = EncodeFile(encoder, ifname)
        os.remove("tok")
        os.remove("bpe")
        return embedings

In [126]:
embedings_eco = get_embeddings('en', 'raw_file_eco.txt', sentences)

1it [00:00, 59.10it/s]

 - Tokenizer: tok exists already
 - fast BPE: bpe exists already





In [129]:
embedings_eco.shape

(1, 1024)

In [130]:
len(sentences)

661511

In [67]:
import nmslib

def create_nmslib_search_index(numpy_vectors):
    """Create search index using nmslib.

    Parameters
    ==========
    numpy_vectors : numpy.array
        The matrix of vectors

    Returns
    =======
    nmslib object that has index of numpy_vectors
    """

    search_index = nmslib.init(method='hnsw', space='cosinesimil')
    search_index.addDataPointBatch(numpy_vectors)
    search_index.createIndex({'post': 2}, print_progress=True)
    return search_index
search_index = create_nmslib_search_index(embedings_eco)
search_index.saveIndex('multilingual_index')

In [131]:
embed_demo = get_embeddings('en', 'raw_file_en.txt', ['how does inflation effect interest rate?'])

ids, distances = search_index.knnQuery(embed_demo[0], k=10)

for i, pos in enumerate(ids):
    print(sentences[pos]+'  :::   '+str(distances[i])+'\n')

 - Tokenizer: raw_file_en.txt in language en  
 - fast BPE: processing tok


1it [00:00, 82.26it/s]

2. how will monetary policy affect interest rates?  :::   0.16479647

what is the effect of prices?  :::   0.18911254

4. what is the impact of interest rate differentials on the exchange rate?  :::   0.1924169

what exactly are floating exchange rates?  :::   0.21271539

what is the impact of international currency exchange rate fluctuation?  :::   0.21645671

why a low inflation rate target?  :::   0.2171886

how would rising interest rates affect the banking sector?  :::   0.21922374

what has been the experience with inflation targeting?  :::   0.21947384

what implications did these policies have for inflation?  :::   0.22273636

however what about the risk of inflation?  :::   0.22466302






In [132]:
embed_demo = get_embeddings('nl', 'raw_file_en.txt', ['hoe werkt inflatie-effect rente?'])

ids, distances = search_index.knnQuery(embed_demo[0], k=10)

for i, pos in enumerate(ids):
    print(sentences[pos]+'  :::   '+str(distances[i])+'\n')

 - Tokenizer: raw_file_en.txt in language nl  
 - fast BPE: processing tok


1it [00:00, 131.13it/s]

2. how will monetary policy affect interest rates?  :::   0.19387078

what is the effect of prices?  :::   0.19455987

what has been the experience with inflation targeting?  :::   0.21006119

what implications did these policies have for inflation?  :::   0.21268898

what is the impact of international currency exchange rate fluctuation?  :::   0.22213888

however what about the risk of inflation?  :::   0.22290683

what are the implications for monetary policy?  :::   0.22872382

what exactly are floating exchange rates?  :::   0.23408651

but consider the effect of inflation.  :::   0.23626012

4. what is the impact of interest rate differentials on the exchange rate?  :::   0.24321729






In [133]:
embed_demo = get_embeddings('gem', 'raw_file_en.txt', ['Wie wirkt sich die Inflation auf den Zinssatz aus?'])

ids, distances = search_index.knnQuery(embed_demo[0], k=10)

for i, pos in enumerate(ids):
    print(sentences[pos]+'  :::   '+str(distances[i])+'\n')

 - Tokenizer: raw_file_en.txt in language gem  
 - fast BPE: processing tok


1it [00:00, 98.24it/s]

2. how will monetary policy affect interest rates?  :::   0.16120678

what is the effect of prices?  :::   0.19260627

what implications did these policies have for inflation?  :::   0.20439976

4. what is the impact of interest rate differentials on the exchange rate?  :::   0.2088108

what would be the impact if any on the exchange rate?  :::   0.21565342

what has been the experience with inflation targeting?  :::   0.21710718

how would rising interest rates affect the banking sector?  :::   0.21808034

however what about the risk of inflation?  :::   0.22759044

what exactly are floating exchange rates?  :::   0.22852325

what is the impact of international currency exchange rate fluctuation?  :::   0.23033577






In [134]:
embed_demo = get_embeddings('it', 'raw_file_en.txt', ['in che modo l\'inflazione influenza il tasso di interesse?'])

ids, distances = search_index.knnQuery(embed_demo[0], k=10)

for i, pos in enumerate(ids):
    print(sentences[pos]+'  :::   '+str(distances[i])+'\n')

 - Tokenizer: raw_file_en.txt in language it  
 - fast BPE: processing tok


1it [00:00, 119.86it/s]

2. how will monetary policy affect interest rates?  :::   0.14336467

how would rising interest rates affect the banking sector?  :::   0.19409412

what is the effect of prices?  :::   0.20226151

4. what is the impact of interest rate differentials on the exchange rate?  :::   0.20520967

1. how does infl ation affect nominal interest rates?  :::   0.20935893

what is the impact of international currency exchange rate fluctuation?  :::   0.21694893

what implications did these policies have for inflation?  :::   0.22093427

what exactly are floating exchange rates?  :::   0.22280097

what has been the experience with inflation targeting?  :::   0.22607595

what would be the impact if any on the exchange rate?  :::   0.2266792






In [135]:
embed_demo = get_embeddings('es', 'raw_file_en.txt', ['¿Cómo afecta la inflación a la tasa de interés?'])

ids, distances = search_index.knnQuery(embed_demo[0], k=10)

for i, pos in enumerate(ids):
    print(sentences[pos]+'  :::   '+str(distances[i])+'\n')

 - Tokenizer: raw_file_en.txt in language es  
 - fast BPE: processing tok


1it [00:00, 114.15it/s]

2. how will monetary policy affect interest rates?  :::   0.16222537

what implications did these policies have for inflation?  :::   0.21176767

1. how does infl ation affect nominal interest rates?  :::   0.2151556

how would rising interest rates affect the banking sector?  :::   0.21975899

what has been the experience with inflation targeting?  :::   0.23229277

4. what is the impact of interest rate differentials on the exchange rate?  :::   0.23261261

already a function of the present price and the rate of interest?  :::   0.23437083

what is the effect of prices?  :::   0.23609203

however what about the risk of inflation?  :::   0.240129

what exactly are floating exchange rates?  :::   0.24489737






In [136]:
embed_demo = get_embeddings('hn', 'raw_file_en.txt', ['मुद्रास्फीति पर ब्याज दर कैसे प्रभाव डालती है?'])

ids, distances = search_index.knnQuery(embed_demo[0], k=10)

for i, pos in enumerate(ids):
    print(sentences[pos]+'  :::   '+str(distances[i])+'\n')

 - Tokenizer: raw_file_en.txt in language hn  
 - fast BPE: processing tok


1it [00:00, 59.50it/s]

a straggly man in a dark hat and glasses stands in a corner brooding.  :::   0.28005415

1 simply 5 中华人民共和国中国人民银行法 chineseenglish in cch asia pacific eds.  :::   0.28070813

the slope and rsquared in the scatter plots are bigger in fig.  :::   0.28273642

i use the edition shinko kenkyu yakuchu hen 眞誥研究 譯注篇 tadao yoshikawa 吉川忠夫 and kunio mugitani 麥谷 邦夫 eds.  :::   0.2835586

model ln δrpit  ln δrft  αi  βi1ln δrmit  ln δrft  ɛit.  :::   0.28709984

in selgins view the credit 56 moneysoundandunsoundsalerno inconclusionmodernaustrianmonetarythoughtwithits rootsintheturgottraditionandemphasisonthemacroeconomic phenomenaofentrepreneurialcalculationandpricecoodination standsinradicaloppositiontothemodernmacroeconomicschools ofthoughtwhosemonetarydoctrineshavebeenmoldedwithinthe lawtradition.  :::   0.287973

model ln δrpit  ln δrft  αi  βi1ln δrmit  ln δrft  βi2smbt  βi3hmlt  βi4momt  ɛit.  :::   0.29057956

httpwww.consumerfinance.govnewsroomcfpbtakesactionagainstacecashexpressforpushingpa


