In [53]:
# Project root; every weight file below is addressed relative to it.
ROOT = '/home/yuli_zeira/Cognatehood/'

# Language codes.
ENG = 'eng'
SPA = 'spa'

# Orthographic bigram weights, variant 0.
BI_INSERTION_0 = f'{ROOT}char_weights/alphabet/new/insertion_0'
BI_DELETION_0 = f'{ROOT}char_weights/alphabet/new/deletion_0'
BI_SUBSTITUTION_0 = f'{ROOT}char_weights/alphabet/new/substitution_0'

# Orthographic bigram weights, variant 1.
BI_INSERTION_1 = f'{ROOT}char_weights/alphabet/new/insertion_1'
BI_DELETION_1 = f'{ROOT}char_weights/alphabet/new/deletion_1'
BI_SUBSTITUTION_1 = f'{ROOT}char_weights/alphabet/new/substitution_1'

# Orthographic trigram weights.
TRI_INSERTION = f'{ROOT}char_weights/alphabet/new/insertion_tri'
TRI_DELETION = f'{ROOT}char_weights/alphabet/new/deletion_tri'
TRI_SUBSTITUTION = f'{ROOT}char_weights/alphabet/new/substitution_tri'

# IPA bigram weights, variant 0.
IPA_BI_INSERTION_0 = f'{ROOT}char_weights/ipa/new/ipa_insertion_0'
IPA_BI_DELETION_0 = f'{ROOT}char_weights/ipa/new/ipa_deletion_0'
IPA_BI_SUBSTITUTION_0 = f'{ROOT}char_weights/ipa/new/ipa_substitution_0'

# IPA bigram weights, variant 1.
IPA_BI_INSERTION_1 = f'{ROOT}char_weights/ipa/new/ipa_insertion_1'
IPA_BI_DELETION_1 = f'{ROOT}char_weights/ipa/new/ipa_deletion_1'
IPA_BI_SUBSTITUTION_1 = f'{ROOT}char_weights/ipa/new/ipa_substitution_1'

# IPA trigram weights.
IPA_TRI_INSERTION = f'{ROOT}char_weights/ipa/new/ipa_insertion_tri'
IPA_TRI_DELETION = f'{ROOT}char_weights/ipa/new/ipa_deletion_tri'
IPA_TRI_SUBSTITUTION = f'{ROOT}char_weights/ipa/new/ipa_substitution_tri'

In [2]:
import stanza
import epitran

In [101]:
# Orthographic configuration.
# NOTE: the IPA configuration cell binds the same names (INSERTION_0, ...,
# COGNATES_LIST); whichever of the two cells ran last is what the training
# cells pick up.
ABC = 'abcdefghijklmnopqrstuvwxyz'
SPANISH_ABC = 'abcdefghijklmnopqrstuvwxyzñúóéáíü'
# Every ordered letter pair, plus the pairs that have '' on one side
# (the word-boundary padding used when bigrams are cut from a word).
EN_BIGRAMS = [(i, j) for i in ABC for j in ABC] + [(i, '') for i in ABC] + [('', i) for i in ABC]
SP_BIGRAMS = [(i, j) for i in SPANISH_ABC for j in SPANISH_ABC] + [(i, '') for i in SPANISH_ABC] + [('', i) for i in SPANISH_ABC]

# Dicts of weights for first char edit distance (every operation starts at 1.0).
# Insertions are keyed by Spanish bigrams, deletions by English bigrams and
# substitutions by (English bigram, Spanish bigram).
INSERTION_0 = {bigram: 1.0 for bigram in SP_BIGRAMS}
DELETION_0 = {bigram: 1.0 for bigram in EN_BIGRAMS}
SUBSTITUTION_0 = {(en, sp): 1.0 for en in EN_BIGRAMS for sp in SP_BIGRAMS}

# Dicts of weights for second char edit distance.  These are separate dict
# objects on purpose: each trainer mutates the dicts it is given.
INSERTION_1 = {bigram: 1.0 for bigram in SP_BIGRAMS}
DELETION_1 = {bigram: 1.0 for bigram in EN_BIGRAMS}
SUBSTITUTION_1 = {(en, sp): 1.0 for en in EN_BIGRAMS for sp in SP_BIGRAMS}

# ROOT already ends with '/', so no leading separator here (the previous value
# contained a doubled '//').
COGNATES_LIST = ROOT + 'char_weights/cognate_list.txt'

In [105]:
# IPA configuration.
# NOTE: this cell binds the same names as the orthographic configuration cell
# (INSERTION_0, ..., COGNATES_LIST); whichever of the two cells ran last is
# what the training cells pick up.
# Symbol inventory; some entries are multi-character tokens and '' is the
# padding symbol used at word boundaries.
IPA_CHARS = ['p', 'e', 'ɾ', 'o', 'i', 's', 'x', 'a', 'l', 'k', 'd', 'n', 'we',
             'b', 't', 'm', 'ɡ', 'f', 'u', 'jo', 't͡ʃ', 'ja', 'ʝ', 'je', '',
             'si', 'se', 'xe', 'wa', 'w', 'ɡi', 'r', 'ks', 'ɡw', 'wi', 'ɡe',
             'xi', 'ju', 'wo', 'ʃ', 'ɲ', 't͡ɬ', 't͡s', 'æ', 'ɹ̩', 'ð', 'ɪ', 'j',
             'd͡ʒ', 'ʌ', 'ɑ', 'ə', 'h', 'ɹ', 'ŋ', 'ɔ', 'ʊ', 'z', 'v', 'ɛ', 'θ',
             'ʒ']

IPA_BIGRAMS = [(i, j) for i in IPA_CHARS for j in IPA_CHARS]

# Dicts of weights for first char edit distance (every operation starts at 1.0).
# NOTE: each substitution dict holds len(IPA_BIGRAMS) ** 2 entries.
INSERTION_0 = {bigram: 1.0 for bigram in IPA_BIGRAMS}
DELETION_0 = {bigram: 1.0 for bigram in IPA_BIGRAMS}
SUBSTITUTION_0 = {(src, dst): 1.0 for src in IPA_BIGRAMS for dst in IPA_BIGRAMS}

# Dicts of weights for second char edit distance.  These are separate dict
# objects on purpose: each trainer mutates the dicts it is given.
INSERTION_1 = {bigram: 1.0 for bigram in IPA_BIGRAMS}
DELETION_1 = {bigram: 1.0 for bigram in IPA_BIGRAMS}
SUBSTITUTION_1 = {(src, dst): 1.0 for src in IPA_BIGRAMS for dst in IPA_BIGRAMS}

# ROOT already ends with '/', so no leading separator here (the previous value
# contained a doubled '//').
COGNATES_LIST = ROOT + 'char_weights/pronunciation_cognates'

In [44]:
from collections import Counter
import json
import pickle
from numpy import argmin


# Class of cognates with weights saved in dictionary:

class Cognates:
    """Learn bigram-keyed edit-distance weights from a list of cognate pairs.

    Every insertion, deletion and substitution is keyed by a bigram around the
    edited symbol and its cost is looked up in the dictionaries handed to the
    constructor.  ``set_weights`` aligns every cognate pair and overwrites the
    cost of each operation that was used in an alignment.
    """

    def __init__(self, cognates_file: str, insertion, deletion, substitution,
                 ind: int, ipa=False):
        """Load the cognate pairs and remember the weight dictionaries.

        :param cognates_file: path of the cognate list (comma separated text,
            or a pickle when ``ipa`` is true).
        :param insertion: dict bigram -> cost; mutated in place by ``set_weights``.
        :param deletion: dict bigram -> cost; mutated in place by ``set_weights``.
        :param substitution: dict (bigram, bigram) -> cost; mutated in place.
        :param ind: 0 keys operations by (previous, current) symbol,
            1 keys them by (current, next) symbol.
        :param ipa: when true, words are lists of symbols instead of strings.
        """
        self._is_ipa = ipa
        self._ind = ind
        self._cognates_file = cognates_file
        self._cognates = self.read_cognates()
        self._insertion = insertion
        self._deletion = deletion
        self._substitution = substitution
        # Operations of every alignment computed so far (see edit_distance).
        self._operations = list()
        self.make_n_gram = [self.make_n_gram_first, self.make_n_gram_second][ind]

    def read_cognates(self) -> list[tuple[str, str]]:
        """Return the cognate pairs stored in ``self._cognates_file``.

        In IPA mode the file is unpickled and returned as is.  Otherwise each
        non-blank line is split on ',' and its first two fields are returned
        lower-cased with surrounding whitespace removed.
        """
        if self._is_ipa:
            # NOTE(review): pickle.load can execute arbitrary code; only load
            # files produced by this project.
            with open(self._cognates_file, 'rb') as ff:
                return pickle.load(ff)
        cognates_list = list()
        with open(self._cognates_file, 'r', encoding='utf-8') as ff:
            for line in ff:
                if not line.strip():
                    # A blank line carries no pair (it used to raise IndexError).
                    continue
                fields = line.split(',')
                # strip() instead of [:-1]: the old slice removed a real
                # character whenever the line had no trailing newline.
                cognates_list.append((fields[0].strip().lower(),
                                      fields[1].strip().lower()))
        return cognates_list

    def make_n_gram_first(self, word, k: int) -> tuple[str, ...]:
        """Return (symbol before position k, symbol at k); '' pads the start."""
        if self._is_ipa:
            tup = [''] + word
        else:
            tup = [''] + list(word)
        return tuple(tup[k: k + 2])

    def make_n_gram_second(self, word, k: int) -> tuple[str, ...]:
        """Return (symbol at position k, symbol after k); '' pads the end."""
        if self._is_ipa:
            tup = word + ['']
        else:
            tup = list(word) + ['']
        return tuple(tup[k: k + 2])

    def initialize(self, word_1: str, word_2: str) -> tuple[dict, dict]:
        """Build the borders of the alignment tables.

        Index -1 stands for the empty prefix: cell (i, -1) deletes the first
        i + 1 symbols of ``word_1`` and cell (-1, j) inserts the first j + 1
        symbols of ``word_2``.

        :return: ``(all_operations, distances)`` keyed by ``(i, j)``.
        """
        # Set up:
        all_operations, distances = {(-1, -1): []}, {(-1, -1): 0}
        # Initialize the arrays:
        for i in range(len(word_1)):
            n_gram = self.make_n_gram(word_1, i)
            distances[(i, -1)] = self._deletion[n_gram] + distances[(i - 1, -1)]
            all_operations[(i, -1)] = all_operations[(i - 1, -1)] + [('Deletion', n_gram)]
        for j in range(len(word_2)):
            n_gram = self.make_n_gram(word_2, j)
            distances[(-1, j)] = self._insertion[n_gram] + distances[(-1, j - 1)]
            all_operations[(-1, j)] = all_operations[(-1, j - 1)] + [('Insertion', n_gram)]
        return all_operations, distances

    def edit_distance(self, word_1: str, word_2: str) -> float:
        """Return the weighted edit distance between the two words.

        Side effect: the operations of the chosen alignment are appended to
        ``self._operations``.  Raises ``KeyError`` when a needed n-gram is
        missing from the weight dictionaries.
        """
        # Set up:
        if not self._is_ipa:
            word_1, word_2 = word_1.lower(), word_2.lower()
        len_1, len_2 = len(word_1), len(word_2)
        all_operations, distances = self.initialize(word_1, word_2)
        # Fill distances and operations:
        for i in range(len_1):
            for j in range(len_2):
                w1_ngram, w2_ngram = self.make_n_gram(word_1, i), self.make_n_gram(word_2, j)
                if word_1[i] == word_2[j]:
                    # Matching symbols are free and record no operation.
                    cost = 0
                    c = []
                else:
                    cost = self._substitution[(w1_ngram, w2_ngram)]
                    c = [('Substitution', (w1_ngram, w2_ngram))]
                curr_operations = [all_operations[(i, j - 1)] + [('Insertion', w2_ngram)],
                                   all_operations[(i - 1, j)] + [('Deletion', w1_ngram)],
                                   all_operations[(i - 1, j - 1)] + c]
                curr_distances = [distances[(i, j - 1)] + self._insertion[w2_ngram],
                                  distances[(i - 1, j)] + self._deletion[w1_ngram],
                                  distances[(i - 1, j - 1)] + cost]
                # argmin returns the first minimum, so ties are resolved in the
                # order insertion, deletion, substitution/match.
                ind = argmin(curr_distances)
                distances[(i, j)] = curr_distances[ind]
                all_operations[(i, j)] = curr_operations[ind]
        self._operations += all_operations[(len_1 - 1, len_2 - 1)]
        return distances[(len_1 - 1, len_2 - 1)]

    def set_weights(self):
        """Align every cognate pair and re-weight each operation that was used.

        An operation seen ``c`` times gets the weight ``1 / (2 * c + 1)``; the
        new weights are printed as they are assigned.

        NOTE: ``self._operations`` is never cleared, so operations recorded by
        earlier ``edit_distance``/``test_distances``/``set_weights`` calls on
        this instance are counted again.
        """
        for pair in self._cognates:
            self.edit_distance(pair[0], pair[1])
        all_ops = Counter(self._operations)
        for op, count in all_ops.items():
            weight = float(1 / (2 * count + 1))
            print(op, count, weight)
            if op[0] == 'Insertion':
                self._insertion[op[1]] = weight
            elif op[0] == 'Deletion':
                self._deletion[op[1]] = weight
            else:
                self._substitution[op[1]] = weight
            print(op[0], weight)

    def save_weights(self, ins: str, de: str, sub: str):
        """Pickle the insertion, deletion and substitution dicts to the given paths."""
        with open(ins, 'wb') as f:
            pickle.dump(self._insertion, f)
        with open(de, 'wb') as f:
            pickle.dump(self._deletion, f)
        with open(sub, 'wb') as f:
            pickle.dump(self._substitution, f)

    def test_distances(self):
        """Write the distance of every cognate pair to ``test_bigrams_<ind>.txt``."""
        # Explicit UTF-8: the pairs may contain accented letters, which a
        # non-UTF-8 platform default encoding cannot always write.
        with open(f'test_bigrams_{self._ind}.txt', 'w', encoding='utf-8') as f:
            for pair in self._cognates:
                e = self.edit_distance(pair[0], pair[1])
                f.write(f'{pair[0]} - {pair[1]}: {e}\n')

In [58]:
# One trainer per bigram variant: ind=0 keys operations by (previous, current)
# symbol, ind=1 by (current, next) symbol.
cognates_first = Cognates(
    cognates_file=COGNATES_LIST,
    insertion=INSERTION_0,
    deletion=DELETION_0,
    substitution=SUBSTITUTION_0,
    ind=0,
)
cognates_second = Cognates(
    cognates_file=COGNATES_LIST,
    insertion=INSERTION_1,
    deletion=DELETION_1,
    substitution=SUBSTITUTION_1,
    ind=1,
)

In [59]:
# Align every cognate pair and overwrite the weight of each operation that was
# used; set_weights prints the operations and their new weights as it goes.
cognates_first.set_weights()
cognates_second.set_weights()

('Substitution', (('', 'a'), ('', 'á'))) 11 0.043478260869565216
Substitution 0.043478260869565216
('Substitution', (('c', 'u'), ('c', 'o'))) 6 0.07692307692307693
Substitution 0.07692307692307693
('Deletion', ('u', 's')) 62 0.008
Deletion 0.008
('Insertion', ('n', 'a')) 38 0.012987012987012988
Insertion 0.012987012987012988
('Insertion', ('a', 'r')) 203 0.002457002457002457
Insertion 0.002457002457002457
('Substitution', (('n', 'e'), ('n', 'a'))) 39 0.012658227848101266
Substitution 0.012658227848101266
('Insertion', ('d', 'o')) 179 0.002785515320334262
Insertion 0.002785515320334262
('Substitution', (('n', 'm'), ('n', 'o'))) 3 0.14285714285714285
Substitution 0.14285714285714285
('Deletion', ('m', 'e')) 9 0.05263157894736842
Deletion 0.05263157894736842
('Deletion', ('e', 'n')) 14 0.034482758620689655
Deletion 0.034482758620689655
('Deletion', ('n', 't')) 16 0.030303030303030304
Deletion 0.030303030303030304
('Deletion', ('b', 'b')) 2 0.2
Deletion 0.2
('Substitution', (('a', 't'), ('

('Substitution', (('a', 'b'), ('á', 'b'))) 4 0.1111111111111111
Substitution 0.1111111111111111
('Substitution', (('u', 's'), ('o', ''))) 18 0.02702702702702703
Substitution 0.02702702702702703
('Deletion', ('s', '')) 73 0.006802721088435374
Deletion 0.006802721088435374
('Insertion', ('a', 'r')) 114 0.004366812227074236
Insertion 0.004366812227074236
('Insertion', ('r', '')) 309 0.0016155088852988692
Insertion 0.0016155088852988692
('Substitution', (('e', 'd'), ('a', 'd'))) 57 0.008695652173913044
Substitution 0.008695652173913044
('Insertion', ('o', '')) 733 0.0006816632583503749
Insertion 0.0006816632583503749
('Substitution', (('m', 'e'), ('o', ''))) 4 0.1111111111111111
Substitution 0.1111111111111111
('Deletion', ('e', 'n')) 12 0.04
Deletion 0.04
('Deletion', ('n', 't')) 15 0.03225806451612903
Deletion 0.03225806451612903
('Deletion', ('t', '')) 27 0.01818181818181818
Deletion 0.01818181818181818
('Deletion', ('b', 'r')) 2 0.2
Deletion 0.2
('Substitution', (('t', 'e'), ('r', ''))

In [60]:
# Pickle the learned insertion/deletion/substitution dicts of both trainers to
# the orthographic bigram weight files.
cognates_first.save_weights(BI_INSERTION_0, BI_DELETION_0, BI_SUBSTITUTION_0)
cognates_second.save_weights(BI_INSERTION_1, BI_DELETION_1, BI_SUBSTITUTION_1)

# Alternative targets (IPA weight files), currently disabled:
# cognates_first.save_weights(IPA_BI_INSERTION_0, IPA_BI_DELETION_0, IPA_BI_SUBSTITUTION_0)
# cognates_second.save_weights(IPA_BI_INSERTION_1, IPA_BI_DELETION_1, IPA_BI_SUBSTITUTION_1)

In [93]:
import pickle

to_change = {'ñ': 'n', 'ú': 'u', 'ó': 'o', 'é': 'e', 'á': 'a', 'í': 'i', 'ü': 'u'}

def add_wieghts_ins(weights_file: str, n: int, new_file: str):
    """Fold the weights of accented n-grams into their unaccented counterparts.

    For every key of the pickled weight dict whose first ``n`` symbols contain
    a character listed in ``to_change``, the weight stored under the
    unaccented key is updated:

    * both keys were observed (weight != 1.0): the counts behind the two
      weights are added, i.e. ``1 / (prev + curr + 1)`` with
      ``prev = 1 / w_plain - 1`` and ``curr = 1 / w_accented - 1``;
    * only the accented key was observed: its weight is copied;
    * otherwise the unaccented weight is kept.

    An unaccented key that is missing from the dict is treated as unobserved
    (weight 1.0) and added; this used to work only for ``n == 3`` and raised
    ``KeyError`` otherwise.  The accented keys themselves are left untouched.

    :param weights_file: path of the pickled ``{n-gram tuple: weight}`` dict.
    :param n: number of leading symbols of each key to inspect.
    :param new_file: path the updated dict is pickled to.
    """
    # NOTE(review): pickle.load can execute arbitrary code; only load weight
    # files produced by this project.
    with open(weights_file, 'rb') as f:
        weight_dict = pickle.load(f)
    # Snapshot the keys: the dict may gain entries while it is processed.
    for k in list(weight_dict.keys()):
        new_chars = list(k)
        changed = False
        for i in range(n):
            plain = to_change.get(k[i])
            if plain is not None:
                new_chars[i] = plain
                changed = True
        if not changed:
            continue
        new_chars = tuple(new_chars)
        plain_weight = weight_dict.get(new_chars, 1.0)
        accented_weight = weight_dict[k]
        if plain_weight != 1.0 and accented_weight != 1.0:
            print('Both appeared!')
            print(new_chars, plain_weight)
            print(k, accented_weight)
            # Weights are 1 / (2 * count + 1); recover 2 * count of each key.
            prev = (1 / plain_weight) - 1
            curr = (1 / accented_weight) - 1
            new_wieght = 1 / ((prev + curr) + 1)
            print(new_wieght)
            print()
        elif accented_weight != 1.0:
            print('Spanish appeared!')
            print(new_chars, plain_weight)
            print(k, accented_weight)
            new_wieght = accented_weight
            print(new_wieght)
            print()
        else:
            new_wieght = plain_weight
        weight_dict[new_chars] = new_wieght
    with open(new_file, 'wb') as f:
        pickle.dump(weight_dict, f)


def add_wieghts_sub(weights_file: str, n: int, new_file: str):
    """Fold substitution weights of accented targets into unaccented ones.

    Keys of the pickled dict are ``(source n-gram, target n-gram)``.  For
    every key whose target n-gram contains, in its first ``n`` symbols, a
    character listed in ``to_change``, the weight stored under
    ``(source, unaccented target)`` is updated:

    * both keys were observed (weight != 1.0): the counts behind the two
      weights are added, i.e. ``1 / (prev + curr + 1)`` with
      ``prev = 1 / w_plain - 1`` and ``curr = 1 / w_accented - 1``;
    * only the accented key was observed: its weight is copied;
    * otherwise the unaccented weight is kept.

    An unaccented key that is missing from the dict is treated as unobserved
    (weight 1.0) and added; this used to work only for ``n == 3`` and raised
    ``KeyError`` otherwise.  The accented keys themselves are left untouched.

    :param weights_file: path of the pickled substitution weight dict.
    :param n: number of leading symbols of each target n-gram to inspect.
    :param new_file: path the updated dict is pickled to.
    """
    # NOTE(review): pickle.load can execute arbitrary code; only load weight
    # files produced by this project.
    with open(weights_file, 'rb') as f:
        weight_dict = pickle.load(f)
    # Snapshot the keys: the dict may gain entries while it is processed.
    for k in list(weight_dict.keys()):
        new_chars = list(k[1])
        changed = False
        for i in range(n):
            plain = to_change.get(k[1][i])
            if plain is not None:
                new_chars[i] = plain
                changed = True
        if not changed:
            continue
        new_chars = tuple(new_chars)
        plain_key = (k[0], new_chars)
        plain_weight = weight_dict.get(plain_key, 1.0)
        accented_weight = weight_dict[k]
        if plain_weight != 1.0 and accented_weight != 1.0:
            print('Both appeared!')
            print(new_chars, plain_weight)
            print(k, accented_weight)
            # Weights are 1 / (2 * count + 1); recover 2 * count of each key.
            prev = (1 / plain_weight) - 1
            curr = (1 / accented_weight) - 1
            new_wieght = 1 / ((prev + curr) + 1)
            print(new_wieght)
            print()
        elif accented_weight != 1.0:
            print('Spanish appeared!')
            print(new_chars, plain_weight)
            print(k, accented_weight)
            new_wieght = accented_weight
            print(new_wieght)
            print()
        else:
            new_wieght = plain_weight
        weight_dict[plain_key] = new_wieght
    with open(new_file, 'wb') as f:
        pickle.dump(weight_dict, f)


def check_wieght(weights_file: str, tup: str):
    """Print ``tup`` and its weight from a pickled weight dict.

    Nothing is printed when ``tup`` is not a key of the dict.  Despite the
    annotation, the callers in this notebook pass n-gram tuples as ``tup``.
    """
    with open(weights_file, 'rb') as handle:
        stored_weights = pickle.load(handle)
    if tup not in stored_weights:
        return
    print(tup, stored_weights[tup])

In [98]:
# Insertion weights: show a sample pair before the merge, fold accented
# bigrams into their unaccented form, then show the merged weight.
ins = BI_INSERTION_1
new_ins = BI_INSERTION_1 + '_1'  # merged copy is written next to the original
check_wieght(ins, ('d', 'i'))
check_wieght(ins, ('d', 'í'))
add_wieghts_ins(ins, 2, new_ins)
print('After:')
check_wieght(new_ins, ('d', 'i'))

# Same procedure for the substitution weights.
sub = BI_SUBSTITUTION_1
new_sub = BI_SUBSTITUTION_1 + '_1'  # merged copy is written next to the original
check_wieght(sub, (('e', 's'), ('i', 's')))
check_wieght(sub, (('e', 's'), ('í', 's')))
add_wieghts_sub(sub, 2, new_sub)
print('After:')
check_wieght(new_sub, (('e', 's'), ('i', 's')))

('d', 'i') 0.3333333333333333
('d', 'í') 1.0
('a', 'ñ') ('a', 'n')
('a', 'ú') ('a', 'u')
('a', 'ó') ('a', 'o')
('a', 'é') ('a', 'e')
('a', 'á') ('a', 'a')
('a', 'í') ('a', 'i')
('a', 'ü') ('a', 'u')
('b', 'ñ') ('b', 'n')
('b', 'ú') ('b', 'u')
('b', 'ó') ('b', 'o')
('b', 'é') ('b', 'e')
('b', 'á') ('b', 'a')
('b', 'í') ('b', 'i')
('b', 'ü') ('b', 'u')
('c', 'ñ') ('c', 'n')
('c', 'ú') ('c', 'u')
('c', 'ó') ('c', 'o')
('c', 'é') ('c', 'e')
('c', 'á') ('c', 'a')
('c', 'í') ('c', 'i')
('c', 'ü') ('c', 'u')
('d', 'ñ') ('d', 'n')
('d', 'ú') ('d', 'u')
('d', 'ó') ('d', 'o')
('d', 'é') ('d', 'e')
('d', 'á') ('d', 'a')
('d', 'í') ('d', 'i')
('d', 'ü') ('d', 'u')
('e', 'ñ') ('e', 'n')
('e', 'ú') ('e', 'u')
('e', 'ó') ('e', 'o')
('e', 'é') ('e', 'e')
('e', 'á') ('e', 'a')
('e', 'í') ('e', 'i')
('e', 'ü') ('e', 'u')
('f', 'ñ') ('f', 'n')
('f', 'ú') ('f', 'u')
('f', 'ó') ('f', 'o')
('f', 'é') ('f', 'e')
('f', 'á') ('f', 'a')
('f', 'í') ('f', 'i')
('f', 'ü') ('f', 'u')
('g', 'ñ') ('g', 'n')
('g', 'ú'

FileNotFoundError: [Errno 2] No such file or directory: '/home/yuli_zeira/Cognatehood/char_weights/alphabet/new/insertion_1_1'

In [100]:
import pickle
from numpy import argmin
from sys import argv
from collections import Counter


#  Constants:
# Size of the n-grams that key the edit operations.
N = 3


class TriCognates:
    """Learn trigram-keyed edit-distance weights from a list of cognate pairs.

    Every insertion, deletion and substitution is keyed by the n-gram
    (previous, current, next symbol) around the edited symbol.  The weight
    dictionaries only contain n-grams that occur in the cognate list itself.
    ``set_weights`` aligns every pair and overwrites the cost of each
    operation that was used in an alignment.
    """

    def __init__(self, cognates_file: str, ipa=False):
        """Load the cognate pairs and create unit-weight operation tables.

        :param cognates_file: path of the cognate list (comma separated text,
            or a pickle when ``ipa`` is true).
        :param ipa: when true, words are lists of symbols instead of strings.
        """
        self._is_ipa = ipa
        self._cognates_file = cognates_file
        self._cognates = self.read_cognates()
        self._english_trigrams, self._spanish_trigrams = self.create_trigrams()
        self._insertion, self._deletion, self._substitution = self.create_operations()
        # Operations of every alignment computed so far (see edit_distance).
        self._operations = list()

    def read_cognates(self) -> list[tuple[str, str]]:
        """Return the cognate pairs stored in ``self._cognates_file``.

        In IPA mode the file is unpickled and returned as is.  Otherwise each
        non-blank line is split on ',' and its first two fields are returned
        lower-cased with surrounding whitespace removed.
        """
        if self._is_ipa:
            # NOTE(review): pickle.load can execute arbitrary code; only load
            # files produced by this project.
            with open(self._cognates_file, 'rb') as ff:
                return pickle.load(ff)
        cognates_list = list()
        with open(self._cognates_file, 'r', encoding='utf-8') as ff:
            for line in ff:
                if not line.strip():
                    # A blank line carries no pair (it used to raise IndexError).
                    continue
                fields = line.split(',')
                # strip() instead of [:-1]: the old slice removed a real
                # character whenever the line had no trailing newline.
                cognates_list.append((fields[0].strip().lower(),
                                      fields[1].strip().lower()))
        return cognates_list

    def make_n_gram(self, word: str, k: int) -> tuple[str, ...]:
        """Return the N symbols starting one before position k; '' pads both ends."""
        if self._is_ipa:
            tup = [''] + word + ['']
        else:
            tup = [''] + list(word) + ['']
        return tuple(tup[k: k + N])

    def create_trigrams(self) -> tuple[list, list]:
        """Return the distinct n-grams of the first and of the second words."""
        en_trigrams = set()
        sp_trigrams = set()
        for eng_word, spa_word in self._cognates:
            en_trigrams.update(self.make_n_gram(eng_word, i) for i in range(len(eng_word)))
            sp_trigrams.update(self.make_n_gram(spa_word, i) for i in range(len(spa_word)))
        return list(en_trigrams), list(sp_trigrams)

    def create_operations(self) -> tuple[dict, dict, dict]:
        """Return (insertion, deletion, substitution) dicts with every weight 1.0.

        Insertions are keyed by the second words' n-grams, deletions by the
        first words' n-grams and substitutions by every (first, second) pair.
        """
        insertion = {gram: 1.0 for gram in self._spanish_trigrams}
        deletion = {gram: 1.0 for gram in self._english_trigrams}
        substitution = {(en, sp): 1.0
                        for en in self._english_trigrams
                        for sp in self._spanish_trigrams}
        return insertion, deletion, substitution

    def initialize(self, word_1: str, word_2: str) -> tuple[dict, dict]:
        """Build the borders of the alignment tables.

        Index -1 stands for the empty prefix: cell (i, -1) deletes the first
        i + 1 symbols of ``word_1`` and cell (-1, j) inserts the first j + 1
        symbols of ``word_2``.

        :return: ``(all_operations, distances)`` keyed by ``(i, j)``.
        """
        # Set up:
        all_operations, distances = {(-1, -1): []}, {(-1, -1): 0}
        # Initialize the arrays:
        for i in range(len(word_1)):
            n_gram = self.make_n_gram(word_1, i)
            distances[(i, -1)] = self._deletion[n_gram] + distances[(i - 1, -1)]
            all_operations[(i, -1)] = all_operations[(i - 1, -1)] + [('Deletion', n_gram)]
        for j in range(len(word_2)):
            n_gram = self.make_n_gram(word_2, j)
            distances[(-1, j)] = self._insertion[n_gram] + distances[(-1, j - 1)]
            all_operations[(-1, j)] = all_operations[(-1, j - 1)] + [('Insertion', n_gram)]
        return all_operations, distances

    def edit_distance(self, word_1: str, word_2: str) -> float:
        """Return the weighted edit distance between the two words.

        Side effect: the operations of the chosen alignment are appended to
        ``self._operations``.  Raises ``KeyError`` when a needed n-gram is not
        in the weight dictionaries (they only cover the cognate list).
        """
        # Set up:
        if not self._is_ipa:
            word_1, word_2 = word_1.lower(), word_2.lower()
        len_1, len_2 = len(word_1), len(word_2)
        all_operations, distances = self.initialize(word_1, word_2)
        # Fill distances and operations:
        for i in range(len_1):
            for j in range(len_2):
                w1_ngram, w2_ngram = self.make_n_gram(word_1, i), self.make_n_gram(word_2, j)
                # NOTE(review): only the second word's symbol is lower-cased
                # here; in IPA mode the first word is never lower-cased.
                if word_1[i] == word_2[j].lower():
                    # Matching symbols are free and record no operation.
                    cost = 0
                    c = []
                else:
                    cost = self._substitution[(w1_ngram, w2_ngram)]
                    c = [('Substitution', (w1_ngram, w2_ngram))]
                curr_operations = [all_operations[(i, j - 1)] + [('Insertion', w2_ngram)],
                                   all_operations[(i - 1, j)] + [('Deletion', w1_ngram)],
                                   all_operations[(i - 1, j - 1)] + c]
                curr_distances = [distances[(i, j - 1)] + self._insertion[w2_ngram],
                                  distances[(i - 1, j)] + self._deletion[w1_ngram],
                                  distances[(i - 1, j - 1)] + cost]
                # argmin returns the first minimum, so ties are resolved in the
                # order insertion, deletion, substitution/match.
                ind = argmin(curr_distances)
                distances[(i, j)] = curr_distances[ind]
                all_operations[(i, j)] = curr_operations[ind]
        self._operations += all_operations[(len_1 - 1, len_2 - 1)]
        return distances[(len_1 - 1, len_2 - 1)]

    def set_weights(self):
        """Align every cognate pair and re-weight each operation that was used.

        An operation seen ``c`` times gets the weight ``1 / (2 * c + 1)``; each
        operation and its count are printed.

        NOTE: ``self._operations`` is never cleared, so operations recorded by
        earlier ``edit_distance``/``set_weights`` calls on this instance are
        counted again.
        """
        for pair in self._cognates:
            self.edit_distance(pair[0], pair[1])
        all_ops = Counter(self._operations)
        for op, count in all_ops.items():
            print(op, count)
            weight = float(1 / (2 * count + 1))
            if op[0] == 'Insertion':
                self._insertion[op[1]] = weight
            elif op[0] == 'Deletion':
                self._deletion[op[1]] = weight
            else:
                self._substitution[op[1]] = weight

    def save_weights(self, ins: str, de: str, sub: str):
        """Pickle the insertion, deletion and substitution dicts to the given paths."""
        with open(ins, 'wb') as f:
            pickle.dump(self._insertion, f)
        with open(de, 'wb') as f:
            pickle.dump(self._deletion, f)
        with open(sub, 'wb') as f:
            pickle.dump(self._substitution, f)

In [106]:
# Trigram trainer in IPA mode: the cognate file is read as a pickle.
cognates = TriCognates(cognates_file=COGNATES_LIST, ipa=True)

In [107]:
# Align every cognate pair and re-weight each operation that was used; prints
# every operation with its count.
cognates.set_weights()

('Substitution', (('', 'æ', 'b'), ('', 'a', 'b'))) 18
('Substitution', (('b', 'ə', 'k'), ('b', 'a', 'k'))) 1
('Deletion', ('k', 'ə', 's')) 3
('Deletion', ('ə', 's', '')) 78
('Substitution', (('', 'ə', 'b'), ('', 'a', 'b'))) 21
('Substitution', (('b', 'æ', 'n'), ('b', 'a', 'n'))) 7
('Substitution', (('d', 'ə', 'n'), ('d', 'o', 'n'))) 3
('Insertion', ('n', 'a', '')) 21
('Insertion', ('n', 'a', 'd')) 13
('Deletion', ('n', 'm', 'ə')) 2
('Deletion', ('m', 'ə', 'n')) 9
('Deletion', ('ə', 'n', 't')) 14
('Deletion', ('n', 't', '')) 18
('Substitution', (('b', 'ɹ', 'i'), ('b', 'ɾ', 'e'))) 3
('Deletion', ('ɹ', 'i', 'v')) 2
('Deletion', ('i', 'v', 'i')) 4
('Deletion', ('v', 'i', 'e')) 4
('Substitution', (('e', 'j', 't'), ('e', 'b', 'ja'))) 1
('Substitution', (('j', 't', ''), ('b', 'ja', ''))) 1
('Substitution', (('e', 'j', 'ʃ'), ('e', 'b', 'ja'))) 1
('Substitution', (('j', 'ʃ', 'ə'), ('b', 'ja', 'si'))) 1
('Substitution', (('ʃ', 'ə', 'n'), ('ja', 'si', 'o'))) 1
('Substitution', (('ə', 'n', ''), ('

In [108]:
# Pickle the learned trigram weights to the IPA trigram weight files.
cognates.save_weights(IPA_TRI_INSERTION, IPA_TRI_DELETION, IPA_TRI_SUBSTITUTION)