# SOUND CORRESPONDENCE APPLIER

In [18]:
from loanpy.scapplier import Adrc

adrc = Adrc("examples/sc2.json", "examples/inv.json")
adrc.sc

[{'d': ['d', 't'], 'a': ['a', 'o']},
 {'d d': 5, 'd t': 4, 'a a': 7, 'a o': 1},
 {},
 {'CVCV': ['CVC']}]

In [19]:
adrc.prosodic_inventory

['CV', 'CVV']

In [20]:
adrc.set_sc("lol")
adrc.sc

'lol'

In [21]:
adrc.set_prosodic_inventory("rofl")
adrc.prosodic_inventory

'rofl'

In [22]:
from loanpy.scapplier import Adrc

adrc = Adrc("examples/sc2.json", "examples/inv.json")
adrc.adapt("d a d a")

['dada']

In [23]:
adrc.adapt("d a d a", 5, "CVCV")  # sc2.json says CVCV to CVC

['dad', 'dat', 'dod', 'dot', 'tad']

In [24]:
adrc.adapt("d a d", 5, "CVC")   # no info on CVC in sc2.json, closest in inventory is CV

['da', 'do', 'ta', 'to']

In [25]:
adrc.reconstruct("d a d a")

'^(d)(a)(d)(a)$'

In [26]:
adrc.reconstruct("d a d a", 1000)

'^(d|t)(a|o)(d|t)(a|o)$'

In [27]:
adrc.reconstruct("l a l a")

'l not old'

In [28]:
adrc.repair_phonotactics(["d", "a", "d", "a"], "CVCV")

['d', 'a', 'd']

In [29]:
adrc.set_sc([{}, {"k k": 2, "k c": 1, "i e": 2, "i o": 1}, {}, {}, {}, {}, {}])
sclistlist = [["k", "c", "$"], ["e", "o", "$"], ["k", "c", "$"], ["e", "o", "$"]]
adrc.get_diff(sclistlist, ["k", "i", "k", "i"])

[1, 1, 1, 1]

In [30]:
from loanpy.scapplier import Adrc

adrc = Adrc()

# First dict: replacement candidates listed per sound.
# Second dict: a count for each "source target" pair.
# The remaining five slots are left empty for this example.
adrc.set_sc([{"k": ["k", "h"], "i": ["e", "o"]},
             {"k k": 5, "k c": 3, "i e": 2, "i o": 1},
             {}, {}, {}, {}, {}])

# difference between i e and i o = 2 - 1 = 1
# and between k k and k c = 5 - 3 = 2
# so picking the "o" makes less of a difference than the "c"
adrc.read_sc(["k", "i"], 2)

[['k'], ['e', 'o']]

In [31]:
from loanpy.scapplier import Adrc

adrc = Adrc("", "examples/inv.json")

adrc.get_closest_phonotactics("CVC")

'CV'

In [32]:
adrc.get_closest_phonotactics("CVCV")

'CVV'

In [33]:
from loanpy.scapplier import move_sc
move_sc([["x", "x"]], 0, [[]])

([['x']], [['x']])

In [34]:
move_sc([["x", "x"], ["y", "y"], ["z"]], 1, [["a"], ["b"], ["c"]])

([['x', 'x'], ['y'], ['z']], [['a'], ['b', 'y'], ['c']])

In [40]:
from loanpy.scapplier import edit_distance_with2ops
edit_distance_with2ops("rajka", "ajka", w_del=100, w_ins=49)

100

In [41]:
edit_distance_with2ops("ajka", "rajka", w_del=100, w_ins=49)

49

In [45]:
edit_distance_with2ops("Bécs", "Pécs", w_del=100, w_ins=49)

149

In [44]:
edit_distance_with2ops("Hegyeshalom", "Mosonmagyaróvár", w_del=100, w_ins=49)

1388

In [46]:
from loanpy.scapplier import apply_edit
apply_edit(
      ['f', 'ɛ', 'r', 'i', 'h', 'ɛ', 'ɟ'],
      ('insert d',
       'insert u',
       'insert n',
       'insert ɒ',
       'insert p',
       'substitute f by ɒ',
       'delete ɛ',
       'keep r',
       'delete i',
       'delete h',
       'delete ɛ',
       'substitute ɟ by t')
)

['d', 'u', 'n', 'ɒ', 'p', 'ɒ', 'r', 't']

In [47]:
from loanpy.scapplier import list2regex
list2regex(["b", "k", "-", "v"])

'(b|k|v)?'

In [49]:
from loanpy.scapplier import tuples2editops
tuples2editops([(0, 0), (0, 1), (1, 1), (2, 2)], "ló", "hó")

['substitute l by h', 'keep ó']

In [50]:
tuples2editops([(0, 0), (1, 1), (2, 2), (2, 3)], "lóh", "ló")

['keep l', 'keep ó', 'delete h']

In [51]:
from loanpy.scapplier import substitute_operations
substitute_operations(['insert A', 'delete B', 'insert C'])

['substitute B by A', 'insert C']

In [52]:
substitute_operations(['delete A', 'insert B', 'delete C', 'insert D'])

['substitute A by B', 'substitute C by D']

In [53]:
from loanpy.scapplier import get_mtx
get_mtx("Bécs", "Pécs")

[[0, 1, 2, 3, 4],
 [1, 2, 3, 4, 5],
 [2, 3, 2, 3, 4],
 [3, 4, 3, 2, 3],
 [4, 5, 4, 3, 2]]

In [55]:
from loanpy.scapplier import add_edge
graph = {'A': {'B': 3}}
add_edge(graph, 'A', 'C', 7)
graph

{'A': {'B': 3, 'C': 7}}

In [57]:
from loanpy.scapplier import mtx2graph
mtx2graph([[0, 1, 2], [1, 2, 3], [2, 3, 2]])

{(0, 0): {(0, 1): 100, (1, 0): 49},
 (0, 1): {(0, 2): 100, (1, 1): 49},
 (0, 2): {(1, 2): 49},
 (1, 0): {(1, 1): 100, (2, 0): 49},
 (1, 1): {(1, 2): 100, (2, 1): 49, (2, 2): 0},
 (1, 2): {(2, 2): 49},
 (2, 0): {(2, 1): 100},
 (2, 1): {(2, 2): 100},
 (2, 2): {}}

In [58]:
from loanpy.scapplier import dijkstra
graph1 = {
        'A': {'B': 1, 'C': 4},
        'B': {'C': 2, 'D': 6},
        'C': {'D': 3},
        'D': {}
    }
dijkstra(graph1, 'A', 'D')

['A', 'B', 'C', 'D']

# EVALUATE SOUND CORRESPONDENCE APPLIER

In [None]:
from loanpy.eval_sca import eval_one

intable = [ # regular sound correspondences
  ['ID', 'COGID', 'DOCULECT', 'ALIGNMENT', 'PROSODY'],
  ['0', '1', 'H', 'k i k i', 'VC'],
  ['1', '1', 'EAH', 'g i g i', 'VCVCV'],
  ['2', '2', 'H', 'i k k i', 'VCV'],
  ['3', '2', 'EAH', 'i g g i', 'VCCVC']
]

eval_one(intable, "", False, 1)

In [None]:
intable = [ # not enough regular sound correspondences
  ['ID', 'COGID', 'DOCULECT', 'ALIGNMENT', 'PROSODY'],
  ['0', '1', 'H', 'k i k i', 'VC'],
  ['1', '1', 'EAH', 'g i g i', 'VCVCV'],
  ['2', '2', 'H', 'b u b a', 'VCV'],
  ['3', '2', 'EAH', 'p u p a', 'VCCVC']
]

eval_one(intable, "", False, 1)

In [None]:
intable = [ # irregular sound correspondences
  ['ID', 'COGID', 'DOCULECT', 'ALIGNMENT', 'PROSODY'],
  ['0', '1', 'H', 'k i k i', 'VC'],
  ['1', '1', 'EAH', 'k i g i', 'VCVCV'],
  ['2', '2', 'H', 'i k k i', 'VCV'],
  ['3', '2', 'EAH', 'i g k i', 'VCCVC']
]

eval_one(intable, "", False, 1)

In [None]:
intable = [  # irregular sound correspondences
  ['ID', 'COGID', 'DOCULECT', 'ALIGNMENT', 'PROSODY'],
  ['0', '1', 'H', 'k i k i', 'VC'],
  ['1', '1', 'EAH', 'k i g i', 'VCVCV'],
  ['2', '2', 'H', 'i k k i', 'VCV'],
  ['3', '2', 'EAH', 'i g k i', 'VCCVC']
]

eval_one(intable, "", False, 2)  # increase rate of false positives

In [None]:
from loanpy.eval_sca import eval_all

# Header row first, then two cognate sets (COGID 1 and 2), each with one
# "H" row and one "EAH" row.
intable = [
    ['ID', 'COGID', 'DOCULECT', 'ALIGNMENT', 'PROSODY'],
    ['0', '1', 'H', 'k i k i', 'VC'],
    ['1', '1', 'EAH', 'k i g i', 'VCVCV'],
    ['2', '2', 'H', 'i k k i', 'VCV'],
    ['3', '2', 'EAH', 'i g k i', 'VCCVC'],
]

eval_all(intable, "", False, [1, 2, 3])

# LOAN FINDER

In [None]:
from loanpy.loanfinder import phonetic_matches

# Donor rows: the third field is a plain word form.
donor = [
    ['a0', 'Donorese-0', 'igig'],
    ['a1', 'Donorese-1', 'iggi'],
]

# Recipient rows: the third field is a regex-style pattern.
recipient = [
    ['0', 'Recipientese-0', '^(i|u)(g)(g)(i|u)$'],
    ['1', 'Recipientese-1', '^(i|u)(i|u)(g)(g)$'],
]

outpath = "examples/phonetic_matches.tsv"

phonetic_matches(recipient, donor, outpath)

# Show the contents of the file at `outpath`.
with open(outpath, "r") as f:
    print(f.read())

In [None]:
from loanpy.loanfinder import semantic_matches

def getsemsim(x, y):
    """Stand-in similarity function: ignores both arguments, returns 0.75."""
    constant_similarity = 0.75
    return constant_similarity

# NOTE(review): the header names three columns while the data row carries
# five values — confirm this is the shape semantic_matches expects.
phmtsv = [
    ["ID", "ID_rc", "ID_ad"],
    ["0", "Recipientese-0", "Donorese-1", "cat", "dog"],
]

# NOTE(review): same path as the phonetic_matches example, so that file is
# presumably overwritten here — confirm this is intended.
outpath = "examples/phonetic_matches.tsv"

semantic_matches(phmtsv, getsemsim, outpath)

# Show the contents of the file at `outpath`.
with open(outpath, "r") as f:
    print(f.read())

# UTILITY FUNCTIONS

In [None]:
from loanpy.utils import find_optimal_year_cutoff

# Header row followed by one row per word.
tsv = [
    ['form', 'sense', 'Year', 'Etymology', 'Loan'],
    ['gulyás', 'goulash, Hungarian stew', '1861', 'internal', 'False'],
    ['Tisza', 'a major river in Hungary', '1230', 'uncertain', ''],
    ['Pest', 'part of Budapest, the capital', '1241', 'Slavic', 'True'],
    ['paprika', 'ground red pepper, spice', '1748', 'Slavic', 'True'],
]
find_optimal_year_cutoff(tsv, "Slavic")

In [None]:
from loanpy.utils import cvgaps
cvgaps("b l -", "b l a")

In [None]:
cvgaps("b - a", "b l a")

In [None]:
from loanpy.utils import prefilter
data = [
['x', 'x', 'Language_ID', 'x', 'x', 'x', 'x', 'x', 'x', 'Cognacy', 'x'],
['x', 'x', 'de', 'x', 'x', 'x', 'x', 'x', 'x', '0', 'x'],
['x', 'x', 'en', 'x', 'x', 'x', 'x', 'x', 'x', '0', 'x'],
['x', 'x', 'en', 'x', 'x', 'x', 'x', 'x', 'x', '1', 'x'],
['x', 'x', 'de', 'x', 'x', 'x', 'x', 'x', 'x', '1', 'x'],
['x', 'x', 'de', 'x', 'x', 'x', 'x', 'x', 'x', '2', 'x'],
['x', 'x', 'en', 'x', 'x', 'x', 'x', 'x', 'x', '3', 'x'],
['x', 'x', 'nl', 'x', 'x', 'x', 'x', 'x', 'x', '4', 'x'],
['x', 'x', 'de', 'x', 'x', 'x', 'x', 'x', 'x', '4', 'x'],
['x', 'x', 'nl', 'x', 'x', 'x', 'x', 'x', 'x', '5', 'x'],
['x', 'x', 'en', 'x', 'x', 'x', 'x', 'x', 'x', '5', 'x'],
['x', 'x', 'de', 'x', 'x', 'x', 'x', 'x', 'x', '6', 'x'],
['x', 'x', 'nl', 'x', 'x', 'x', 'x', 'x', 'x', '6', 'x'],
['x', 'x', 'en', 'x', 'x', 'x', 'x', 'x', 'x', '6', 'x']
]
prefilter(data, "de", "en")

In [None]:
from loanpy.utils import is_valid_language_sequence
data = [  # no header!
 ['x', 'x', 'de', 'x', 'x', 'x', 'x', 'x', 'x', '0', 'x'],
 ['x', 'x', 'en', 'x', 'x', 'x', 'x', 'x', 'x', '0', 'x'],
 ['x', 'x', 'de', 'x', 'x', 'x', 'x', 'x', 'x', '1', 'x'],
 ['x', 'x', 'en', 'x', 'x', 'x', 'x', 'x', 'x', '1', 'x'],
 ['x', 'x', 'de', 'x', 'x', 'x', 'x', 'x', 'x', '6', 'x'],
 ['x', 'x', 'en', 'x', 'x', 'x', 'x', 'x', 'x', '6', 'x']]
is_valid_language_sequence(data, "de", "en")

In [None]:
from loanpy.utils import is_valid_language_sequence
data = [  # no header!
 ['x', 'x', 'de', 'x', 'x', 'x', 'x', 'x', 'x', '0', 'x'],
 ['x', 'x', 'en', 'x', 'x', 'x', 'x', 'x', 'x', '0', 'x'],
 ['x', 'x', 'de', 'x', 'x', 'x', 'x', 'x', 'x', '1', 'x'],
 ['x', 'x', 'en', 'x', 'x', 'x', 'x', 'x', 'x', '1', 'x'],
 ['x', 'x', 'de', 'x', 'x', 'x', 'x', 'x', 'x', '6', 'x'],
 ['x', 'x', 'nl', 'x', 'x', 'x', 'x', 'x', 'x', '6', 'x']]
is_valid_language_sequence(data, "de", "en")

In [None]:
from loanpy.utils import is_same_length_alignments
is_same_length_alignments([[0, 1, 2, "a - c", 4, 5], [0, 1, 2, "d e f", 4, 5]])

In [None]:
is_same_length_alignments([[0, 1, 2, "a - c", 4, 5], [0, 1, 2, "d e", 4, 5]])

In [None]:
from loanpy.utils import read_ipa_all
ipa_all = read_ipa_all()
type(ipa_all)

In [None]:
len(ipa_all)

In [None]:
ipa_all[:2]

In [None]:
from loanpy.utils import prod
prod([1, 2, 3])  # one times two times three

In [None]:
from loanpy.utils import IPA
ipa = IPA()
type(ipa.vowels)

In [None]:
len(ipa.vowels)

In [None]:
ipa.vowels[0]

In [None]:
from loanpy.utils import IPA
ipa = IPA()
ipa.get_cv("p")

In [None]:
ipa.get_cv("u")

In [None]:
from loanpy.utils import IPA
ipa = IPA()
ipa.get_prosody("l o l")

In [None]:
ipa.get_prosody("r o f.l")

In [None]:
from loanpy.utils import IPA
ipa = IPA()
ipa.get_clusters(["r", "a", "u", "f", "l"])

In [None]:
import json
from loanpy.utils import scjson2tsv

# Six tables, one per line, serialised as a single JSON list.
sc = [
    {"a": ["o", "e"]},
    {"a o": 1, "a e": 2},
    {"a o": [512], "a e": [3, 4]},
    {"CV": ["CV"]},
    {"CV CV": 1},
    {"CV CV": [7]},
]
with open("examples/sc.json", "w+") as f:
    json.dump(sc, f)

scjson2tsv("examples/sc.json", "examples/sc.tsv", "examples/sc_p.tsv")

# Show the contents of the first TSV path passed to scjson2tsv.
with open("examples/sc.tsv", "r") as f:
    print(f.read())

In [None]:
# The path must match the one handed to scjson2tsv (examples/sc_p.tsv);
# a bare "sc_p.tsv" does not exist in the working directory.
with open("examples/sc_p.tsv", "r") as f:
    print(f.read())

In [None]:
import os

# Clean up the TSV files under examples/, using the same paths that were
# passed to scjson2tsv.
os.remove("examples/sc.tsv")
os.remove("examples/sc_p.tsv")