# SOUND CORRESPONDENCE APPLIER

In [3]:
# Build an Adrc object from the two example JSON files. Judging by the
# attributes inspected in this notebook (`sc`, `prosodic_inventory`), the first
# file presumably holds sound correspondences and the second the inventory.
from loanpy.scapplier import Adrc

adrc = Adrc("examples/sc2.json", "examples/inv.json")
# Trailing expression: the notebook displays the loaded `sc` attribute.
adrc.sc

[{'d': ['d', 't'], 'a': ['a', 'o']},
 {'d d': 5, 'd t': 4, 'a a': 7, 'a o': 1},
 {},
 {'CVCV': ['CVC']}]

In [4]:
adrc.prosodic_inventory

['CV', 'CVV']

In [5]:
adrc.set_sc("lol")
adrc.sc

'lol'

In [6]:
adrc.set_prosodic_inventory("rofl")
adrc.prosodic_inventory

'rofl'

In [8]:
# Re-create the object: the two preceding cells replaced `sc` and
# `prosodic_inventory` with placeholder strings via set_sc / set_prosodic_inventory.
from loanpy.scapplier import Adrc

adrc = Adrc("examples/sc2.json", "examples/inv.json")
# Called with only the space-separated segment string; the later cells also
# pass a count and a prosodic pattern.
adrc.adapt("d a d a")

['dada']

In [9]:
adrc.adapt("d a d a", 5, "CVCV")  # data says CVCV to CVC

['dad', 'dat', 'dod', 'dot', 'tad']

In [10]:
adrc.adapt("d a d", 5, "CVC")   # no data for CVC, closest in inventory is CV

['da', 'do', 'ta', 'to']

In [14]:
adrc.reconstruct("d a d a")

'^(d)(a)(d)(a)$'

In [15]:
adrc.reconstruct("d a d a", 1000)

'^(d|t)(a|o)(d|t)(a|o)$'

In [16]:
adrc.reconstruct("l a l a")

'l not old'

In [18]:
adrc.repair_phonotactics(["d", "a", "d", "a"], "CVCV")

['d', 'a', 'd']

# EVALUATE SOUND CORRESPONDENCE APPLIER

In [None]:
from loanpy.eval_sca import eval_one

# Toy table: a header row followed by two cognate sets (COGID 1 and 2), each
# with one row per doculect ("H" and "EAH"). Every H "k" is aligned with an
# EAH "g" and every "i" with "i".
intable = [ # regular sound correspondences
  ['ID', 'COGID', 'DOCULECT', 'ALIGNMENT', 'PROSODY'],
  ['0', '1', 'H', 'k i k i', 'VC'],
  ['1', '1', 'EAH', 'g i g i', 'VCVCV'],
  ['2', '2', 'H', 'i k k i', 'VCV'],
  ['3', '2', 'EAH', 'i g g i', 'VCCVC']
]

# NOTE(review): the meaning of the positional arguments ("", False, 1) is not
# visible in this notebook; confirm against the eval_one signature.
eval_one(intable, "", False, 1)

In [None]:
# Same layout as the previous table, but the second cognate set uses segments
# (b/p, u, a) that never occur in the first set (k/g, i).
intable = [ # not enough regular sound correspondences
  ['ID', 'COGID', 'DOCULECT', 'ALIGNMENT', 'PROSODY'],
  ['0', '1', 'H', 'k i k i', 'VC'],
  ['1', '1', 'EAH', 'g i g i', 'VCVCV'],
  ['2', '2', 'H', 'b u b a', 'VCV'],
  ['3', '2', 'EAH', 'p u p a', 'VCCVC']
]

# Relies on eval_one imported in an earlier cell.
eval_one(intable, "", False, 1)

In [None]:
# Here H "k" is aligned with EAH "k" in one position and with EAH "g" in
# another, within both cognate sets.
intable = [ # irregular sound correspondences
  ['ID', 'COGID', 'DOCULECT', 'ALIGNMENT', 'PROSODY'],
  ['0', '1', 'H', 'k i k i', 'VC'],
  ['1', '1', 'EAH', 'k i g i', 'VCVCV'],
  ['2', '2', 'H', 'i k k i', 'VCV'],
  ['3', '2', 'EAH', 'i g k i', 'VCCVC']
]

# Relies on eval_one imported in an earlier cell.
eval_one(intable, "", False, 1)

In [None]:
# Identical table to the previous cell; only the last argument of the call
# below changes (1 -> 2).
intable = [  # irregular sound correspondences
  ['ID', 'COGID', 'DOCULECT', 'ALIGNMENT', 'PROSODY'],
  ['0', '1', 'H', 'k i k i', 'VC'],
  ['1', '1', 'EAH', 'k i g i', 'VCVCV'],
  ['2', '2', 'H', 'i k k i', 'VCV'],
  ['3', '2', 'EAH', 'i g k i', 'VCCVC']
]

eval_one(intable, "", False, 2)  # increase rate of false positives

In [None]:
from loanpy.eval_sca import eval_all

# Same irregular-correspondence table as in the two preceding cells; the
# header row sits on the line of the opening bracket.
intable = [  ['ID', 'COGID', 'DOCULECT', 'ALIGNMENT', 'PROSODY'],
  ['0', '1', 'H', 'k i k i', 'VC'],
  ['1', '1', 'EAH', 'k i g i', 'VCVCV'],
  ['2', '2', 'H', 'i k k i', 'VCV'],
  ['3', '2', 'EAH', 'i g k i', 'VCCVC']
]

# Unlike eval_one, the last argument is a list of values rather than a single
# integer. NOTE(review): presumably one evaluation per list entry; confirm.
eval_all(intable, "", False, [1, 2, 3])

# LOAN FINDER

In [None]:
from loanpy.loanfinder import phonetic_matches

# Donor rows: an ID, a label, and a plain form string.
donor = [
    ['a0', 'Donorese-0', 'igig'],
    ['a1', 'Donorese-1', 'iggi'],
]

# Recipient rows: an ID, a label, and a regular expression of the same shape
# as the strings returned by adrc.reconstruct() earlier in this notebook.
recipient = [
    ['0', 'Recipientese-0', '^(i|u)(g)(g)(i|u)$'],
    ['1', 'Recipientese-1', '^(i|u)(i|u)(g)(g)$'],
]

outpath = "examples/phonetic_matches.tsv"

# Argument order: recipient table first, then donor table, then output path.
phonetic_matches(recipient, donor, outpath)

# Print the file at `outpath`, which phonetic_matches is expected to have
# written. The body uses a 4-space indent, consistent with the other cells.
with open(outpath, "r") as f:
    print(f.read())

In [None]:
from loanpy.loanfinder import semantic_matches


def getsemsim(x, y):
    """Stub similarity function: ignore both arguments and return 0.75."""
    return 0.75


# The header row has three labels; the data row additionally carries two
# words ("cat", "dog"), presumably the meanings that get passed to getsemsim
# (confirm against semantic_matches).
phmtsv = [
    ["ID", "ID_rc", "ID_ad"],
    ["0", "Recipientese-0", "Donorese-1", "cat", "dog"]
]

# Write to a dedicated file: reusing "examples/phonetic_matches.tsv" here
# would overwrite the phonetic-matches output with semantic-match rows under
# a misleading name.
outpath = "examples/semantic_matches.tsv"

semantic_matches(phmtsv, getsemsim, outpath)

# Print the file at `outpath`, which semantic_matches is expected to have
# written.
with open(outpath, "r") as f:
    print(f.read())

# UTILITY FUNCTIONS

In [None]:
from loanpy.utils import find_optimal_year_cutoff
# Header row plus four entries. 'Year' is stored as a string, 'Etymology'
# holds an origin label, and the 'Tisza' row has an empty 'Loan' field.
tsv = [
        ['form', 'sense', 'Year', 'Etymology', 'Loan'],
        ['gulyás', 'goulash, Hungarian stew', '1861', 'internal', 'False'],
        ['Tisza', 'a major river in Hungary', '1230', 'uncertain', ''],
        ['Pest', 'part of Budapest, the capital', '1241', 'Slavic', 'True'],
        ['paprika', 'ground red pepper, spice', '1748', 'Slavic', 'True']
      ]
# "Slavic" matches the 'Etymology' value of two of the rows above.
# NOTE(review): how the cutoff is computed is not visible here; see the
# find_optimal_year_cutoff documentation.
find_optimal_year_cutoff(tsv, "Slavic")

In [None]:
from loanpy.utils import cvgaps
cvgaps("b l -", "b l a")

In [None]:
cvgaps("b - a", "b l a")

In [None]:
from loanpy.utils import prefilter
# 11-column table with a header row. Only 'Language_ID' (index 2) and
# 'Cognacy' (index 9) carry real values; every other field is the filler 'x'.
# Languages per cognate set: 0 = de, en; 1 = en, de; 2 = de; 3 = en;
# 4 = nl, de; 5 = nl, en; 6 = de, nl, en.
data = [
['x', 'x', 'Language_ID', 'x', 'x', 'x', 'x', 'x', 'x', 'Cognacy', 'x'],
['x', 'x', 'de', 'x', 'x', 'x', 'x', 'x', 'x', '0', 'x'],
['x', 'x', 'en', 'x', 'x', 'x', 'x', 'x', 'x', '0', 'x'],
['x', 'x', 'en', 'x', 'x', 'x', 'x', 'x', 'x', '1', 'x'],
['x', 'x', 'de', 'x', 'x', 'x', 'x', 'x', 'x', '1', 'x'],
['x', 'x', 'de', 'x', 'x', 'x', 'x', 'x', 'x', '2', 'x'],
['x', 'x', 'en', 'x', 'x', 'x', 'x', 'x', 'x', '3', 'x'],
['x', 'x', 'nl', 'x', 'x', 'x', 'x', 'x', 'x', '4', 'x'],
['x', 'x', 'de', 'x', 'x', 'x', 'x', 'x', 'x', '4', 'x'],
['x', 'x', 'nl', 'x', 'x', 'x', 'x', 'x', 'x', '5', 'x'],
['x', 'x', 'en', 'x', 'x', 'x', 'x', 'x', 'x', '5', 'x'],
['x', 'x', 'de', 'x', 'x', 'x', 'x', 'x', 'x', '6', 'x'],
['x', 'x', 'nl', 'x', 'x', 'x', 'x', 'x', 'x', '6', 'x'],
['x', 'x', 'en', 'x', 'x', 'x', 'x', 'x', 'x', '6', 'x']
]
# NOTE(review): presumably keeps only cognate sets that contain both "de" and
# "en"; confirm against the prefilter documentation.
prefilter(data, "de", "en")

In [None]:
from loanpy.utils import is_valid_language_sequence
# Rows strictly alternate "de", "en" in column 2, and each consecutive pair
# shares its value in column 9 ('0', '1', '6').
data = [  # no header!
 ['x', 'x', 'de', 'x', 'x', 'x', 'x', 'x', 'x', '0', 'x'],
 ['x', 'x', 'en', 'x', 'x', 'x', 'x', 'x', 'x', '0', 'x'],
 ['x', 'x', 'de', 'x', 'x', 'x', 'x', 'x', 'x', '1', 'x'],
 ['x', 'x', 'en', 'x', 'x', 'x', 'x', 'x', 'x', '1', 'x'],
 ['x', 'x', 'de', 'x', 'x', 'x', 'x', 'x', 'x', '6', 'x'],
 ['x', 'x', 'en', 'x', 'x', 'x', 'x', 'x', 'x', '6', 'x']]
is_valid_language_sequence(data, "de", "en")

In [None]:
from loanpy.utils import is_valid_language_sequence
# Same rows as the previous cell except the last one, whose language is "nl"
# instead of "en", which breaks the de/en alternation.
data = [  # no header!
 ['x', 'x', 'de', 'x', 'x', 'x', 'x', 'x', 'x', '0', 'x'],
 ['x', 'x', 'en', 'x', 'x', 'x', 'x', 'x', 'x', '0', 'x'],
 ['x', 'x', 'de', 'x', 'x', 'x', 'x', 'x', 'x', '1', 'x'],
 ['x', 'x', 'en', 'x', 'x', 'x', 'x', 'x', 'x', '1', 'x'],
 ['x', 'x', 'de', 'x', 'x', 'x', 'x', 'x', 'x', '6', 'x'],
 ['x', 'x', 'nl', 'x', 'x', 'x', 'x', 'x', 'x', '6', 'x']]
is_valid_language_sequence(data, "de", "en")

In [None]:
from loanpy.utils import is_same_length_alignments
is_same_length_alignments([[0, 1, 2, "a - c", 4, 5], [0, 1, 2, "d e f", 4, 5]])

In [None]:
is_same_length_alignments([[0, 1, 2, "a - c", 4, 5], [0, 1, 2, "d e", 4, 5]])

In [None]:
from loanpy.utils import read_ipa_all
ipa_all = read_ipa_all()
type(ipa_all)

In [None]:
len(ipa_all)

In [None]:
ipa_all[:2]

In [None]:
from loanpy.utils import prod
prod([1, 2, 3])  # one times two times three

In [None]:
from loanpy.utils import IPA
ipa = IPA()
type(ipa.vowels)

In [None]:
len(ipa.vowels)

In [None]:
ipa.vowels[0]

In [None]:
from loanpy.utils import IPA
ipa = IPA()
ipa.get_cv("p")

In [None]:
ipa.get_cv("u")

In [None]:
from loanpy.utils import IPA
ipa = IPA()
ipa.get_prosody("l o l")

In [None]:
ipa.get_prosody("r o f.l")

In [None]:
from loanpy.utils import IPA
ipa = IPA()
ipa.get_clusters(["r", "a", "u", "f", "l"])

In [None]:
import json
from loanpy.utils import scjson2tsv

# Six-element example structure: items 0-2 are dicts keyed by segments
# ("a", "a o", "a e"), items 3-5 are dicts keyed by CV patterns ("CV", "CV CV").
sc = [{"a": ["o", "e"]}, {"a o": 1, "a e": 2}, {"a o": [512],
       "a e": [3, 4]}, {"CV": ["CV"]}, {"CV CV": 1}, {"CV CV": [7]}]
# Write the structure to disk so scjson2tsv can be pointed at a file path.
with open("examples/sc.json", "w+") as f:
    json.dump(sc, f)

# First argument is the JSON just written; the other two paths are read back
# below and in the next cell, so they are presumably the two output tables.
scjson2tsv("examples/sc.json", "examples/sc.tsv", "examples/sc_p.tsv")

# Print the first of the two TSV files.
with open("examples/sc.tsv", "r") as f:
    print(f.read())

In [None]:
# Read the second table from the path that was passed to scjson2tsv in the
# previous cell ("examples/sc_p.tsv"). A bare "sc_p.tsv" would look in the
# current working directory and raise FileNotFoundError.
with open("examples/sc_p.tsv", "r") as f:
    print(f.read())

In [None]:
import os
from contextlib import suppress

# Both tables were written under examples/ (see the scjson2tsv call above), so
# they must be removed from there rather than from the working directory.
# examples/sc.json is left in place, as in the original cell.
for generated in ("examples/sc.tsv", "examples/sc_p.tsv"):
    # Tolerate an already-missing file so this cleanup cell can be re-run.
    with suppress(FileNotFoundError):
        os.remove(generated)