In [47]:
from pyclts.clts import CLTS
from lingpy.read.csv import csv2list
from collections import defaultdict

In [81]:
# Instantiate the CLTS object for the 'bipa' identifier. `bipa` is read as a
# module-level name by check_conformity and in_bipa further down.
# Prerequisite: make sure that the CLTS data is in lib/PYVER/site-packages/data.
bipa = CLTS('bipa')
# Smoke test: in the recorded output the ASCII input 'a:' resolves to a Vowel
# whose grapheme is 'aː' (duration='long').
bipa['a:']

Vowel(clts=<pyclts.clts.CLTS object at 0x10c534e48>, grapheme='aː', source=None, generated=False, note=None, base=None, alias=None, unknown=None, roundedness='unrounded', height='open', nasalization=None, frication=None, duration='long', phonation=None, release=None, syllabicity=None, pharyngealization=None, rhotacization=None, centrality='front', glottalization=None, velarization=None)

In [3]:
class SegmentsInProfile:
    """Segments found on one line of a profile, paired with that line's number.

    Attributes:
        segment_list: the segments exactly as passed in (no copy is made).
        profile_line: the line identifier exactly as passed in.
        segment_info: a one-entry dict mapping ``profile_line`` to
            ``segment_list``; this is the shape consumed by
            ``check_conformity`` and ``aggregate_segments``.
    """

    def __init__(self, segment_list, profile_line):
        """Store both arguments and build the one-entry lookup dict."""
        self.segment_list, self.profile_line = segment_list, profile_line

        # Built from the stored attributes so the dict value is the very same
        # list object as ``segment_list``.
        self.segment_info = dict([(self.profile_line, self.segment_list)])

In [4]:
def profile_to_segment_list(profile_tsv_path):
    """Turn a profile TSV file into a list of ``SegmentsInProfile`` objects.

    The file is read with ``csv2list``; the first row is skipped and, for
    every remaining row, the second column is split on whitespace to obtain
    that row's segments.

    Args:
        profile_tsv_path: path to the profile file, passed to ``csv2list``.

    Returns:
        A list with one ``SegmentsInProfile`` per data row, in file order.
        Rows are numbered starting at 2 (the skipped first row counts as 1).
        NOTE(review): this equals the physical line number only if
        ``csv2list`` returns every line of the file -- confirm.
    """
    data_rows = csv2list(profile_tsv_path)[1:]

    return [
        SegmentsInProfile(row[1].split(), line_number)
        for line_number, row in enumerate(data_rows, start=2)
    ]

In [5]:
def check_conformity(segment_dict, inventory=None):
    """Flag, per profile line, which segments are members of the inventory.

    Fixes a bug in the previous version, which rebound the result dict on
    every loop iteration and therefore returned only the *last* line of a
    multi-line ``segment_dict``. Every line is now kept.

    Args:
        segment_dict: mapping of line identifier -> iterable of segments
            (e.g. ``SegmentsInProfile.segment_info``).
        inventory: any container supporting ``in``. Defaults to the
            module-level ``bipa`` object, which is what the original
            implementation always used.

    Returns:
        A dict mapping each line identifier to a list of
        ``[segment, is_in_inventory]`` pairs, in the original segment order.
        An empty ``segment_dict`` yields an empty dict.
    """
    if inventory is None:
        # Resolved at call time so existing single-argument callers keep
        # checking against the notebook's global BIPA object.
        inventory = bipa

    return {
        line: [[segment, segment in inventory] for segment in segment_list]
        for line, segment_list in segment_dict.items()
    }

In [66]:
def aggregate_segments(segment_dicts):
    """Invert per-line segment lists into a segment -> lines index.

    Args:
        segment_dicts: iterable of objects exposing a ``segment_info`` dict
            (line identifier -> list of segments), such as the
            ``SegmentsInProfile`` instances from ``profile_to_segment_list``.

    Returns:
        A ``defaultdict(list)`` mapping each segment to the line identifiers
        it was seen on, in encounter order. A segment repeated within one
        line contributes that line once per occurrence.
    """
    lines_by_segment = defaultdict(list)

    # Flatten the three nesting levels into a lazy stream of
    # (segment, line) pairs, preserving the original traversal order.
    occurrences = (
        (segment, line)
        for entry in segment_dicts
        for line, segments in entry.segment_info.items()
        for segment in segments
    )
    for segment, line in occurrences:
        lines_by_segment[segment].append(line)

    return lines_by_segment

In [67]:
# Parse the profile file and index it by segment: `aggregated` maps each
# segment to the list of profile row numbers on which it occurs.
aggregated = aggregate_segments(profile_to_segment_list('D_profile.tsv'))

In [61]:
def in_bipa(char):
    """Look ``char`` up in the global ``bipa`` object and classify the result.

    Args:
        char: the segment string to look up via ``bipa[char]``.

    Returns:
        A 3-tuple ``(char, suggestion, status)``:

        * ``(char, '?', 'unknown')`` when the looked-up object's ``type`` is
          ``'unknownsound'``;
        * ``(char, str(sound), 'generated')`` when its ``generated`` attribute
          is truthy;
        * ``(char, str(sound), 'normalized')`` when its string form differs
          from ``char``;
        * ``(char, str(sound), '')`` otherwise.

        The checks are applied in that order, so a generated sound is
        reported as 'generated' even if its string form also differs.
    """
    sound = bipa[char]

    # Unknown sounds have no usable string form to suggest.
    if sound.type == 'unknownsound':
        return char, '?', 'unknown'

    suggestion = str(sound)
    if sound.generated:
        status = 'generated'
    elif suggestion != char:
        status = 'normalized'
    else:
        status = ''

    return char, suggestion, status

In [77]:
# Raw dump of the segment -> row-numbers mapping (a defaultdict, per the output).
print(aggregated) # TODO: build table with tabulate

defaultdict(<class 'list'>, {'+': [2, 3, 20], 'NULL': [4, 5, 6, 7, 9, 10, 11, 12, 13, 14, 16, 17, 18, 19, 21, 22, 23, 26, 27, 28, 82, 85, 136, 303, 381, 392, 393, 395], '_': [8], 'ː': [15], '→/⁵⁵': [24], 'a': [25, 29, 30, 31, 32, 33, 34, 35, 36], '⁵': [29, 30, 38, 43, 48, 49, 57, 58, 66, 69, 72, 74, 75, 88, 89, 95, 110, 111, 118, 122, 127, 128, 141, 142, 149, 152, 157, 160, 161, 168, 175, 179, 180, 187, 191, 196, 201, 205, 210, 211, 223, 227, 228, 235, 240, 245, 251, 252, 257, 268, 273, 276, 282, 283, 286, 292, 296, 299, 304, 305, 314, 319, 323, 326, 329, 347, 348, 355, 360, 365, 366, 372, 375, 382, 386, 390, 396, 401], '¹': [31, 32, 39, 44, 53, 54, 62, 63, 67, 70, 76, 77, 90, 91, 100, 112, 113, 120, 123, 143, 144, 150, 153, 158, 162, 163, 169, 176, 181, 182, 188, 192, 197, 204, 208, 212, 213, 229, 230, 236, 241, 247, 254, 258, 267, 269, 274, 275, 277, 284, 287, 290, 293, 297, 300, 306, 307, 315, 320, 324, 327, 330, 349, 350, 356, 361, 367, 368, 373, 377, 383, 387, 391, 397, 400], '⁵¹'

In [76]:
# Spot-check in_bipa on the fifth aggregated key (index 4), which is '→/⁵⁵'
# in the recorded run.
# NOTE(review): the saved output shows the status 'normalilzed', a spelling the
# current in_bipa definition never returns -- the output appears to predate the
# current code; re-run this cell.
in_bipa(list(aggregated.keys())[4])

('→/⁵⁵', '⁵⁵', 'normalilzed')

In [6]:
def calculate_conformity(conformity_dict):
    """Print one report line for each segment flagged as non-conforming.

    Args:
        conformity_dict: mapping of line identifier -> list of entries whose
            first item is the segment and whose second item is a truthy/falsy
            conformity flag (the shape returned by ``check_conformity``).

    Returns:
        None. The report is written to standard output only.
    """
    # TODO: Expand to check the *proper* thing (ask Mattis) and
    # improve the report.

    for line, checked_segments in conformity_dict.items():
        # Lazily pick out the segments whose flag is falsy, keeping order.
        flagged = (entry[0] for entry in checked_segments if not entry[1])
        for segment in flagged:
            print(f"Problematic segment {segment} in line {line}.")

In [7]:
# Parse the profile into one SegmentsInProfile object per data row.
dogon_segments = profile_to_segment_list('D_profile.tsv')

# Inspect a single entry: list index 30 holds row number 32 in the recorded
# output, because numbering starts at 2 for the first data row.
print(dogon_segments[30].segment_info)

{32: ['a', '¹']}


In [8]:
# Spot-check the conformity check on the same single entry inspected above;
# the result pairs each segment with a membership flag.
check_conformity(dogon_segments[30].segment_info)

{32: [['a', True], ['¹', True]]}

In [9]:
# Run the conformity check on every profile row and print a line for each
# segment that is not found in `bipa`.
for dogon_segment in dogon_segments:
    to_analyse = check_conformity(dogon_segment.segment_info)
    calculate_conformity(to_analyse)

# Single-row variant kept for debugging:
# calculate_conformity(check_conformity(dogon_segments[10].segment_info))

Problematic segment NULL in line 4.
Problematic segment NULL in line 5.
Problematic segment NULL in line 6.
Problematic segment NULL in line 7.
Problematic segment NULL in line 9.
Problematic segment NULL in line 10.
Problematic segment NULL in line 11.
Problematic segment NULL in line 12.
Problematic segment NULL in line 13.
Problematic segment NULL in line 14.
Problematic segment ː in line 15.
Problematic segment NULL in line 16.
Problematic segment NULL in line 17.
Problematic segment NULL in line 18.
Problematic segment NULL in line 19.
Problematic segment NULL in line 21.
Problematic segment NULL in line 22.
Problematic segment NULL in line 23.
Problematic segment →/⁵⁵ in line 24.
Problematic segment NULL in line 26.
Problematic segment NULL in line 27.
Problematic segment NULL in line 28.
Problematic segment ãː in line 43.
Problematic segment ∼ in line 43.
Problematic segment ãː in line 44.
Problematic segment ∼ in line 44.
Problematic segment ãː in line 45.
Problematic segment ∼

In [None]:
# Notes for the planned output table:
# - sound source: the source string as it appears in the profile (source_string)
# - BIPA suggestion: the result of bipa[source_string]
# - normalized/alias/generated: based on the in_bipa function (see Mattis' comment)