In [2]:
import pandas as pd
import re
from itertools import chain

# Ask IPython to display the value of every top-level expression in a cell
# rather than only the last one.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [4]:
# Read the feature spreadsheet and transpose it so that each column is one
# phone (the column labels are what gets listed as the inventory below).
features = pd.read_excel('data//features.xlsx', index_col=0).T
print('Phone Inventory:', ', '.join(features.columns), sep='\n')

Phone Inventory:
p, t, tʃ, k, b, d, dʒ, g, f, v, θ, ð, s, z, ʃ, ʒ, h, ʔ , m, n, ɲ, ŋ, r, l, j, w, ʔ, i, y, ɨ, u, e, ø, ə, o, æ, œ, ɑ, ɒ, ɪ , ʏ, ᵻ, ʊ, ε, ʌ, ɔ, a


In [5]:
# Consonant chart: transpose so each column is one phone.
# NOTE: reset_index(drop=True) replaces the row labels with 0..n-1, so the
# rows are identified by position only from here on.
ipa_chart = (
    pd.read_excel('data//IPA chart.xlsx', index_col=0)
    .T
    .reset_index(drop=True)
)
ipa_chart

Unnamed: 0,p,b,t,d,ʈ,ɖ,c,ɟ,k,g,...,ʍ,w,ɥ,ʜ,ʢ,ʡ,ʡ.1,ɕ,ʑ,ɺ
0,bilabial,bilabial,alveolar,alveolar,retroflex,retroflex,palatal,palatal,velar,velar,...,labial-velar,labial-velar,labial-palatal,epiglottal,epiglottal,epiglottal,epiglottal,alveolo-palatal,alveolo-palatal,alveolo-lateral
1,plosive,plosive,plosive,plosive,plosive,plosive,plosive,plosive,plosive,plosive,...,fricative,approximant,approximant,fricative,fricative,plosive,plosive,fricative,fricative,flap
2,voiceless,voiced,voiceless,voiced,voiceless,voiced,voiceless,voiced,voiceless,voiced,...,voiceless,voiced,voiced,voiceless,voiced,voiceless,voiced,voiceless,voiced,voiced


In [6]:
# Vowel chart: transpose so each column is one phone.
# NOTE: reset_index(drop=True) replaces the row labels with 0..n-1, so the
# rows are identified by position only from here on.
vowels = (
    pd.read_excel('data//vowel chart.xlsx', index_col=0)
    .T
    .reset_index(drop=True)
)
vowels

Unnamed: 0,i,y,ɨ,ʉ,ɯ,u,ɪ,ʏ,ʊ,e,...,ɜ,ɞ,ʌ,ɔ,æ,ɐ,a,ɶ,ɑ,ɒ
0,high,high,high,high,high,high,near-close,near-close,near-close,close-mid,...,open-mid,open-mid,open-mid,open-mid,near-open,near-open,open,open,open,open
1,front,front,central,central,back,back,front,front,back,front,...,central,central,back,back,front,central,front,front,back,back
2,unrounded,rounded,unrounded,rounded,unrounded,rounded,unrounded,rounded,unrounded,unrounded,...,unrounded,rounded,unrounded,rounded,unrounded,unrounded,unrounded,rounded,unrounded,rounded


In [7]:
def build_letter_list(features):
    """Interactively collect phone symbols that are columns of ``features``.

    Prompts repeatedly until the user enters ``0`` (surrounding whitespace is
    ignored for the exit code). Each entry is matched against
    ``features.columns`` in this order: exactly as typed, with surrounding
    whitespace stripped, then stripped and lower-cased. The first form that
    matches is stored, so every returned item is guaranteed to be a real
    column label and can be used directly as ``features[group]``.

    Parameters
    ----------
    features : pandas.DataFrame
        Table whose column labels are the selectable symbols.

    Returns
    -------
    list
        The matched column labels, in the order entered (possibly empty).
    """
    group = []

    while True:
        entry = input('Enter a letter(0 to exit): ')
        stripped = entry.strip()
        if stripped == '0':
            break

        # Validate and store the *same* label: the exact entry wins so that
        # columns which themselves carry stray whitespace stay reachable,
        # then fall back to the normalised spellings.
        match = next(
            (candidate
             for candidate in (entry, stripped, stripped.lower())
             if candidate in features.columns),
            None,
        )
        if match is None:
            print('Not in data.')
            continue

        group.append(match)

    return group

In [8]:
def _shared_features(features, group):
    """Return the rows of ``features`` on which every column in ``group`` agrees.

    The result is a Series indexed by row label, holding the single common
    value, and named after the comma-joined group.
    """
    labels, values = [], []
    for label, row in features[group].iterrows():
        distinct = set(row)
        if len(distinct) == 1:
            labels.append(label)
            values.append(next(iter(distinct)))

    # An explicit dtype is only needed for the empty case, where pandas has
    # nothing to infer from; otherwise inference is left untouched.
    return pd.Series(values, index=labels, name=', '.join(group),
                     dtype=None if values else object)


def get_features(features):
    """Compare two interactively entered groups of phones.

    Both groups are read with ``build_letter_list``. For each group the
    features shared by all of its members are collected; the result lists the
    features that are shared *within* both groups but whose (string) values
    differ *between* them.

    To get only the commonalities of a single group, enter ``0`` as the first
    letter of Group2.

    Parameters
    ----------
    features : pandas.DataFrame
        Table with one column per phone and one row per feature.

    Returns
    -------
    pandas.DataFrame or None
        * ``None`` when Group 1 is empty.
        * A one-column frame of Group 1's shared features when Group2 is empty.
        * Otherwise a frame indexed by ``features`` with one column per group.
        Any of the frames may be empty when nothing qualifies.
    """
    print('Group 1:')
    group1 = build_letter_list(features)

    if len(group1) < 1:
        return None

    print('\nGroup2:')
    group2 = build_letter_list(features)

    group1_similar = _shared_features(features, group1)

    if len(group2) == 0:
        return group1_similar.to_frame()

    group2_similar = _shared_features(features, group2)

    different_features = []
    side_by_side = pd.concat([group1_similar, group2_similar], axis=1)
    for label, pair in side_by_side.iterrows():
        if label in group1_similar.index and label in group2_similar.index:
            # Positional access must go through .iloc: the column labels are
            # the group names, not 0 and 1.
            first, second = pair.iloc[0], pair.iloc[1]
            if first != second and isinstance(first, str) and isinstance(second, str):
                different_features.append([label, first, second])

    return pd.DataFrame(
        different_features,
        columns=['features', group1_similar.name, group2_similar.name],
    ).set_index('features')

    
# Interactive run against the feature table; the result is kept in `a` and
# echoed. `a` is None when Group 1 is left empty.
a = get_features(features)
a

Group 1:
Enter a letter(0 to exit): 0


In [10]:
# Interactive run against the vowel chart; entering 0 straight away for
# Group2 returns only the features Group 1 shares.
get_features(vowels)

Group 1:
Enter a letter(0 to exit): a
Enter a letter(0 to exit): i
Enter a letter(0 to exit): 0

Group2:
Enter a letter(0 to exit): 0


Unnamed: 0,"a, i"
fronting,front
rounding,unrounded


In [10]:
# Interactive run against the consonant chart; entering 0 straight away for
# Group2 returns only the features Group 1 shares.
get_features(ipa_chart)

Group 1:
Enter a letter(0 to exit): r
Enter a letter(0 to exit): m
Enter a letter(0 to exit): 0

Group2:
Enter a letter(0 to exit): 0


Unnamed: 0,"r, m"
2,voiced
