In [24]:
import pandas
import os
import lingtypology
import math
import numpy as np
import matplotlib.pyplot as plt
from lingtypology.db_apis import Phoible
from scipy.stats import linregress, chi2_contingency

In [19]:
# Collect all binary features: a column counts as binary when both '+' and '-'
# occur among its values (other values may be present as well).
p = Phoible(aggregated=False)
df = p.get_df()
# Use a subset test rather than comparing a list built from a set against
# ['-', '+']: the iteration order of a set of strings is not guaranteed, so the
# list comparison could silently skip columns that do contain both values.
binary_features = [col for col in df if {'+', '-'} <= set(df[col])]
binary_features

Moran, Steven & McCloy, Daniel (eds.) 2019.
PHOIBLE 2.0.
Jena: Max Planck Institute for the Science of Human History.
(Available online at http://phoible.org, Accessed on 2019-05-23.)


['syllabic',
 'short',
 'long',
 'consonantal',
 'sonorant',
 'continuant',
 'delayedRelease',
 'approximant',
 'tap',
 'trill',
 'nasal',
 'lateral',
 'labial',
 'round',
 'labiodental',
 'coronal',
 'anterior',
 'distributed',
 'strident',
 'dorsal',
 'high',
 'low',
 'front',
 'back',
 'tense',
 'retractedTongueRoot',
 'advancedTongueRoot',
 'periodicGlottalSource',
 'epilaryngealSource',
 'spreadGlottis',
 'constrictedGlottis',
 'fortis',
 'raisedLarynxEjective',
 'loweredLarynxImplosive',
 'click']

Посчитаем всё про бинарные фичи для датасетов из Phoible

In [25]:
def fwrite(path, data):
    """Write the string ``data`` to the file at ``path`` as UTF-8 text.

    The file is opened in write mode, so existing content is replaced.
    """
    with open(path, mode='w', encoding='utf-8') as handle:
        handle.write(data)

def _feature_counts(frame, feature):
    """Return [rows with a positive ``feature`` count, rows with a zero count]."""
    return [len(frame[frame[feature] > 0]), len(frame[frame[feature] == 0])]


def _save_regression_plot(x, y, regression, path):
    """Scatter ``y`` against ``x``, overlay the fitted line and save a PNG to ``path``."""
    fig, ax = plt.subplots()
    ax.scatter(x, y, color='black')
    # Draw the fitted line across the x-range that the scatter plot occupies
    x_vals = np.array(ax.get_xlim())
    y_vals = regression.intercept + regression.slope * x_vals
    ax.plot(x_vals, y_vals, linewidth=3)
    fig.savefig(path, format='PNG')
    # Close the figure explicitly so repeated calls do not accumulate figures
    plt.close(fig)


def _regression_report(regression):
    """Format slope, intercept, r-value and p-value of ``regression`` as tab-separated lines."""
    reg_str = 'Slope:\t{slope}\nIntercept:\t{intercept}\nR_value:\t{rvalue}\nP_value:\t{pvalue}'
    return reg_str.format(
        slope=regression.slope,
        intercept=regression.intercept,
        rvalue=regression.rvalue,
        pvalue=regression.pvalue,
    )


def count_stats(subset, feature, elevation_threshold=1500, results_dir='phoible_results'):
    """Relate the number of segments carrying ``feature`` to language elevation.

    For the given Phoible ``subset`` the function counts, per Glottocode, the
    segments whose ``feature`` value is '+', looks up the elevation of every
    language and then runs:

    * a linear regression over languages that have at least one such segment,
    * a linear regression over all languages (languages without the feature
      get a count of 0),
    * a chi-square test on the 2x2 table "above / not above
      ``elevation_threshold``" x "has / lacks the feature".

    Plots (PNG), the test results and the raw tables are written to
    ``<results_dir>/<subset>/``; the directory is created when missing.

    Parameters:
        subset: name of the Phoible subset, passed to ``Phoible(subset=...)``.
        feature: name of the feature column to analyse.
        elevation_threshold: elevation that splits languages into "higher"
            (strictly above) and "lower" groups for the chi-square test.
        results_dir: root directory for the output files.

    Returns:
        ``(regression_no_zeros, regression_with_zeros, chi, subset)``, or
        ``None`` (after printing a message) when no language with the feature
        has a known elevation. ``chi`` is the result of ``chi2_contingency``,
        or a list of four NaNs when that call raises ``ValueError``.
    """
    p = Phoible(subset=subset, aggregated=False)
    p.show_citation = False
    data = p.get_df()
    # Number of '+' rows per Glottocode
    amount_with_feature = data[data[feature] == '+'].groupby('Glottocode').size()

    languages = [lingtypology.glottolog.get_by_glot_id(glot_id) for glot_id in amount_with_feature.index]
    with_feature = pandas.DataFrame({
        'language': languages,
        feature: amount_with_feature,
        'elevation': lingtypology.get_elevations(languages),
    })
    # Drop rows with an empty-string elevation
    # (presumably the marker for "elevation not found" -- confirm against lingtypology)
    with_feature = with_feature[with_feature.elevation != '']
    if with_feature.empty:
        print('No data: ' + subset)
        return

    # Among languages that have the feature: regression between the number of
    # segments carrying it (x) and elevation (y)
    regression_no_zeros = linregress(
        list(map(int, with_feature[feature])),
        list(map(int, with_feature.elevation))
    )

    # Languages of the subset that have no '+' segment at all, one row per language
    no_feature = data[~data.Glottocode.isin(list(amount_with_feature.index))]
    no_feature = no_feature.drop_duplicates(subset='Glottocode')
    languages = [lingtypology.glottolog.get_by_glot_id(glot_id) for glot_id in no_feature.Glottocode]
    no_feature = pandas.DataFrame({
        'language': languages,
        feature: 0,
        'elevation': lingtypology.get_elevations(languages),
    })
    no_feature = no_feature[no_feature.elevation != '']
    all_ = pandas.concat((with_feature, no_feature))

    # Same regression over all languages, including those with a zero count
    regression_with_zeros = linregress(
        list(map(int, all_[feature])),
        list(map(int, all_.elevation))
    )

    # 2x2 contingency table: rows are above / not above the threshold,
    # columns are has / lacks the feature
    table = [
        _feature_counts(all_[all_.elevation > elevation_threshold], feature),
        _feature_counts(all_[all_.elevation <= elevation_threshold], feature),
    ]

    # Is having the feature associated with living above the threshold?
    try:
        chi = chi2_contingency(table)
    except ValueError:
        # Keep going with NaN placeholders so the caller still gets a four-item result
        chi = [math.nan, math.nan, math.nan, math.nan]

    # Write all plots and data to files. makedirs also creates the parent
    # directory, so the function does not depend on the caller preparing it.
    cdir = os.path.join(results_dir, subset)
    os.makedirs(cdir, exist_ok=True)

    # Regression plot for languages that have the feature
    _save_regression_plot(
        with_feature[feature],
        with_feature.elevation,
        regression_no_zeros,
        os.path.join(cdir, '{}_linear_regression_only.png'.format(feature)),
    )

    # Regression plot for all languages
    _save_regression_plot(
        all_[feature],
        all_.elevation,
        regression_with_zeros,
        os.path.join(cdir, '{}_linear_regression_all.png'.format(feature)),
    )

    # Regression results
    fwrite(
        os.path.join(cdir, '{}_linear_regression_only.csv'.format(feature)),
        _regression_report(regression_no_zeros)
    )
    fwrite(
        os.path.join(cdir, '{}_linear_regression_all.csv'.format(feature)),
        _regression_report(regression_with_zeros)
    )

    # Chi-square results
    fwrite(
        os.path.join(cdir, '{}_chi2.csv'.format(feature)),
        'chi2:\t{chi2}\nP_value:\t{pvalue}\nDegrees of freedom:\t{dof}\nExpected:\t{ex}'.format(
            chi2=chi[0],
            pvalue=chi[1],
            dof=chi[2],
            ex=chi[3]
        )
    )

    # Raw data
    with_feature.to_csv(os.path.join(cdir, 'with_{}_raw.csv'.format(feature)))
    all_.to_csv(os.path.join(cdir, '{}_all_raw.csv'.format(feature)))

    return regression_no_zeros, regression_with_zeros, chi, subset

In [27]:
if __name__ == '__main__':
    # Analyse every binary feature; for a quick run restrict this to e.g.
    # ['loweredLarynxImplosive', 'raisedLarynxEjective'].
    features = binary_features
    subsets = ['UPSID', 'SPA', 'AA', 'PH', 'GM', 'RA', 'SAPHON']
    results = {}
    if not os.path.exists('phoible_results'):
        os.mkdir('phoible_results')
    for feature in features:
        # count_stats returns None for subsets without usable data; keep only
        # the real results, in subset order.
        outcomes = [
            outcome
            for outcome in (count_stats(subset, feature) for subset in subsets)
            if outcome
        ]
        plt.close()
        # Each outcome is (regression_no_zeros, regression_with_zeros, chi2, subset);
        # the table stores the p-values formatted with 15 decimal places.
        results[feature] = pandas.DataFrame({
            'Dataset': [outcome[3] for outcome in outcomes],
            'Regression (only with feature)': ['%.015f' % outcome[0].pvalue for outcome in outcomes],
            'Regression (all languages)': ['%.015f' % outcome[1].pvalue for outcome in outcomes],
            'Chi2 Test': ['%.015f' % outcome[2][1] for outcome in outcomes]
        })

Elevations for these languages were not found: Kaliai, Nama, Katcha
Elevations for these languages were not found: Ikwo, Ezaa
Elevations for these languages were not found: Mianmin, Bikele, Korafe, Mvumbo, Karo, Saanich, Lorette Huron, Endo, Kuay
Elevations for these languages were not found: Chaha, Besleri, Efutu, Frafra, Zayse, Ezha, Soddo, Mmani, Copi, Kambe, Oko, Pana, Ikalanga, Kauma, Dinka, Gumer, Moghamo
Elevations for these languages were not found: Naiki, Mising, Abujmaria
Elevations for these languages were not found: Karo, Miraña, Khithaulhu, Shipibo
Elevations for these languages were not found: Kaliai, Nama, Katcha
No data: AA
No data: PH
Elevations for these languages were not found: Chaha, Besleri, Efutu, Frafra, Zayse, Soddo, Ezha, Mmani, Copi, Kambe, Oko, Pana, Ikalanga, Kauma, Dinka, Gumer, Moghamo
Elevations for these languages were not found: Abujmaria
Elevations for these languages were not found: Naiki, Mising
No data: SAPHON
Elevations for these languages were no

  slope = r_num / ssxm
  sterrest = np.sqrt((1 - r**2) * ssym / ssxm / df)


Elevations for these languages were not found: Karo, Miraña, Khithaulhu, Shipibo
Elevations for these languages were not found: Kaliai, Nama, Katcha
Elevations for these languages were not found: Ikwo, Ezaa
Elevations for these languages were not found: Mianmin, Bikele, Korafe, Mvumbo, Karo, Saanich, Lorette Huron, Endo, Kuay
Elevations for these languages were not found: Chaha, Besleri, Efutu, Frafra, Zayse, Ezha, Soddo, Mmani, Copi, Kambe, Oko, Pana, Ikalanga, Kauma, Dinka, Gumer, Moghamo
Elevations for these languages were not found: Naiki, Mising, Abujmaria
Elevations for these languages were not found: Karo, Miraña, Khithaulhu, Shipibo
Elevations for these languages were not found: Kaliai, Nama, Katcha
Elevations for these languages were not found: Ikwo, Ezaa
Elevations for these languages were not found: Bikele, Saanich, Mvumbo, Endo, Kuay
Elevations for these languages were not found: Korafe, Karo, Mianmin, Lorette Huron
Elevations for these languages were not found: Chaha, Besl

Elevations for these languages were not found: Mianmin, Bikele, Korafe, Mvumbo, Karo, Saanich, Lorette Huron, Endo, Kuay
Elevations for these languages were not found: Chaha, Besleri, Efutu, Frafra, Zayse, Ezha, Soddo, Mmani, Copi, Kambe, Oko, Pana, Ikalanga, Kauma, Dinka, Gumer, Moghamo
Elevations for these languages were not found: Naiki, Mising, Abujmaria
Elevations for these languages were not found: Karo, Miraña, Khithaulhu, Shipibo
Elevations for these languages were not found: Kaliai, Nama, Katcha
Elevations for these languages were not found: Ikwo, Ezaa
Elevations for these languages were not found: Mianmin, Bikele, Korafe, Mvumbo, Karo, Saanich, Lorette Huron, Endo, Kuay
Elevations for these languages were not found: Chaha, Besleri, Efutu, Frafra, Zayse, Ezha, Soddo, Mmani, Copi, Kambe, Oko, Pana, Ikalanga, Kauma, Dinka, Gumer, Moghamo
Elevations for these languages were not found: Naiki, Mising, Abujmaria
Elevations for these languages were not found: Karo, Miraña, Khithaulhu

  t = r * np.sqrt(df / ((1.0 - r + TINY)*(1.0 + r + TINY)))
  return (self.a < x) & (x < self.b)
  return (self.a < x) & (x < self.b)
  cond2 = cond0 & (x <= self.a)
  sterrest = np.sqrt((1 - r**2) * ssym / ssxm / df)


Elevations for these languages were not found: Kaliai, Nama, Katcha
No data: AA
Elevations for these languages were not found: Mianmin, Bikele, Korafe, Mvumbo, Saanich, Karo, Lorette Huron, Endo, Kuay
No data: GM
No data: RA
Elevations for these languages were not found: Karo, Miraña, Khithaulhu, Shipibo
Elevations for these languages were not found: Kaliai, Nama, Katcha
Elevations for these languages were not found: Ikwo, Ezaa
Elevations for these languages were not found: Mianmin, Bikele, Korafe, Mvumbo, Karo, Saanich, Lorette Huron, Endo, Kuay
Elevations for these languages were not found: Chaha, Besleri, Efutu, Frafra, Zayse, Ezha, Soddo, Mmani, Copi, Kambe, Oko, Pana, Ikalanga, Kauma, Dinka, Gumer, Moghamo
Elevations for these languages were not found: Naiki, Mising, Abujmaria
Elevations for these languages were not found: Karo, Miraña, Khithaulhu, Shipibo
No data: UPSID
No data: SPA
No data: AA
Elevations for these languages were not found: Mianmin, Bikele, Korafe, Mvumbo, Saanic

In [2]:
results['loweredLarynxImplosive']

Unnamed: 0,Dataset,Regression (only with feature),Regression (all languages),Chi2 Test
0,UPSID,0.228379981008692,0.716143656466409,0.565424335905343
1,SPA,0.774091231559461,0.545937242806401,0.877635260028769
2,AA,0.023065509485688,0.153913810484667,0.567918568194081
3,PH,0.194855408626411,0.173191762052172,0.94554733555659
4,GM,0.588086348641805,0.480647669077945,0.224522852678438
5,RA,,0.442496601314041,0.321514293603832
6,SAPHON,0.851008521498705,0.601680995225095,0.643234605638429


In [35]:
results['raisedLarynxEjective']

Unnamed: 0,Dataset,Regression (only with feature),Regression (all languages),Chi2 Test
0,UPSID,0.950559282993466,4.4964081592e-05,3.2921681908e-05
1,SPA,0.475539733143422,5.592842023e-06,0.000176784757431
2,PH,0.731523538203316,0.392451413030472,0.160190111324293
3,GM,0.038586492300174,0.0,0.0
4,SAPHON,0.018874875617294,5.031926e-09,0.000377241915218


In [39]:
results['syllabic']

Unnamed: 0,Dataset,Regression (only with feature),Regression (all languages),Chi2 Test
0,UPSID,0.714546510658585,0.714546510658585,
1,SPA,0.183866281549319,0.183866281549319,
2,AA,0.077097246917253,0.077097246917253,
3,PH,0.983749390079008,0.983749390079008,
4,GM,0.218115090278548,0.218115090278548,
5,RA,0.16093412472429,0.16093412472429,
6,SAPHON,0.020217881983567,0.020217881983567,


In [40]:
results['short']

Unnamed: 0,Dataset,Regression (only with feature),Regression (all languages),Chi2 Test
0,UPSID,0.798013305716141,0.578336291969756,0.730419672713904
1,SPA,0.921479927332101,0.477171707612139,0.497428201856995
2,GM,0.001663130139335,0.392657880440924,0.658720709358762
3,RA,0.534083274332239,0.000399934052352,0.08259276921486


In [41]:
results['long']

Unnamed: 0,Dataset,Regression (only with feature),Regression (all languages),Chi2 Test
0,UPSID,0.192704480141262,0.380121052956903,0.620511832347898
1,SPA,0.612186347170644,0.42621448875623,0.831051620467149
2,AA,0.376356461462869,0.791289975455638,0.755885144448328
3,PH,0.438960462512865,0.853015733305962,0.254948600104008
4,GM,0.608244824361989,0.055922871994605,0.006986869609299
5,RA,0.029481909089814,0.728961569864188,0.112500539872434
6,SAPHON,0.025830676397968,0.352099004108709,0.02869427959889


In [42]:
results['consonantal']

Unnamed: 0,Dataset,Regression (only with feature),Regression (all languages),Chi2 Test
0,UPSID,3.5263530941e-05,3.5263530941e-05,
1,SPA,0.13030620330309,0.13030620330309,
2,AA,0.175221546469116,0.175221546469116,
3,PH,1.137261567e-06,1.137261567e-06,
4,GM,0.00080904475384,0.00080904475384,
5,RA,0.15659706279315,0.15659706279315,
6,SAPHON,1.25957e-10,1.25957e-10,


In [43]:
results['sonorant']

Unnamed: 0,Dataset,Regression (only with feature),Regression (all languages),Chi2 Test
0,UPSID,0.98824059778352,0.98824059778352,
1,SPA,0.161498221613695,0.161498221613695,
2,AA,0.092571541440871,0.092571541440871,
3,PH,0.788340489856373,0.788340489856373,
4,GM,0.679405363775753,0.679405363775753,
5,RA,0.199749311540741,0.199749311540741,
6,SAPHON,0.913321922603925,0.913321922603925,


In [44]:
results['continuant']

Unnamed: 0,Dataset,Regression (only with feature),Regression (all languages),Chi2 Test
0,UPSID,0.022490279117195,0.022490279117195,
1,SPA,0.684345560418664,0.684345560418664,
2,AA,0.23340522027711,0.23340522027711,
3,PH,0.06683869436872,0.06683869436872,
4,GM,0.25269682590844,0.25269682590844,
5,RA,0.961067692299752,0.961067692299752,
6,SAPHON,0.9678397191937,0.9678397191937,


In [45]:
results['delayedRelease']

Unnamed: 0,Dataset,Regression (only with feature),Regression (all languages),Chi2 Test
0,UPSID,4.66988833e-07,2.7714575e-08,0.610642343383107
1,SPA,0.011624999397568,0.01841953269492,0.433495995608892
2,AA,0.019772117941638,0.019772117941638,
3,PH,3.1681148e-08,1.2412621e-08,0.905140088960697
4,GM,1.639605236e-06,1.470994406e-06,0.843461590175212
5,RA,2.3421953139e-05,2.3421953139e-05,
6,SAPHON,7.6678859e-08,6.9203968e-08,0.485630464439581


In [46]:
results['approximant']

Unnamed: 0,Dataset,Regression (only with feature),Regression (all languages),Chi2 Test
0,UPSID,0.807043508267431,0.807043508267431,
1,SPA,0.218985882382301,0.218985882382301,
2,AA,0.08159127237161,0.08159127237161,
3,PH,0.859901806188837,0.859901806188837,
4,GM,0.533685815663632,0.533685815663632,
5,RA,0.188637827668085,0.188637827668085,
6,SAPHON,0.391045805054316,0.391045805054316,


In [47]:
results['tap']

Unnamed: 0,Dataset,Regression (only with feature),Regression (all languages),Chi2 Test
0,UPSID,0.384277244654051,0.78272087886818,0.92722783157449
1,SPA,0.718469474427442,0.722907337275525,0.987328733151668
2,AA,0.955104235321519,0.242247321623531,0.907576007474965
3,PH,0.198617298018152,0.356777762663314,0.790767254178321
4,GM,0.775476962949851,0.179749152905111,0.836723984942173
5,RA,0.272226149963113,0.06017197085787,0.112500539872434
6,SAPHON,0.470710976927236,0.071213296306088,0.349615490771714


In [48]:
results['trill']

Unnamed: 0,Dataset,Regression (only with feature),Regression (all languages),Chi2 Test
0,UPSID,0.45077461503256,0.695194243552769,0.517374876065757
1,SPA,0.078867860515762,0.809318885212975,0.960465877523403
2,AA,0.490183129656513,0.271406708567318,0.486470077078995
3,PH,0.287693520574263,0.374047888119018,0.132695245671694
4,GM,0.012023385489005,0.403038079457651,0.949874476413349
5,RA,0.909428844156235,0.435494869318571,0.062153287592109
6,SAPHON,1.0,0.689593658626257,0.85201833901043


In [49]:
results['nasal']

Unnamed: 0,Dataset,Regression (only with feature),Regression (all languages),Chi2 Test
0,UPSID,0.11614713488587,0.186683360599912,0.738778553384912
1,SPA,0.302102059261899,0.302102059261899,
2,AA,5.7593459129e-05,5.7593459129e-05,
3,PH,0.509356035860437,0.628611251262984,0.757345703655888
4,GM,0.014601215231409,0.009823086284412,0.160318898286199
5,RA,0.01871981642412,0.01871981642412,
6,SAPHON,0.038899071035937,0.073396224014476,0.711266316433214


In [50]:
results['lateral']

Unnamed: 0,Dataset,Regression (only with feature),Regression (all languages),Chi2 Test
0,UPSID,0.593973342605729,0.332367615908262,0.117359459300816
1,SPA,0.087526995173467,0.43889907030825,0.546315254202639
2,AA,0.142441954079011,0.831945844906071,0.049114227976661
3,PH,0.498467229296274,0.247174770816989,0.320518850359147
4,GM,0.145945919723254,0.312110648277769,0.641480155123851
5,RA,0.323075935397107,0.161308749075815,0.930140153281564
6,SAPHON,3.864870809e-06,0.0,1.332869234e-06


In [51]:
results['labial']

Unnamed: 0,Dataset,Regression (only with feature),Regression (all languages),Chi2 Test
0,UPSID,0.190192193594338,0.190192193594338,
1,SPA,0.993590832656635,0.944692562809949,0.378695084030329
2,AA,0.446974973940483,0.446974973940483,
3,PH,0.653036296908154,0.653036296908154,
4,GM,0.85970577653258,0.85970577653258,
5,RA,0.13648531004635,0.13648531004635,
6,SAPHON,0.712687616792032,0.712687616792032,


In [52]:
results['round']

Unnamed: 0,Dataset,Regression (only with feature),Regression (all languages),Chi2 Test
0,UPSID,0.093377702156051,0.084079619644448,0.266708754830617
1,SPA,0.981278038889535,0.983118851048115,0.378695084030329
2,AA,0.910021957820454,0.910021957820454,
3,PH,0.516021567401251,0.516021567401251,
4,GM,0.528945498330977,0.543705610644131,0.160318898286199
5,RA,0.106726582553237,0.106726582553237,
6,SAPHON,0.102618465711868,0.102618465711868,


In [53]:
results['labiodental']

Unnamed: 0,Dataset,Regression (only with feature),Regression (all languages),Chi2 Test
0,UPSID,0.162626155983254,0.912329511406523,0.892484946004062
1,SPA,0.156089395235325,0.066757571932955,0.159203697603773
2,AA,1.7867709808e-05,0.225217468329436,0.142775822518337
3,PH,0.240523911953929,0.599229917068007,0.800579090337547
4,GM,0.284910418509202,0.712547473576335,0.586944132789422
5,RA,0.545721279466335,0.56595992617401,0.924948654834964
6,SAPHON,0.635297077430596,0.967952983774695,0.845729289489074


In [54]:
results['coronal']

Unnamed: 0,Dataset,Regression (only with feature),Regression (all languages),Chi2 Test
0,UPSID,1.0325468088e-05,1.0325468088e-05,
1,SPA,0.18865762465642,0.18865762465642,
2,AA,0.028469370027802,0.028469370027802,
3,PH,2.262798e-09,2.262798e-09,
4,GM,0.000412448750827,0.000412448750827,
5,RA,0.094343677467564,0.094343677467564,
6,SAPHON,1.614e-12,1.614e-12,


In [55]:
results['anterior']

Unnamed: 0,Dataset,Regression (only with feature),Regression (all languages),Chi2 Test
0,UPSID,0.000164457391273,0.000164457391273,
1,SPA,0.472708231374753,0.472708231374753,
2,AA,0.021323270250405,0.021323270250405,
3,PH,9.9372975e-08,9.9372975e-08,
4,GM,0.000643762367301,0.000643762367301,
5,RA,0.009792117450891,0.009792117450891,
6,SAPHON,1.262789696e-05,1.262789696e-05,


In [56]:
results['distributed']

Unnamed: 0,Dataset,Regression (only with feature),Regression (all languages),Chi2 Test
0,UPSID,0.00753841776003,0.00299326343826,0.887228972268843
1,SPA,0.550252284731491,0.665835413665914,0.277097744354564
2,AA,0.023381593369912,0.03350451024593,0.836472377297864
3,PH,1.3370949476e-05,4.47292278e-07,0.075265129745487
4,GM,0.000626764798911,0.00032037322976,0.457536182941717
5,RA,0.483800756201241,0.483800756201241,
6,SAPHON,1.1718e-11,2e-14,0.013946929422413


In [57]:
results['strident']

Unnamed: 0,Dataset,Regression (only with feature),Regression (all languages),Chi2 Test
0,UPSID,6.5378e-11,4.182e-12,0.557649445245781
1,SPA,0.000812515410362,0.000708757031838,0.715924806038053
2,AA,0.041981090834881,0.041981090834881,
3,PH,7.8334e-11,3.935e-12,0.489576094798656
4,GM,2.082054895e-06,8.74116975e-07,0.386104899528716
5,RA,4.393593586e-06,7.890779983e-06,0.321514293603832
6,SAPHON,4.9985e-11,9.857e-12,0.370527723705834


In [58]:
results['dorsal']

Unnamed: 0,Dataset,Regression (only with feature),Regression (all languages),Chi2 Test
0,UPSID,0.150098482264098,0.150098482264098,
1,SPA,0.624520809626625,0.624520809626625,
2,AA,0.128682110974915,0.128682110974915,
3,PH,0.177054239160767,0.177054239160767,
4,GM,0.798260247830952,0.798260247830952,
5,RA,0.683856622879392,0.683856622879392,
6,SAPHON,0.461746595244533,0.461746595244533,


In [59]:
results['high']

Unnamed: 0,Dataset,Regression (only with feature),Regression (all languages),Chi2 Test
0,UPSID,0.258448594918757,0.258448594918757,
1,SPA,0.708310909341868,0.708310909341868,
2,AA,0.370333443183309,0.370333443183309,
3,PH,0.025610768212539,0.025610768212539,
4,GM,0.067727978274177,0.067727978274177,
5,RA,0.691020628518315,0.691020628518315,
6,SAPHON,0.005385181384067,0.005385181384067,


In [60]:
results['low']

Unnamed: 0,Dataset,Regression (only with feature),Regression (all languages),Chi2 Test
0,UPSID,0.801542666662403,0.751982594207845,0.266708754830617
1,SPA,0.169751820374888,0.206020757345426,0.378695084030329
2,AA,0.648190339626524,0.648190339626524,
3,PH,0.374674981670514,0.648074485279237,0.59059100813213
4,GM,0.893104383603123,0.932686824822738,0.442999124066654
5,RA,0.126082626261733,0.088528643584099,0.321514293603832
6,SAPHON,0.756965010343754,0.756965010343754,


In [61]:
results['front']

Unnamed: 0,Dataset,Regression (only with feature),Regression (all languages),Chi2 Test
0,UPSID,0.403774312770976,0.403774312770976,
1,SPA,0.107161537501031,0.107161537501031,
2,AA,0.221591585482701,0.221591585482701,
3,PH,0.167909970204763,0.167909970204763,
4,GM,0.872250122336608,0.89238736226569,0.160318898286199
5,RA,0.971025994154599,0.971025994154599,
6,SAPHON,0.142967793272154,0.142967793272154,


In [62]:
results['back']

Unnamed: 0,Dataset,Regression (only with feature),Regression (all languages),Chi2 Test
0,UPSID,0.025605429075347,0.025605429075347,
1,SPA,0.980399488939603,0.980399488939603,
2,AA,0.003024437641611,0.003024437641611,
3,PH,0.470348336602331,0.470348336602331,
4,GM,0.748785924801087,0.728531361059822,0.160318898286199
5,RA,0.390588104344683,0.390588104344683,
6,SAPHON,0.49943077968694,0.49943077968694,


In [63]:
results['tense']

Unnamed: 0,Dataset,Regression (only with feature),Regression (all languages),Chi2 Test
0,UPSID,0.270090170963538,0.309468301091991,0.266708754830617
1,SPA,0.778916154715334,0.778916154715334,
2,AA,0.10181980382935,0.10181980382935,
3,PH,0.128984015350159,0.124644933375832,0.255246281705311
4,GM,0.919609211584689,0.94234284612347,0.160318898286199
5,RA,0.849885859497966,0.849885859497966,
6,SAPHON,0.04272000879049,0.04272000879049,


In [64]:
results['retractedTongueRoot']

Unnamed: 0,Dataset,Regression (only with feature),Regression (all languages),Chi2 Test
0,UPSID,0.015539450317761,0.009253335780697,0.124305390808548
1,SPA,0.452945792553326,0.474451517787331,0.893600396371416
2,AA,0.149804188762319,0.796334921134092,0.225210272657504
3,PH,0.352889676151753,0.595338912593216,0.866506575329833
4,GM,0.195213233101159,0.641020561604521,0.824181832981538
5,RA,1.0,0.323113921731846,0.930140153281564


In [65]:
results['advancedTongueRoot']

Unnamed: 0,Dataset,Regression (only with feature),Regression (all languages),Chi2 Test
0,SPA,,0.438041584685592,0.378695084030329
1,PH,,0.515687367857686,0.255246281705311
2,SAPHON,,0.68663119236097,0.186412393772568


In [66]:
results['periodicGlottalSource']

Unnamed: 0,Dataset,Regression (only with feature),Regression (all languages),Chi2 Test
0,UPSID,0.490346593345146,0.490346593345146,
1,SPA,0.225773758695212,0.225773758695212,
2,AA,0.796982526053919,0.796982526053919,
3,PH,0.183612151556271,0.183612151556271,
4,GM,0.969710328791981,0.969710328791981,
5,RA,0.394474281070351,0.394474281070351,
6,SAPHON,0.655114755716625,0.655114755716625,


In [67]:
results['epilaryngealSource']

Unnamed: 0,Dataset,Regression (only with feature),Regression (all languages),Chi2 Test
0,PH,,0.936684029611671,0.255246281705311
1,GM,,0.420654811122601,0.160318898286199


In [68]:
results['spreadGlottis']

Unnamed: 0,Dataset,Regression (only with feature),Regression (all languages),Chi2 Test
0,UPSID,0.000490305234755,1.4163389814e-05,0.362376239087925
1,SPA,0.406244666572278,0.164776507621396,0.885780256031918
2,AA,0.400174335919465,0.173914095426191,0.130185303900607
3,PH,0.001648477970985,0.000139286706682,0.808968585395863
4,GM,0.02409860569845,0.001180356714365,0.048041180787709
5,RA,0.73937923082207,0.557100345489714,0.894069996254116
6,SAPHON,1.350835445e-06,0.003637450714112,0.009041205598374


In [69]:
results['constrictedGlottis']

Unnamed: 0,Dataset,Regression (only with feature),Regression (all languages),Chi2 Test
0,UPSID,0.059661940484604,0.00145231764083,0.127966661243741
1,SPA,0.000369673809172,2.5108037804e-05,0.132809308454472
2,AA,0.048275960072714,0.006714278957214,0.649133295389312
3,PH,0.737199705286507,0.487842743316908,0.14319697650972
4,GM,5e-14,0.0,0.005662174866105
5,RA,0.599652308566943,0.488967187726131,0.12442786417134
6,SAPHON,3.466691807e-06,0.00283018790431,0.342260126597239


In [70]:
results['fortis']

Unnamed: 0,Dataset,Regression (only with feature),Regression (all languages),Chi2 Test
0,SPA,0.569029949059862,0.431696418704604,0.808314671561521


In [73]:
results['click']

Unnamed: 0,Dataset,Regression (only with feature),Regression (all languages),Chi2 Test
0,GM,,0.420654811122605,0.160318898286199
