In [1]:
import os
import sqlite3

In [2]:
# Location of the CLICS SQLite database. Set the CLICS_SQLITE_PATH environment
# variable to run the notebook on another machine; the default is the original path.
db = sqlite3.connect(
    os.environ.get(
        "CLICS_SQLITE_PATH",
        "/home/ivan/Datasets/clics-clics3-97832b5/clics.sqlite",
    )
)

In [3]:
cu = db.cursor()

In [4]:
res = cu.execute('SELECT name FROM sqlite_master  WHERE type=\'table\'')

In [5]:
res.fetchall()

[('dataset',),
 ('datasetmeta',),
 ('SourceTable',),
 ('LanguageTable',),
 ('ParameterTable',),
 ('FormTable',),
 ('CognateTable',),
 ('CognateSource',),
 ('FormSource',),
 ('BorrowingTable',),
 ('ValueTable',),
 ('ValueSource',),
 ('BorrowingSource',)]

In [6]:
def execute(cursor, sql, as_dicts=True, params=()):
    """Run ``sql`` on ``cursor`` and return every result row.

    Args:
        cursor: DB-API cursor (e.g. ``sqlite3.Cursor``) the statement is run on.
        sql: SQL text; may contain ``?`` placeholders.
        as_dicts: if true (default), each row is returned as a
            ``{column_name: value}`` dict; otherwise the plain tuples from
            ``fetchall()`` are returned.
        params: values bound to the placeholders in ``sql``. Prefer this over
            formatting values into the SQL string, so that quoting is handled
            by the database driver.

    Returns:
        A list of dicts or a list of tuples, depending on ``as_dicts``.
    """
    res = cursor.execute(sql, params)
    if not as_dicts:
        return res.fetchall()
    # ``description`` is None for statements that produce no result set
    # (e.g. CREATE/INSERT); there are no rows to convert in that case.
    if res.description is None:
        return []
    cols = [col[0] for col in res.description]
    return [dict(zip(cols, row)) for row in res]

In [7]:
execute(cu, "SELECT ID, dataset_ID, Glottolog_Name FROM LanguageTable WHERE Glottolog_Name  = 'Russian' ")

[{'ID': '204', 'dataset_ID': 'ids', 'Glottolog_Name': 'Russian'},
 {'ID': 'rus', 'dataset_ID': 'northeuralex', 'Glottolog_Name': 'Russian'},
 {'ID': '37000', 'dataset_ID': 'diacl', 'Glottolog_Name': 'Russian'}]

In [8]:
execute(cu, "SELECT ID, dataset_ID, Glottolog_Name FROM LanguageTable WHERE Glottolog_Name  LIKE  '%serb%' ")

[{'ID': '201',
  'dataset_ID': 'ids',
  'Glottolog_Name': 'Serbian-Croatian-Bosnian'},
 {'ID': '37500', 'dataset_ID': 'diacl', 'Glottolog_Name': 'Serbian Standard'}]

In [9]:
execute(cu, "SELECT ID, dataset_ID, Glottolog_Name FROM LanguageTable WHERE dataset_ID = 'diacl' AND Glottolog_Name IN ('German', 'English', 'Serbian Standard', 'Russian')")

[{'ID': '37000', 'dataset_ID': 'diacl', 'Glottolog_Name': 'Russian'},
 {'ID': '37500', 'dataset_ID': 'diacl', 'Glottolog_Name': 'Serbian Standard'},
 {'ID': '41500', 'dataset_ID': 'diacl', 'Glottolog_Name': 'English'},
 {'ID': '41700', 'dataset_ID': 'diacl', 'Glottolog_Name': 'German'}]

In [11]:
res = execute(cu, 'SELECT dataset_ID, COUNT(*) FROM FormTable GROUP BY dataset_ID')

In [12]:
res 

[{'dataset_ID': 'abrahammonpa', 'COUNT(*)': 8315},
 {'dataset_ID': 'allenbai', 'COUNT(*)': 4546},
 {'dataset_ID': 'bantubvd', 'COUNT(*)': 4257},
 {'dataset_ID': 'beidasinitic', 'COUNT(*)': 18059},
 {'dataset_ID': 'bodtkhobwa', 'COUNT(*)': 4720},
 {'dataset_ID': 'bowernpny', 'COUNT(*)': 44876},
 {'dataset_ID': 'castrosui', 'COUNT(*)': 9693},
 {'dataset_ID': 'chenhmongmien', 'COUNT(*)': 21967},
 {'dataset_ID': 'diacl', 'COUNT(*)': 60130},
 {'dataset_ID': 'halenepal', 'COUNT(*)': 11041},
 {'dataset_ID': 'hantganbangime', 'COUNT(*)': 6181},
 {'dataset_ID': 'hubercolumbian', 'COUNT(*)': 26726},
 {'dataset_ID': 'ids', 'COUNT(*)': 453975},
 {'dataset_ID': 'kraftchadic', 'COUNT(*)': 29059},
 {'dataset_ID': 'lexirumah', 'COUNT(*)': 117986},
 {'dataset_ID': 'logos', 'COUNT(*)': 3283},
 {'dataset_ID': 'marrisonnaga', 'COUNT(*)': 19200},
 {'dataset_ID': 'mitterhoferbena', 'COUNT(*)': 2017},
 {'dataset_ID': 'naganorgyalrongic', 'COUNT(*)': 10085},
 {'dataset_ID': 'northeuralex', 'COUNT(*)': 121612}

In [13]:
execute(cu, """SELECT Parameter_ID, COUNT(*) FROM FormTable 
WHERE dataset_ID = 'ids'  AND Language_ID = 204
GROUP BY Parameter_ID ORDER BY COUNT(*) DESC LIMIT 5""")

[{'Parameter_ID': '2-340', 'COUNT(*)': 5},
 {'Parameter_ID': '9-210', 'COUNT(*)': 4},
 {'Parameter_ID': '4-740', 'COUNT(*)': 4},
 {'Parameter_ID': '4-670', 'COUNT(*)': 4},
 {'Parameter_ID': '22-420', 'COUNT(*)': 4}]

In [14]:
execute(cu, """SELECT Form, COUNT(*) FROM FormTable 
WHERE dataset_ID = 'ids'  AND Language_ID = 204
GROUP BY Form ORDER BY COUNT(*) DESC LIMIT 5""")

[{'Form': 'vernyj', 'COUNT(*)': 3},
 {'Form': 'syroj', 'COUNT(*)': 3},
 {'Form': 'peč', 'COUNT(*)': 3},
 {'Form': 'my', 'COUNT(*)': 3},
 {'Form': 'idti', 'COUNT(*)': 3}]

In [31]:
class LanguageID:
    """Language_ID values of the focus languages, grouped by source dataset."""

    class DIACL:
        """IDs used by the 'diacl' dataset."""
        ENGLISH = 41500
        GERMAN = 41700
        SERBIAN = 37500
        RUSSIAN = 37000

    class IDS:
        """IDs used by the 'ids' dataset."""
        ENGLISH = 190
        GERMAN = 194
        SERBO_CROATIAN = 201
        RUSSIAN = 204

In [24]:
def make_sql_list(xs):
    """Render ``xs`` as a comma-separated list of SQL string literals.

    The result is meant to be spliced into an ``IN (...)`` clause. Each item is
    converted with ``str()`` and embedded single quotes are doubled, which is
    how SQL escapes a quote inside a string literal, so a value such as
    "MOTHER'S SISTER" cannot terminate the literal early.
    """
    return ",".join("'" + str(x).replace("'", "''") + "'" for x in xs)

In [26]:
parameters = [
    '2-340',
    '9-210',
    '4-740'
]

execute(cu, f"""
SELECT Parameter_ID, Value, Form, clics_form 
FROM FormTable 
WHERE  dataset_ID = 'ids' 
AND Language_ID = 201 
AND Parameter_ID IN ({make_sql_list(parameters)})
ORDER BY Parameter_ID, Form""")

[{'Parameter_ID': '2-340',
  'Value': 'brak',
  'Form': 'brak',
  'clics_form': 'brak'},
 {'Parameter_ID': '2-340',
  'Value': 'svadba',
  'Form': 'svadba',
  'clics_form': 'svadba'},
 {'Parameter_ID': '2-340',
  'Value': 'udaja',
  'Form': 'udaja',
  'clics_form': 'udaja'},
 {'Parameter_ID': '2-340',
  'Value': 'v(j)enčanje',
  'Form': 'vjenčanje',
  'clics_form': 'vjencanje'},
 {'Parameter_ID': '2-340',
  'Value': 'ženidba',
  'Form': 'ženidba',
  'clics_form': 'zenidba'},
 {'Parameter_ID': '4-740',
  'Value': 'živ',
  'Form': 'živ',
  'clics_form': 'ziv'},
 {'Parameter_ID': '4-740',
  'Value': 'živ(j)eti',
  'Form': 'živjeti',
  'clics_form': 'zivjeti'},
 {'Parameter_ID': '4-740',
  'Value': 'život',
  'Form': 'život',
  'clics_form': 'zivot'},
 {'Parameter_ID': '9-210',
  'Value': 'biti',
  'Form': 'biti',
  'clics_form': 'biti'},
 {'Parameter_ID': '9-210',
  'Value': 'lupiti',
  'Form': 'lupiti',
  'clics_form': 'lupiti'},
 {'Parameter_ID': '9-210',
  'Value': 'tući',
 

In [27]:
execute(cu, "SELECT COUNT(DISTINCT Language_ID) FROM FormTable WHERE  dataset_ID = 'ids'")

[{'COUNT(DISTINCT Language_ID)': 329}]

In [28]:
# execute(cu, """SELECT * FROM LanguageTable WHERE dataset_ID = 'ids' AND Family = 'Indo-European'
# AND
# (Name LIKE '%engl%' OR Name LIKE '%russ%'  OR Name LIKE '%serb%'  OR Name LIKE '%german%')""")

In [32]:
L = LanguageID.IDS

execute(cu, f"""
SELECT Language_ID, COUNT(*) AS countForms, COUNT(DISTINCT Parameter_ID) AS countParameters
FROM FormTable 
WHERE  dataset_ID = 'ids'
AND Language_ID IN ('{L.ENGLISH}', '{L.GERMAN}', '{L.SERBO_CROATIAN}', '{L.RUSSIAN}')
GROUP BY Language_ID
""")

[{'Language_ID': '190', 'countForms': 1553, 'countParameters': 1310},
 {'Language_ID': '194', 'countForms': 1898, 'countParameters': 1309},
 {'Language_ID': '201', 'countForms': 2119, 'countParameters': 1300},
 {'Language_ID': '204', 'countForms': 1695, 'countParameters': 1305}]

In [33]:
L = LanguageID.DIACL

execute(cu, f"""
SELECT Language_ID, COUNT(*) AS countForms, COUNT(DISTINCT Parameter_ID) AS countParameters
FROM FormTable 
WHERE  dataset_ID = 'diacl'
AND Language_ID IN ('{L.ENGLISH}', '{L.GERMAN}', '{L.SERBIAN}', '{L.RUSSIAN}')
GROUP BY Language_ID
""")

[{'Language_ID': '37000', 'countForms': 314, 'countParameters': 188},
 {'Language_ID': '37500', 'countForms': 295, 'countParameters': 182},
 {'Language_ID': '41500', 'countForms': 328, 'countParameters': 187},
 {'Language_ID': '41700', 'countForms': 334, 'countParameters': 189}]

In [37]:
DATASET = 'ids'

In [39]:
execute(cu, f"SELECT * FROM ParameterTable WHERE dataset_ID = '{DATASET}' LIMIT 5")

[{'ID': '1-100',
  'Name': 'world',
  'Concepticon_ID': '965',
  'Concepticon_Gloss': 'WORLD',
  'dataset_ID': 'ids',
  'Ontological_Category': 'Person/Thing',
  'Semantic_Field': 'The physical world',
  'Chinese_Gloss': None,
  'Number': None,
  'Spanish': None,
  'Gloss_in_digital_source': None,
  'NorthEuralex_Gloss': None,
  'DIACL_ID': None,
  'SrcId': None,
  'Description': None,
  'Indonesian': None,
  'Elicitation_Notes': None,
  'Core_Set': None,
  'Comment': None},
 {'ID': '1-210',
  'Name': 'earth, land',
  'Concepticon_ID': '626',
  'Concepticon_Gloss': 'LAND',
  'dataset_ID': 'ids',
  'Ontological_Category': 'Person/Thing',
  'Semantic_Field': 'The physical world',
  'Chinese_Gloss': None,
  'Number': None,
  'Spanish': None,
  'Gloss_in_digital_source': None,
  'NorthEuralex_Gloss': None,
  'DIACL_ID': None,
  'SrcId': None,
  'Description': None,
  'Indonesian': None,
  'Elicitation_Notes': None,
  'Core_Set': None,
  'Comment': None},
 {'ID': '1-212',
  'Name': 'earth=g

In [40]:
execute(cu, f"SELECT Semantic_Field, COUNT(*) FROM ParameterTable WHERE dataset_ID = '{DATASET}' GROUP BY Semantic_Field")

[{'Semantic_Field': 'Agriculture and vegetation', 'COUNT(*)': 68},
 {'Semantic_Field': 'Animals', 'COUNT(*)': 97},
 {'Semantic_Field': 'Basic actions and technology', 'COUNT(*)': 74},
 {'Semantic_Field': 'Clothing and grooming', 'COUNT(*)': 56},
 {'Semantic_Field': 'Cognition', 'COUNT(*)': 51},
 {'Semantic_Field': 'Emotions and values', 'COUNT(*)': 48},
 {'Semantic_Field': 'Food and drink', 'COUNT(*)': 80},
 {'Semantic_Field': 'Kinship', 'COUNT(*)': 74},
 {'Semantic_Field': 'Law', 'COUNT(*)': 26},
 {'Semantic_Field': 'Miscellaneous function words', 'COUNT(*)': 1},
 {'Semantic_Field': 'Motion', 'COUNT(*)': 77},
 {'Semantic_Field': 'Possession', 'COUNT(*)': 43},
 {'Semantic_Field': 'Quantity', 'COUNT(*)': 36},
 {'Semantic_Field': 'Religion and belief', 'COUNT(*)': 23},
 {'Semantic_Field': 'Sense perception', 'COUNT(*)': 49},
 {'Semantic_Field': 'Social and political relations', 'COUNT(*)': 35},
 {'Semantic_Field': 'Spatial relations', 'COUNT(*)': 70},
 {'Semantic_Field': 'Speech and lang

In [41]:
execute(cu, f"SELECT Ontological_Category, COUNT(*) FROM ParameterTable WHERE dataset_ID = '{DATASET}' GROUP BY Ontological_Category")

[{'Ontological_Category': 'Action/Process', 'COUNT(*)': 315},
 {'Ontological_Category': 'Number', 'COUNT(*)': 17},
 {'Ontological_Category': 'Other', 'COUNT(*)': 62},
 {'Ontological_Category': 'Person/Thing', 'COUNT(*)': 791},
 {'Ontological_Category': 'Property', 'COUNT(*)': 125}]

In [42]:
SLAVIC_SAMPLE = ['Serbian-Croatian-Bosnian', 'Czech', 'Polish', 'Russian']

In [43]:
GERMANIC_SAMPLE = [
    'German',
    'English',
    'Danish',
    'Swedish',
    'Dutch'
]

In [44]:
make_sql_list(GERMANIC_SAMPLE)

"'German','English','Danish','Swedish','Dutch'"

In [46]:
# Rows (ID, Name, Glottolog_Name) from LanguageTable for the languages named in
# GERMANIC_SAMPLE, restricted to the Indo-European family and to the DATASET
# dataset, ordered by ID.
GERMANIC_GROUP = execute(cu, f"""
SELECT ID, Name, Glottolog_Name  FROM LanguageTable 
WHERE 
Family ='Indo-European' 
AND Glottolog_Name IN ({make_sql_list(GERMANIC_SAMPLE)})
AND dataset_ID = '{DATASET}' ORDER BY ID""")

In [47]:
GERMANIC_GROUP

[{'ID': '186', 'Name': 'Danish', 'Glottolog_Name': 'Danish'},
 {'ID': '187', 'Name': 'Swedish', 'Glottolog_Name': 'Swedish'},
 {'ID': '190', 'Name': 'English', 'Glottolog_Name': 'English'},
 {'ID': '191', 'Name': 'Dutch', 'Glottolog_Name': 'Dutch'},
 {'ID': '194', 'Name': 'German', 'Glottolog_Name': 'German'}]

In [48]:
# Rows (ID, Name, Glottolog_Name) from LanguageTable for the languages named in
# SLAVIC_SAMPLE, restricted to the Indo-European family and to the DATASET
# dataset, ordered by ID.
SLAVIC_GROUP = execute(cu, f"""
SELECT ID, Name, Glottolog_Name  FROM LanguageTable 
WHERE 
Family ='Indo-European' 
AND Glottolog_Name IN ({make_sql_list(SLAVIC_SAMPLE)})
AND dataset_ID = '{DATASET}' ORDER BY ID""")

In [49]:
SLAVIC_GROUP

[{'ID': '201',
  'Name': 'Serbo-Croatian',
  'Glottolog_Name': 'Serbian-Croatian-Bosnian'},
 {'ID': '202', 'Name': 'Czech', 'Glottolog_Name': 'Czech'},
 {'ID': '203', 'Name': 'Polish', 'Glottolog_Name': 'Polish'},
 {'ID': '204', 'Name': 'Russian', 'Glottolog_Name': 'Russian'}]

Colexifications:
- two concepts are colexified in a language if that language has a single form that is used to express both of them
- two languages agree on a colexification if the same pair (concept1, concept2) is colexified in both languages
- for each pair of concepts we can count how many languages in a group share that colexification


In [51]:
CONCEPTS = execute(cu, f"""SELECT * FROM ParameterTable WHERE dataset_ID = '{DATASET}'""")

In [52]:
import pandas as pd

In [53]:
CONCEPTS_DF = pd.DataFrame(CONCEPTS)

In [54]:
ALL_LANGUAGES = GERMANIC_GROUP + SLAVIC_GROUP

In [55]:
ALL_LANGUAGE_IDS = [x['ID'] for x in ALL_LANGUAGES]

In [56]:
LANGUAGE_NAME_BY_ID = {l['ID']: l['Glottolog_Name'] for l in ALL_LANGUAGES}

In [57]:
def invert_dict(d):
    """Return a new dict that maps each value of ``d`` back to its key.

    If several keys share a value, the key seen last in iteration order wins.
    """
    inverted = {}
    for key, value in d.items():
        inverted[value] = key
    return inverted

In [58]:
LANGUAGE_ID_BY_NAME = invert_dict(LANGUAGE_NAME_BY_ID)

In [60]:
CONCEPT_BY_ID = {x['ID']:x for x in CONCEPTS}

In [61]:
FORMS = execute(cu, f"""SELECT * FROM FormTable WHERE dataset_ID = '{DATASET}' AND Language_ID IN ({make_sql_list(ALL_LANGUAGE_IDS)})""")

In [62]:
len(FORMS)

16502

In [63]:
FORMS_DF = pd.DataFrame(FORMS)

In [64]:
FORMS_DF.sample(10)

Unnamed: 0,ID,Local_ID,Language_ID,Parameter_ID,Value,Form,Segments,Comment,Cognacy,Loan,...,salience,effect,contact_situation,original_script,diacl_id,meaning,meaning_note,transliteration,ipa,Local_Orthography
2015,187-12-810-1,,187,12-810,rund,rund,,,,,...,,,,,,,,,,
14429,203-5-791-1,,203,5-791,smar,smar,,,,,...,,,,,,,,,,
9131,201-11-610-2,,201,11-610,posuditi,posuditi,,,,,...,,,,,,,,,,
223,186-11-780-1,,186,11-780,løn,løn,,,,,...,,,,,,,,,,
5761,191-15-851-2,,191,15-851,lauw,lauw,,,,,...,,,,,,,,,,
3038,187-4-470-2,,187,4-470,moder-liv,moder-liv,,,,,...,,,,,,,,,,
6111,191-2-640-1,,191,2-640,schoondochter,schoondochter,,,,,...,,,,,,,,,,
431,186-14-530-1,,186,14-530,ur,ur,,,,,...,,,,,,,,,,
26,186-1-323-1,,186,1-323,ru,ru,,,,,...,,,,,,,,,,
3441,187-8-210-2,,187,8-210,plog,plog,,,,,...,,,,,,,,,,


In [65]:
from collections import defaultdict

In [67]:
# Index: Language_ID -> Parameter_ID (concept) -> list of forms recorded for it.
concepts_forms_by_language = defaultdict(lambda: defaultdict(list))
for f in FORMS:
    lang_id, form, concept = f['Language_ID'], f['Form'], f['Parameter_ID']
    concepts_forms_by_language[lang_id][concept].append(form)

In [68]:
# Index: Language_ID -> form -> list of Parameter_IDs (concepts) the form is recorded under.
form_concepts_by_language = defaultdict(lambda: defaultdict(list))
for f in FORMS:
    lang_id, form, concept = f['Language_ID'], f['Form'], f['Parameter_ID']
    form_concepts_by_language[lang_id][form].append(concept)

In [69]:
from dataclasses import dataclass, field

In [70]:
@dataclass
class ColexificationData:
    """Tally kept for one pair of concepts within one language."""

    # How many times the pair has been recorded as sharing a form.
    n_colexifications: int = 0
    # The shared forms; every instance gets its own fresh list.
    colexifying_forms: list = field(default_factory=lambda: [])

In [71]:
# Per language, map each colexified concept pair to its ColexificationData.
# A pair (concept_1, concept_2) is colexified when one form is listed under both concepts.
colexifications_by_language = defaultdict(lambda: defaultdict(ColexificationData))
for lang, form_concepts in form_concepts_by_language.items():
    for form, concepts in form_concepts.items():
        # If a form is listed more than once under one concept, consider that
        # concept only once (keeping first-seen order) so that a pair is not
        # tallied several times for a single form.
        unique_concepts = list(dict.fromkeys(concepts))
        for concept_1 in unique_concepts:
            for concept_2 in unique_concepts:
                if concept_1 < concept_2:  # keep one orientation of each unordered pair
                    pair_info = colexifications_by_language[lang][(concept_1, concept_2)]
                    pair_info.n_colexifications += 1
                    pair_info.colexifying_forms.append(form)

In [72]:
@dataclass
class GroupColexificationData:
    """Tally kept for one pair of concepts across a group of languages."""

    # Number of languages recorded for the pair.
    n_languages: int = 0
    # The recorded languages; every instance gets its own fresh list.
    languages: list = field(default_factory=lambda: [])
    # Size of the language group the tally was computed over.
    languages_group_size: int = 0

In [73]:
def build_colexification_by_lang_group(colexifications_by_language, language_ids):
    """Aggregate per-language colexifications over a group of languages.

    Args:
        colexifications_by_language: mapping of language ID to a mapping keyed
            by ``(concept_1, concept_2)`` pairs.
        language_ids: IDs of the languages forming the group; entries for any
            other language are ignored.

    Returns:
        dict mapping each concept pair to a GroupColexificationData that lists
        the group's languages having the pair, their count, and the group size.
    """
    by_pair = {}
    for lang, colex_data in colexifications_by_language.items():
        if lang not in language_ids:
            continue
        for (concept_1, concept_2), _pair_info in colex_data.items():
            pair = (concept_1, concept_2)
            if pair not in by_pair:
                by_pair[pair] = GroupColexificationData(languages_group_size=len(language_ids))
            group_info = by_pair[pair]
            group_info.n_languages += 1
            group_info.languages.append(lang)
    return by_pair

In [74]:
all_colexifications = build_colexification_by_lang_group(colexifications_by_language, ALL_LANGUAGE_IDS)

In [75]:
len(all_colexifications)

532

In [76]:
CONCEPTS[0]

{'ID': '1-100',
 'Name': 'world',
 'Concepticon_ID': '965',
 'Concepticon_Gloss': 'WORLD',
 'dataset_ID': 'ids',
 'Ontological_Category': 'Person/Thing',
 'Semantic_Field': 'The physical world',
 'Chinese_Gloss': None,
 'Number': None,
 'Spanish': None,
 'Gloss_in_digital_source': None,
 'NorthEuralex_Gloss': None,
 'DIACL_ID': None,
 'SrcId': None,
 'Description': None,
 'Indonesian': None,
 'Elicitation_Notes': None,
 'Core_Set': None,
 'Comment': None}

In [77]:
SLAVIC_SAMPLE

['Serbian-Croatian-Bosnian', 'Czech', 'Polish', 'Russian']

In [78]:
colexification_records = []

# One record per concept pair in all_colexifications, with the share of
# languages (overall, Slavic sample, Germanic sample) listed for that pair.
for (concept_1, concept_2), info in all_colexifications.items():
    full_concept_1, full_concept_2 = CONCEPT_BY_ID[concept_1], CONCEPT_BY_ID[concept_2]
    concept_cid1, concept_cid2 = full_concept_1['Concepticon_ID'], full_concept_2['Concepticon_ID']
    concept_g1, concept_g2 = full_concept_1['Concepticon_Gloss'], full_concept_2['Concepticon_Gloss']

    languages = [LANGUAGE_NAME_BY_ID[x] for x in info.languages]
    count = len(languages)
    share = count / len(ALL_LANGUAGE_IDS)

    slavic_count = len([x for x in languages if x in SLAVIC_SAMPLE])
    slavic_share = slavic_count / len(SLAVIC_SAMPLE)

    germanic_count = len([x for x in languages if x in GERMANIC_SAMPLE])
    germanic_share = germanic_count / len(GERMANIC_SAMPLE)

    # Pairs whose two parameters carry the same Concepticon_ID are not recorded.
    if concept_cid1 == concept_cid2:
        continue

    colexification_records.append(dict(
        concept_id1=concept_1,
        concept_id2=concept_2,
        concept_cid1=concept_cid1,
        concept_cid2=concept_cid2,
        concept_gloss1=concept_g1,
        concept_gloss2=concept_g2,
        count=count,
        share=share,
        count_slavic=slavic_count,
        share_slavic=slavic_share,
        count_germanic=germanic_count,
        share_germanic=germanic_share,
        share_product_slavic_germanic=slavic_share * germanic_share,
        languages=languages,
    ))

In [79]:
len(colexification_records)

530

In [80]:
colexification_records[15].keys()

dict_keys(['concept_id1', 'concept_id2', 'concept_cid1', 'concept_cid2', 'concept_gloss1', 'concept_gloss2', 'count', 'share', 'count_slavic', 'share_slavic', 'count_germanic', 'share_germanic', 'share_product_slavic_germanic', 'languages'])

In [81]:
colexification_records_df = pd.DataFrame(colexification_records)

In [82]:
def filter_keys(d, ks):
    """Return a new dict holding only the entries of ``d`` whose keys are in ``ks``.

    Keys appear in the order given by ``ks``; a key missing from ``d`` raises KeyError.
    """
    selected = {}
    for key in ks:
        selected[key] = d[key]
    return selected

In [83]:
from IPython import display

In [84]:
# concepts_forms_by_language[LANGUAGE_ID_BY_NAME['Russian']]

In [90]:
def show_records(colexification_records, n=20):
    """Display the first ``n`` records, each followed by a per-language table of forms.

    For every record the summary fields (glosses, counts, shares) are displayed
    first, then a DataFrame with one row per language in ``rec['languages']``
    giving the forms stored for the record's two concepts in that language.
    """
    summary_keys = ['concept_gloss1', 'concept_gloss2',   'count',
                    'share', 'count_slavic', 'share_slavic',
                    'count_germanic', 'share_germanic', 'share_product_slavic_germanic']
    for rec in colexification_records[:n]:
        display.display(filter_keys(rec, summary_keys))

        rows = []
        for lang in rec['languages']:
            # Language names are mapped back to IDs to reach the forms index.
            forms_by_concept = concepts_forms_by_language[LANGUAGE_ID_BY_NAME[lang]]
            rows.append({
                'language': lang,
                'forms_1': forms_by_concept[rec['concept_id1']],
                'forms_2': forms_by_concept[rec['concept_id2']],
            })

        display.display(pd.DataFrame(rows))

In [91]:
show_records(sorted(colexification_records, key=lambda r: (-r['share_product_slavic_germanic'])),n=10)

{'concept_gloss1': 'END (OF SPACE)',
 'concept_gloss2': 'END (OF TIME)',
 'count': 9,
 'share': 1.0,
 'count_slavic': 4,
 'share_slavic': 1.0,
 'count_germanic': 5,
 'share_germanic': 1.0,
 'share_product_slavic_germanic': 1.0}

Unnamed: 0,language,forms_1,forms_2
0,Danish,[ende],"[ende, slutning]"
1,Swedish,"[ända, ände]","[ände, slut]"
2,English,[end],[end]
3,Dutch,[einde],"[einde, slot]"
4,German,[Ende],"[Ende, Schluss]"
5,Serbian-Croatian-Bosnian,"[kraj, konac]","[kraj, završetak]"
6,Czech,"[konec, kraj]",[konec]
7,Polish,[koniec],[koniec]
8,Russian,[konec],[konec]


{'concept_gloss1': 'SON-IN-LAW (OF MAN)',
 'concept_gloss2': 'SON-IN-LAW (OF WOMAN)',
 'count': 9,
 'share': 1.0,
 'count_slavic': 4,
 'share_slavic': 1.0,
 'count_germanic': 5,
 'share_germanic': 1.0,
 'share_product_slavic_germanic': 1.0}

Unnamed: 0,language,forms_1,forms_2
0,Danish,[svigersøn],[svigersøn]
1,Swedish,[svär-son],[svär-son]
2,English,[son-in-law],[son-in-law]
3,Dutch,[schoonzoon],[schoonzoon]
4,German,"[Schwiegersohn, Eidam]",[Schwiegersohn]
5,Serbian-Croatian-Bosnian,[zet],[zet]
6,Czech,[zet’],[zet’]
7,Polish,[zięć],[zięć]
8,Russian,[zjat],[zjat]


{'concept_gloss1': 'KNIFE (FOR EATING)',
 'concept_gloss2': 'KNIFE',
 'count': 9,
 'share': 1.0,
 'count_slavic': 4,
 'share_slavic': 1.0,
 'count_germanic': 5,
 'share_germanic': 1.0,
 'share_product_slavic_germanic': 1.0}

Unnamed: 0,language,forms_1,forms_2
0,Danish,[kniv],[kniv]
1,Swedish,"[knif, kniv]","[knif, kniv]"
2,English,[knife],[knife]
3,Dutch,[mes],[mes]
4,German,[Messer],[Messer]
5,Serbian-Croatian-Bosnian,[nož],[nož]
6,Czech,[nůž],[nůž]
7,Polish,[nóż],[nóż]
8,Russian,[nož],[nož]


{'concept_gloss1': 'SKY',
 'concept_gloss2': 'HEAVEN',
 'count': 8,
 'share': 0.8888888888888888,
 'count_slavic': 4,
 'share_slavic': 1.0,
 'count_germanic': 4,
 'share_germanic': 0.8,
 'share_product_slavic_germanic': 0.8}

Unnamed: 0,language,forms_1,forms_2
0,Danish,[himmel],[himmel]
1,Swedish,"[himmel, sky]",[himmel]
2,Dutch,[hemel],[hemel]
3,German,[Himmel],[Himmel]
4,Serbian-Croatian-Bosnian,[nebo],[nebo]
5,Czech,"[nebe, obloha]",[nebe]
6,Polish,"[niebo, nieboskłon, firmament]",[niebo]
7,Russian,[nebo],"[nebo, raj]"


{'concept_gloss1': 'BRIGHT',
 'concept_gloss2': 'LIGHT (COLOR)',
 'count': 8,
 'share': 0.8888888888888888,
 'count_slavic': 4,
 'share_slavic': 1.0,
 'count_germanic': 4,
 'share_germanic': 0.8,
 'share_product_slavic_germanic': 0.8}

Unnamed: 0,language,forms_1,forms_2
0,Danish,"[lys, klar]",[lys]
1,Swedish,"[ljus, blank, klar, glänsande]",[ljus]
2,Dutch,"[helder, glanzend]","[helder, licht]"
3,German,"[hell, glänzend]",[hell]
4,Serbian-Croatian-Bosnian,"[svijetao, sjajan]",[svijetao]
5,Czech,"[jasný, světlý]","[světlý, jasný]"
6,Polish,"[świetny, jasny]",[jasny]
7,Russian,"[svetlyj, jarkij]",[svetlyj]


{'concept_gloss1': 'THINK (REFLECT)',
 'concept_gloss2': 'THINK (BELIEVE)',
 'count': 8,
 'share': 0.8888888888888888,
 'count_slavic': 3,
 'share_slavic': 0.75,
 'count_germanic': 5,
 'share_germanic': 1.0,
 'share_product_slavic_germanic': 0.75}

Unnamed: 0,language,forms_1,forms_2
0,Danish,[tænke],"[mene, tænke, tro]"
1,Swedish,[tänka],"[tycka, mena, tänka, tro]"
2,English,"[think, reflect]",[think]
3,Dutch,[denken],"[menen, denken, geloven]"
4,German,[denken],"[meinen, denken, glauben]"
5,Serbian-Croatian-Bosnian,"[misliti, razmišljati]",[misliti]
6,Czech,[mysliti],"[mysliti, míniti]"
7,Polish,[myśleć],"[myśleć, mniemać]"


{'concept_gloss1': 'DAUGHTER-IN-LAW (OF MAN)',
 'concept_gloss2': 'DAUGHTER-IN-LAW (OF WOMAN)',
 'count': 8,
 'share': 0.8888888888888888,
 'count_slavic': 3,
 'share_slavic': 0.75,
 'count_germanic': 5,
 'share_germanic': 1.0,
 'share_product_slavic_germanic': 0.75}

Unnamed: 0,language,forms_1,forms_2
0,Danish,[svigerdatter],[svigerdatter]
1,Swedish,[svär-dotter],[svär-dotter]
2,English,[daughter-in-law],[daughter-in-law]
3,Dutch,[schoondochter],[schoondochter]
4,German,"[Schwiegertochter, Schnur]",[Schwiegertochter]
5,Serbian-Croatian-Bosnian,"[snaha, snaja]","[snaha, snaja]"
6,Czech,[snacha],[snacha]
7,Polish,[synowa],[synowa]


{'concept_gloss1': 'WE',
 'concept_gloss2': 'WE (INCLUSIVE)',
 'count': 8,
 'share': 0.8888888888888888,
 'count_slavic': 3,
 'share_slavic': 0.75,
 'count_germanic': 5,
 'share_germanic': 1.0,
 'share_product_slavic_germanic': 0.75}

Unnamed: 0,language,forms_1,forms_2
0,Danish,[vi],[vi]
1,Swedish,[vi],[vi]
2,English,[we],[we]
3,Dutch,[wij],[wij]
4,German,[wir],[wir]
5,Serbian-Croatian-Bosnian,[mi],[mi]
6,Czech,[my],[my]
7,Russian,[my],[my]


{'concept_gloss1': 'WE',
 'concept_gloss2': 'WE (EXCLUSIVE)',
 'count': 8,
 'share': 0.8888888888888888,
 'count_slavic': 3,
 'share_slavic': 0.75,
 'count_germanic': 5,
 'share_germanic': 1.0,
 'share_product_slavic_germanic': 0.75}

Unnamed: 0,language,forms_1,forms_2
0,Danish,[vi],[vi]
1,Swedish,[vi],[vi]
2,English,[we],[we]
3,Dutch,[wij],[wij]
4,German,[wir],[wir]
5,Serbian-Croatian-Bosnian,[mi],[mi]
6,Czech,[my],[my]
7,Russian,[my],[my]


{'concept_gloss1': 'WE (INCLUSIVE)',
 'concept_gloss2': 'WE (EXCLUSIVE)',
 'count': 8,
 'share': 0.8888888888888888,
 'count_slavic': 3,
 'share_slavic': 0.75,
 'count_germanic': 5,
 'share_germanic': 1.0,
 'share_product_slavic_germanic': 0.75}

Unnamed: 0,language,forms_1,forms_2
0,Danish,[vi],[vi]
1,Swedish,[vi],[vi]
2,English,[we],[we]
3,Dutch,[wij],[wij]
4,German,[wir],[wir]
5,Serbian-Croatian-Bosnian,[mi],[mi]
6,Czech,[my],[my]
7,Russian,[my],[my]


In [92]:
show_records(sorted(colexification_records, key=lambda r: (r['share_slavic'],-r['share_germanic'])),n=10)

{'concept_gloss1': 'LIGHT (RADIATION)',
 'concept_gloss2': 'LIGHT (COLOR)',
 'count': 4,
 'share': 0.4444444444444444,
 'count_slavic': 0,
 'share_slavic': 0.0,
 'count_germanic': 4,
 'share_germanic': 0.8,
 'share_product_slavic_germanic': 0.0}

Unnamed: 0,language,forms_1,forms_2
0,Danish,[lys],[lys]
1,Swedish,[ljus],[ljus]
2,English,[light],[light]
3,Dutch,[licht],"[helder, licht]"


{'concept_gloss1': 'THINK (BELIEVE)',
 'concept_gloss2': 'BELIEVE',
 'count': 4,
 'share': 0.4444444444444444,
 'count_slavic': 0,
 'share_slavic': 0.0,
 'count_germanic': 4,
 'share_germanic': 0.8,
 'share_product_slavic_germanic': 0.0}

Unnamed: 0,language,forms_1,forms_2
0,Danish,"[mene, tænke, tro]",[tro]
1,Swedish,"[tycka, mena, tänka, tro]",[tro]
2,Dutch,"[menen, denken, geloven]",[geloven]
3,German,"[meinen, denken, glauben]",[glauben]


{'concept_gloss1': 'SEA',
 'concept_gloss2': 'LAKE',
 'count': 3,
 'share': 0.3333333333333333,
 'count_slavic': 0,
 'share_slavic': 0.0,
 'count_germanic': 3,
 'share_germanic': 0.6,
 'share_product_slavic_germanic': 0.0}

Unnamed: 0,language,forms_1,forms_2
0,Danish,"[hav, sø]","[sø, indsø]"
1,Swedish,"[hav, sjö]","[sjö, in-sjö]"
2,German,"[Meer, See]",[See]


{'concept_gloss1': 'ROUGH (OF SEA)',
 'concept_gloss2': 'ROUGH',
 'count': 3,
 'share': 0.3333333333333333,
 'count_slavic': 0,
 'share_slavic': 0.0,
 'count_germanic': 3,
 'share_germanic': 0.6,
 'share_product_slavic_germanic': 0.0}

Unnamed: 0,language,forms_1,forms_2
0,Danish,[ru],"[ujævn, ru]"
1,English,[rough],[rough]
2,Dutch,[ruw],"[ruw, oneffen]"


{'concept_gloss1': 'LIGHT (RADIATION)',
 'concept_gloss2': 'CANDLE',
 'count': 3,
 'share': 0.3333333333333333,
 'count_slavic': 0,
 'share_slavic': 0.0,
 'count_germanic': 3,
 'share_germanic': 0.6,
 'share_product_slavic_germanic': 0.0}

Unnamed: 0,language,forms_1,forms_2
0,Danish,[lys],[lys]
1,Swedish,[ljus],"[ljus, stearin-ljus, vax-ljus]"
2,German,[Licht],"[Kerze, Licht]"


{'concept_gloss1': 'TURN AROUND',
 'concept_gloss2': 'TWIST (AROUND)',
 'count': 3,
 'share': 0.3333333333333333,
 'count_slavic': 0,
 'share_slavic': 0.0,
 'count_germanic': 3,
 'share_germanic': 0.6,
 'share_product_slavic_germanic': 0.0}

Unnamed: 0,language,forms_1,forms_2
0,Danish,"[dreje, sno]",[dreje]
1,Swedish,"[vrida, sno]","[vrida, vrida till, vricka, för-vrida]"
2,German,[drehen],"[drehen, zwirnen, flechten, drillen]"


{'concept_gloss1': 'TOP',
 'concept_gloss2': 'POINTED',
 'count': 3,
 'share': 0.3333333333333333,
 'count_slavic': 0,
 'share_slavic': 0.0,
 'count_germanic': 3,
 'share_germanic': 0.6,
 'share_product_slavic_germanic': 0.0}

Unnamed: 0,language,forms_1,forms_2
0,Danish,"[top, spids]",[spids]
1,Dutch,"[spits, top]","[spits, puntig]"
2,German,"[Spitze, Gipfel, das Obere]",[Spitze]


{'concept_gloss1': 'DARK',
 'concept_gloss2': 'OBSCURE',
 'count': 3,
 'share': 0.3333333333333333,
 'count_slavic': 0,
 'share_slavic': 0.0,
 'count_germanic': 3,
 'share_germanic': 0.6,
 'share_product_slavic_germanic': 0.0}

Unnamed: 0,language,forms_1,forms_2
0,Danish,"[mørk, dunkel]",[dunkel]
1,Swedish,"[mörk, dunkel]","[dunkel, o-klar, o-tydlig]"
2,German,[dunkel],"[dunkel, unklar]"


{'concept_gloss1': 'SOUL',
 'concept_gloss2': 'MIND',
 'count': 3,
 'share': 0.3333333333333333,
 'count_slavic': 0,
 'share_slavic': 0.0,
 'count_germanic': 3,
 'share_germanic': 0.6,
 'share_product_slavic_germanic': 0.0}

Unnamed: 0,language,forms_1,forms_2
0,Danish,"[sjæl, ånd]","[ånd, sind]"
1,Dutch,"[ziel, geest]","[geest, zin]"
2,German,"[Seele, Geist]","[Verstand, Sinn, Geist]"


{'concept_gloss1': 'BONE',
 'concept_gloss2': 'LEG',
 'count': 3,
 'share': 0.3333333333333333,
 'count_slavic': 0,
 'share_slavic': 0.0,
 'count_germanic': 3,
 'share_germanic': 0.6,
 'share_product_slavic_germanic': 0.0}

Unnamed: 0,language,forms_1,forms_2
0,Danish,"[ben, knogle]",[ben]
1,Swedish,[ben],[ben]
2,Dutch,"[been, knook]",[been]


In [94]:
# Records ordered by descending Slavic share, ties broken by ascending Germanic share.
show_records(
    sorted(colexification_records, key=lambda r: (-r['share_slavic'], r['share_germanic'])),
    n=10,
)

{'concept_gloss1': 'SHEEP',
 'concept_gloss2': 'EWE',
 'count': 4,
 'share': 0.4444444444444444,
 'count_slavic': 4,
 'share_slavic': 1.0,
 'count_germanic': 0,
 'share_germanic': 0.0,
 'share_product_slavic_germanic': 0.0}

Unnamed: 0,language,forms_1,forms_2
0,Serbian-Croatian-Bosnian,[ovca],[ovca]
1,Czech,[ovce],"[ovce, bahnice]"
2,Polish,[owca],[owca]
3,Russian,[ovca],[ovca]


{'concept_gloss1': 'SKIN',
 'concept_gloss2': 'LEATHER',
 'count': 4,
 'share': 0.4444444444444444,
 'count_slavic': 4,
 'share_slavic': 1.0,
 'count_germanic': 0,
 'share_germanic': 0.0,
 'share_product_slavic_germanic': 0.0}

Unnamed: 0,language,forms_1,forms_2
0,Serbian-Croatian-Bosnian,[koža],[koža]
1,Czech,"[kůže, pokožka]",[kůže]
2,Polish,[skóra],[skóra]
3,Russian,"[koža, škura]",[koža]


{'concept_gloss1': 'FOOD',
 'concept_gloss2': 'MEAL',
 'count': 4,
 'share': 0.4444444444444444,
 'count_slavic': 4,
 'share_slavic': 1.0,
 'count_germanic': 0,
 'share_germanic': 0.0,
 'share_product_slavic_germanic': 0.0}

Unnamed: 0,language,forms_1,forms_2
0,Serbian-Croatian-Bosnian,"[hrana, jelo]","[jelo, objed]"
1,Czech,"[jídlo, pokrm, potrava]",[jídlo]
2,Polish,"[jadło, pokarm, potrawa]","[jedzenie, jadło]"
3,Russian,"[pišča, jeda]",[jeda]


{'concept_gloss1': 'LANGUAGE',
 'concept_gloss2': 'TONGUE',
 'count': 5,
 'share': 0.5555555555555556,
 'count_slavic': 4,
 'share_slavic': 1.0,
 'count_germanic': 1,
 'share_germanic': 0.2,
 'share_product_slavic_germanic': 0.2}

Unnamed: 0,language,forms_1,forms_2
0,German,"[Sprache, Zunge]",[Zunge]
1,Serbian-Croatian-Bosnian,[jezik],[jezik]
2,Czech,[jazyk],[jazyk]
3,Polish,[język],[język]
4,Russian,[jazyk],[jazyk]


{'concept_gloss1': "MOTHER'S SISTER",
 'concept_gloss2': "FATHER'S SISTER",
 'count': 5,
 'share': 0.5555555555555556,
 'count_slavic': 4,
 'share_slavic': 1.0,
 'count_germanic': 1,
 'share_germanic': 0.2,
 'share_product_slavic_germanic': 0.2}

Unnamed: 0,language,forms_1,forms_2
0,German,[Tante],[Tante]
1,Serbian-Croatian-Bosnian,[tetka],[tetka]
2,Czech,[teta],[teta]
3,Polish,"[ciotka, ciocia]","[ciotka, ciocia]"
4,Russian,"[t ́otja, mamina sestra]","[t ́otja, otcova sestra]"


{'concept_gloss1': 'HIGH',
 'concept_gloss2': 'TALL',
 'count': 6,
 'share': 0.6666666666666666,
 'count_slavic': 4,
 'share_slavic': 1.0,
 'count_germanic': 2,
 'share_germanic': 0.4,
 'share_product_slavic_germanic': 0.4}

Unnamed: 0,language,forms_1,forms_2
0,Danish,[høj],"[stor, høj]"
1,Dutch,[hoog],"[lang, hoog]"
2,Serbian-Croatian-Bosnian,[visok],[visok]
3,Czech,[vysoký],[vysoký]
4,Polish,[wysoki],[wysoki]
5,Russian,[vysokij],[vysokij]


{'concept_gloss1': 'UNCLE',
 'concept_gloss2': "FATHER'S BROTHER",
 'count': 6,
 'share': 0.6666666666666666,
 'count_slavic': 4,
 'share_slavic': 1.0,
 'count_germanic': 2,
 'share_germanic': 0.4,
 'share_product_slavic_germanic': 0.4}

Unnamed: 0,language,forms_1,forms_2
0,Swedish,"[far-bror, mor-bror, onkel]",[far-bror]
1,German,"[Onkel, Oheim]","[Onkel, Oheim]"
2,Serbian-Croatian-Bosnian,"[stric, ujak, tetak]",[stric]
3,Czech,"[strýc, ujec]",[strýc]
4,Polish,"[stryj, wuj]",[stryj]
5,Russian,[djadja],"[djadja, otcov brat]"


{'concept_gloss1': 'UNCLE',
 'concept_gloss2': "MOTHER'S BROTHER",
 'count': 6,
 'share': 0.6666666666666666,
 'count_slavic': 4,
 'share_slavic': 1.0,
 'count_germanic': 2,
 'share_germanic': 0.4,
 'share_product_slavic_germanic': 0.4}

Unnamed: 0,language,forms_1,forms_2
0,Swedish,"[far-bror, mor-bror, onkel]",[mor-bror]
1,German,"[Onkel, Oheim]","[Onkel, Oheim]"
2,Serbian-Croatian-Bosnian,"[stric, ujak, tetak]",[ujak]
3,Czech,"[strýc, ujec]",[ujec]
4,Polish,"[stryj, wuj]","[wuj, wujek]"
5,Russian,[djadja],"[djadja, mamin brat]"


{'concept_gloss1': 'SKY',
 'concept_gloss2': 'HEAVEN',
 'count': 8,
 'share': 0.8888888888888888,
 'count_slavic': 4,
 'share_slavic': 1.0,
 'count_germanic': 4,
 'share_germanic': 0.8,
 'share_product_slavic_germanic': 0.8}

Unnamed: 0,language,forms_1,forms_2
0,Danish,[himmel],[himmel]
1,Swedish,"[himmel, sky]",[himmel]
2,Dutch,[hemel],[hemel]
3,German,[Himmel],[Himmel]
4,Serbian-Croatian-Bosnian,[nebo],[nebo]
5,Czech,"[nebe, obloha]",[nebe]
6,Polish,"[niebo, nieboskłon, firmament]",[niebo]
7,Russian,[nebo],"[nebo, raj]"


{'concept_gloss1': 'BRIGHT',
 'concept_gloss2': 'LIGHT (COLOR)',
 'count': 8,
 'share': 0.8888888888888888,
 'count_slavic': 4,
 'share_slavic': 1.0,
 'count_germanic': 4,
 'share_germanic': 0.8,
 'share_product_slavic_germanic': 0.8}

Unnamed: 0,language,forms_1,forms_2
0,Danish,"[lys, klar]",[lys]
1,Swedish,"[ljus, blank, klar, glänsande]",[ljus]
2,Dutch,"[helder, glanzend]","[helder, licht]"
3,German,"[hell, glänzend]",[hell]
4,Serbian-Croatian-Bosnian,"[svijetao, sjajan]",[svijetao]
5,Czech,"[jasný, světlý]","[světlý, jasný]"
6,Polish,"[świetny, jasny]",[jasny]
7,Russian,"[svetlyj, jarkij]",[svetlyj]


In [93]:
# Germanic-only colexifications that include English:
#   1. rank all records by descending Germanic share, breaking ties by
#      ascending Slavic share;
#   2. keep the records with no Slavic count whose 'languages' contain English;
#   3. hand the resulting list to show_records with n=10
#      (presumably a cap on how many records get displayed -- TODO confirm).
show_records(
    list(
        filter(
            lambda rec: rec['count_slavic'] <= 0 and ('English' in rec['languages']),
            sorted(
                colexification_records,
                key=lambda rec: (-rec['share_germanic'], rec['share_slavic']),
            ),
        )
    ),
    n=10,
)

{'concept_gloss1': 'LIGHT (RADIATION)',
 'concept_gloss2': 'LIGHT (COLOR)',
 'count': 4,
 'share': 0.4444444444444444,
 'count_slavic': 0,
 'share_slavic': 0.0,
 'count_germanic': 4,
 'share_germanic': 0.8,
 'share_product_slavic_germanic': 0.0}

Unnamed: 0,language,forms_1,forms_2
0,Danish,[lys],[lys]
1,Swedish,[ljus],[ljus]
2,English,[light],[light]
3,Dutch,[licht],"[helder, licht]"


{'concept_gloss1': 'ROUGH (OF SEA)',
 'concept_gloss2': 'ROUGH',
 'count': 3,
 'share': 0.3333333333333333,
 'count_slavic': 0,
 'share_slavic': 0.0,
 'count_germanic': 3,
 'share_germanic': 0.6,
 'share_product_slavic_germanic': 0.0}

Unnamed: 0,language,forms_1,forms_2
0,Danish,[ru],"[ujævn, ru]"
1,English,[rough],[rough]
2,Dutch,[ruw],"[ruw, oneffen]"


{'concept_gloss1': 'PLANT (VEGETATION)',
 'concept_gloss2': 'PLANT (SOMETHING)',
 'count': 2,
 'share': 0.2222222222222222,
 'count_slavic': 0,
 'share_slavic': 0.0,
 'count_germanic': 2,
 'share_germanic': 0.4,
 'share_product_slavic_germanic': 0.0}

Unnamed: 0,language,forms_1,forms_2
0,Danish,"[plante, urt]",[plante]
1,English,[plant],[plant]


{'concept_gloss1': 'FAN (OBJECT)',
 'concept_gloss2': 'FAN ACTION',
 'count': 2,
 'share': 0.2222222222222222,
 'count_slavic': 0,
 'share_slavic': 0.0,
 'count_germanic': 2,
 'share_germanic': 0.4,
 'share_product_slavic_germanic': 0.0}

Unnamed: 0,language,forms_1,forms_2
0,Danish,[vifte],[vifte]
1,English,[fan],[fan]


{'concept_gloss1': 'TRAP (PITFALL)',
 'concept_gloss2': 'TRAP (CATCH)',
 'count': 2,
 'share': 0.2222222222222222,
 'count_slavic': 0,
 'share_slavic': 0.0,
 'count_germanic': 2,
 'share_germanic': 0.4,
 'share_product_slavic_germanic': 0.0}

Unnamed: 0,language,forms_1,forms_2
0,Swedish,"[fälla, snara]","[sätta ut fällor, locka i fällan, snara, sn..."
1,English,[trap],[trap]


{'concept_gloss1': 'DRINK',
 'concept_gloss2': 'BEVERAGE',
 'count': 2,
 'share': 0.2222222222222222,
 'count_slavic': 0,
 'share_slavic': 0.0,
 'count_germanic': 2,
 'share_germanic': 0.4,
 'share_product_slavic_germanic': 0.0}

Unnamed: 0,language,forms_1,forms_2
0,Swedish,[dricka],"[dryck, dricka]"
1,English,[drink],"[beverage, drink]"


{'concept_gloss1': 'THOU',
 'concept_gloss2': 'YOU',
 'count': 2,
 'share': 0.2222222222222222,
 'count_slavic': 0,
 'share_slavic': 0.0,
 'count_germanic': 2,
 'share_germanic': 0.4,
 'share_product_slavic_germanic': 0.0}

Unnamed: 0,language,forms_1,forms_2
0,English,[you],[you]
1,German,"[du, Sie]","[ihr, Sie]"


{'concept_gloss1': 'LAND',
 'concept_gloss2': 'LAND (DESCEND)',
 'count': 1,
 'share': 0.1111111111111111,
 'count_slavic': 0,
 'share_slavic': 0.0,
 'count_germanic': 1,
 'share_germanic': 0.2,
 'share_product_slavic_germanic': 0.0}

Unnamed: 0,language,forms_1,forms_2
0,English,"[earth, land]",[land]


{'concept_gloss1': 'PLAIN',
 'concept_gloss2': 'CLEAR',
 'count': 1,
 'share': 0.1111111111111111,
 'count_slavic': 0,
 'share_slavic': 0.0,
 'count_germanic': 1,
 'share_germanic': 0.2,
 'share_product_slavic_germanic': 0.0}

Unnamed: 0,language,forms_1,forms_2
0,English,"[plain, field]","[clear, plain]"


{'concept_gloss1': 'SPRING OR WELL',
 'concept_gloss2': 'SPRINGTIME',
 'count': 1,
 'share': 0.1111111111111111,
 'count_slavic': 0,
 'share_slavic': 0.0,
 'count_germanic': 1,
 'share_germanic': 0.2,
 'share_product_slavic_germanic': 0.0}

Unnamed: 0,language,forms_1,forms_2
0,English,"[spring, well]",[spring]
