In [2]:
import pandas as pd
import sh

I made the file below, etymwn-smaller.tsv, by running these unix commands: 

First, get only those entries with the relation "rel:etymology": 

```sh
    grep "rel:etymology" etymwn.tsv > etymwn-small.tsv 
```

Now we can remove the relation column, since it's all "rel:etymology": 

```sh
    cat etymwn-small.tsv | cut -f1,3 > etymwn-smaller.tsv
```

In [3]:
etymwn=pd.read_csv('etymwn-smaller.tsv', sep='\t')
etymwn.columns = ['word', 'parent']

In [4]:
class Etymwn(pd.DataFrame): 
    def lookup(self, word, language="eng"): 
        # Finds the first-generation ancestor(s) of a word. 
        return etymwn[etymwn['word']==language + ": " + word]['parent'].tolist()
    def lookupNonEng(self, word, language="eng"): 
        # Look up non-English roots of words, so that, e.g. looking up "cannot":
        # lookup("cannot") -> ["eng: can", "eng: not"]
        #   | -> lookup("can") -> ['enm: can', 'enm: canne']
        parents = self.lookup(word, language)
        
    def lookupAll(self, word, language="eng"): 
        # Data Structure: lookupAll("cannot") 
        # { 'gen1': { 'eng: can': ['enm: can', 'enm: canne'] }, 
        #   'gen2': { 'enm: can': ['ang: can'], 
        #             'enm: canne': ['ang: canne'] }
        # }
        parents = self.lookup(word, language)
        etym = { 'gen1': { word: parents }}
        if len(parents) == 1: 
            etym.update( { 'gen2': { parents[0] }})
        else: 
            for parent in parents: 
                etym.update( { 'gen2': { parent: self.lookup(parent, language) } } )
        return etym
        

In [5]:
myetymwn = Etymwn(etymwn)

In [6]:
obj = myetymwn.lookupAll("cannot")

In [9]:
etymwn

Unnamed: 0,word,parent
0,afr: Japan,nld: Japan
1,afr: Maandag,nld: maandag
2,afr: Maleis,nld: Maleis
3,afr: Pieter,nld: Pieter
4,afr: Woensdag,afr: woensdag
5,afr: aalmoes,nld: aalmoes
6,afr: aalmoesenier,nld: aalmoezenier
7,afr: aalwee,nld: aloë
8,afr: aambeeld,nld: aambeeld
9,afr: aambei,nld: aambei


In [245]:
myetymwn.lookup("not")

['enm: not']

In [227]:
d = {'apples': 'delicious', 'bananas': 'ok'}

In [228]:
e = {'pineapples': 'wtf', 'oranges': 'yes please'}

In [231]:
d.update(e)

In [232]:
d

{'apples': 'delicious',
 'bananas': 'ok',
 'oranges': 'yes please',
 'pineapples': 'wtf'}