## Morphalou data: from XML to DataFrame, to .csv  
In this Notebook: code from which the documents [all_nouns.csv](all_nouns.csv), [all_verbs.csv](all_verbs.csv) and [all_adjectives.csv](all_adjectives.csv) were generated from initial [Morphalou3 data](data).

### 1. Nouns

In [3]:
import xml.etree.ElementTree as et
import pandas as pd
%matplotlib inline

xtree = et.parse("data/commonNoun_Morphalou3_LMF.xml")
xroot = xtree.getroot()

dict_list = []
counter = 0

for le in xroot.iter('lexicalEntry'):
    le_dict = {}
    for lemma in le.iter('lemmatizedForm'):
        le_dict['lemma'] = lemma.find('orthography').text
        try:
            le_dict['gender'] = lemma.find('grammaticalGender').text
        except:
            pass
        le_dict['category'] = lemma.find('grammaticalCategory').text
        
    for form in le.iter('inflectedForm'):
        gn = form.find('grammaticalNumber').text
        le_dict[gn] = form.find('orthography').text
    
    dict_list.append(le_dict)
    counter += 1
    if counter % 1000 == 0:
        print('.', end='')

nouns_df = pd.DataFrame(dict_list)
nouns_df

......................................................................................................

Unnamed: 0,lemma,gender,category,invariable,singular,plural
0,100-mètres,masculine,commonNoun,100-mètres,,
1,2D,feminine,commonNoun,2D,,
2,3D,feminine,commonNoun,3D,,
3,A,masculine,commonNoun,µA,,
4,a,masculine,commonNoun,a,,
...,...,...,...,...,...,...
102220,φ,masculine,commonNoun,φ,,
102221,χ,masculine,commonNoun,χ,,
102222,ψ,masculine,commonNoun,ψ,,
102223,ω,masculine,commonNoun,ω,,


In [4]:
nouns_df.to_csv('all_nouns.csv')

## 2. Verbs

In [9]:
xtree = et.parse("data/verb_Morphalou3_LMF.xml")
xroot = xtree.getroot()

dict_list = []
counter = 0

for le in xroot.iter('lexicalEntry'):
    le_dict = {}
    for lemma in le.iter('lemmatizedForm'):
        le_dict['lemma'] = lemma.find('orthography').text
       
        
    for form in le.iter('inflectedForm'):
        try:
            gn = form.find('grammaticalNumber').text  
        
        except:
            pass
        
        try:
            gm = form.find('grammaticalMood').text
       
        except:
            pass

        try:
            gt = form.find('grammaticalTense').text
            
        except:
            pass
        
        try:
            gp = form.find('grammaticalPerson').text
            
        except:
            pass
        try:
            gg=form.find('grammaticalGender').text
           
        except:
            pass

        try:
            le_dict[gn,gm,gt,gp,gg] = form.find('orthography').text
        except:
            pass
        
       
    
    dict_list.append(le_dict)
    counter += 1
    if counter % 1000 == 0:
        print('.', end='')

verbs_df = pd.DataFrame(dict_list)
verbs_df

..............

Unnamed: 0,lemma,"(singular, indicative, simplePast, thirdPerson, masculine)","(singular, indicative, simplePast, firstPerson, masculine)","(plural, indicative, imperfect, thirdPerson, masculine)","(singular, indicative, imperfect, secondPerson, masculine)","(singular, indicative, imperfect, thirdPerson, masculine)","(singular, participle, present, thirdPerson, masculine)","(singular, indicative, simplePast, secondPerson, masculine)","(singular, subjunctive, imperfect, firstPerson, masculine)","(plural, subjunctive, imperfect, thirdPerson, masculine)",...,"(invariable, participle, present, thirdPerson, invariable)","(plural, infinitive, imperfect, firstPerson, invariable)","(invariable, indicative, present, thirdPerson, masculine)","(invariable, infinitive, present, thirdPerson, masculine)","(plural, participle, present, secondPerson, feminine)","(singular, infinitive, imperfect, thirdPerson, feminine)","(plural, infinitive, past, firstPerson, masculine)","(invariable, infinitive, past, secondPerson, invariable)","(invariable, indicative, present, thirdPerson, feminine)","(plural, infinitive, present, secondPerson, invariable)"
0,abaisser,abaissa,abaissai,abaissaient,abaissais,abaissait,abaissant,abaissas,abaissasse,abaissassent,...,,,,,,,,,,
1,abalober,,,,,,,,,,...,,,,,,,,,,
2,abalourdir,,,,,,,,,,...,,,,,,,,,,
3,abandonner,abandonna,abandonnai,abandonnaient,abandonnais,abandonnait,abandonnant,abandonnas,abandonnasse,abandonnassent,...,,,,,,,,,,
4,abasourdir,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14757,îloter,,,,,,,,,,...,,,,,,,,,,
14758,ôter,ôta,ôtai,ôtaient,ôtais,ôtait,ôtant,ôtas,ôtasse,ôtassent,...,,,,,,,,,,
14759,œiller,,,,,,,,,,...,,,,,,,,,,
14760,œilletonner,,,,,,,,,,...,,,,,,,,,,


In [10]:
verbs_df.to_csv('all_verbs.csv')

## 3. Adjectives

In [11]:
xtree = et.parse("data/adjective_Morphalou3_LMF.xml")
xroot = xtree.getroot()

dict_list = []
counter = 0

for le in xroot.iter('lexicalEntry'):
    le_dict = {}
    for lemma in le.iter('lemmatizedForm'):
        le_dict['lemma'] = lemma.find('orthography').text
       
        
    for form in le.iter('inflectedForm'):
        try:
            gn = form.find('grammaticalNumber').text  
        
        except:
            pass
        
        try:
            gg=form.find('grammaticalGender').text
           
        except:
            pass

        try:
            le_dict[gn,gg] = form.find('orthography').text
        except:
            pass
        
       
    
    dict_list.append(le_dict)
    counter += 1
    if counter % 1000 == 0:
        print('.', end='')

adjectives_df = pd.DataFrame(dict_list)
adjectives_df

....................................

Unnamed: 0,lemma,"(singular, masculine)","(plural, masculine)","(singular, feminine)","(plural, feminine)","(singular, invariable)","(plural, invariable)","(invariable, masculine)","(invariable, invariable)","(invariable, feminine)"
0,a-humain,a-humain,,,,,,,,
1,a-raciste,a-raciste,,,,,,,,
2,aalénien,aalénien,aaléniens,aalénienne,aaléniennes,,,,,
3,aaronide,aaronide,,,,,,,,
4,abactérien,abactérien,abactériens,abactérienne,abactériennes,,,,,
...,...,...,...,...,...,...,...,...,...,...
36518,œstrogène,,,,,œstrogène,œstrogènes,,,
36519,œstrogénique,,,,,œstrogénique,œstrogéniques,,,
36520,œstromane,,,,,œstromane,œstromanes,,,
36521,œstroprogestatif,œstroprogestatif,œstroprogestatifs,œstroprogestative,œstroprogestatives,,,,,


In [12]:
adjectives_df.to_csv("all_adjectives.csv")