# A first look at the spaCy library

In [12]:
# Spacy library
# In terminal:
# pip install spacy
import spacy

## French

In [14]:
# Prerequisite (run once in a terminal):
#   spacy download fr_core_news_sm
# Load the small French pipeline; `nlp` is used for the French examples below.
nlp = spacy.load("fr_core_news_sm")
# English alternative, kept for reference:
# nlp = spacy.load("en_core_web_sm")

### Trying a sentence in French

In [15]:
# Run the French pipeline on a sample sentence; the cells below inspect `doc`.
doc = nlp("Elle voulut aller sur le bord de la mer")

In [16]:
# Surface text of every token, in sentence order.
[tok.text for tok in doc]

['Elle', 'voulut', 'aller', 'sur', 'le', 'bord', 'de', 'la', 'mer']

In [17]:
# Coarse-grained part-of-speech label of every token.
[tok.pos_ for tok in doc]

['PRON', 'VERB', 'VERB', 'ADP', 'DET', 'NOUN', 'ADP', 'DET', 'VERB']

In [18]:
# Fine-grained tag of every token (compare with the pos_ output of the previous cell).
[tok.tag_ for tok in doc]

['PRON', 'VERB', 'VERB', 'ADP', 'DET', 'NOUN', 'ADP', 'DET', 'VERB']

In [19]:
# Dependency relation label that links every token to its syntactic head.
[tok.dep_ for tok in doc]

['nsubj', 'ROOT', 'xcomp', 'case', 'det', 'obl:arg', 'case', 'obj', 'obl:arg']

In [20]:
# Human-readable description of the dependency label "xcomp".
spacy.explain("xcomp")

'open clausal complement'

In [21]:
# Human-readable description of the dependency label "case".
spacy.explain("case")

'case marking'

In [22]:
# Human-readable description of "obl", the base of the "obl:arg" label seen in the parse.
spacy.explain("obl")

'oblique nominal'

In [23]:
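# spacy.explain("obl:arg")
# spacy.explain("arg")

# Neither call produced an explanation. "obl:arg" is a subtype of "obl",
# so the base label "obl" is the one to look up.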

In [24]:
# Data sources recorded in the loaded pipeline's metadata.
nlp.meta["sources"]

[{'name': 'UD French Sequoia v2.8',
  'url': 'https://github.com/UniversalDependencies/UD_French-Sequoia',
  'license': 'LGPL-LR',
  'author': 'Candito, Marie; Seddah, Djamé; Perrier, Guy; Guillaume, Bruno'},
 {'name': 'WikiNER',
  'url': 'https://figshare.com/articles/Learning_multilingual_named_entity_recognition_from_Wikipedia/5462500',
  'license': 'CC BY 4.0',
  'author': 'Joel Nothman, Nicky Ringland, Will Radford, Tara Murphy, James R Curran'},
 {'name': 'spaCy lookups data',
  'author': 'Explosion',
  'url': 'https://github.com/explosion/spacy-lookups-data',
  'license': 'MIT'}]

In [25]:
# Text of the syntactic head of every token, i.e. the token it attaches to
# in the dependency parse.
[tok.head.text for tok in doc]

['voulut', 'voulut', 'voulut', 'bord', 'bord', 'aller', 'mer', 'mer', 'aller']

In [26]:
# Text of every noun chunk the pipeline finds in the sentence.
[noun_chunk.text for noun_chunk in doc.noun_chunks]

['Elle', 'le bord']

In [27]:
from spacy import displacy

In [28]:
# Draw the dependency parse of the French sentence.
displacy.render(doc, style="dep")

In [29]:
# displacy.render(doc, style="ent")
# Skipped: the named-entity view is not useful for this sentence.

### Trying a smaller group of words

In [30]:
# Parse only the prepositional phrase from the sentence above.
# NOTE: this rebinds `doc`; earlier cells re-run after this one will see the
# shorter phrase instead of the full sentence.
doc = nlp("Sur le bord de la mer")

In [31]:
# Draw the dependency parse of the shorter phrase.
displacy.render(doc, style="dep")

In [32]:
# Human-readable description of the dependency label "nmod".
spacy.explain("nmod")

'modifier of nominal'

The parse is more satisfactory when this phrase is analysed on its own.

### The transformer (trf) pipeline does not work for now

In [53]:
import spacy_transformers

In [49]:
# In terminal:
# spacy download fr_dep_news_trf
# nlp2 = spacy.load('fr_dep_news_trf')

In [50]:
# doc2 = nlp2("Elle voulut aller sur le bord de la mer")

In [40]:
# displacy.render(doc2, style="dep")

Problem with sentencepiece; set aside for now.

## German

In [57]:
# Prerequisite (run once in a terminal):
#   spacy download de_core_news_sm
#   spacy download de_dep_news_trf
# Transformer-based alternative, kept for reference:
# nlpDE = spacy.load("de_dep_news_trf")
# Load the small German pipeline; `nlpDE` is used for the German examples below.
nlpDE = spacy.load("de_core_news_sm")

In [66]:
# A short German sentence with a single clause.
docDE = nlpDE("Ich habe Bambus geschnitten.")
# A longer sentence with several comma-separated clauses.
docDE2 = nlpDE("Ein Nichts waren wir, sind wir, werden wir bleiben, blühend.")
# Text of the second sentence, for reference:
# Ein Nichts waren wir, sind wir, werden wir bleiben, blühend.

In [64]:
# Surface text of every token in the short German sentence.
[tok.text for tok in docDE]

['Ich', 'habe', 'Bambus', 'geschnitten', '.']

In [65]:
# Coarse-grained part-of-speech label of every token in the short German sentence.
[tok.pos_ for tok in docDE]

['PRON', 'AUX', 'NOUN', 'VERB', 'PUNCT']

In [67]:
# Coarse-grained part-of-speech label of every token in the longer German sentence.
[tok.pos_ for tok in docDE2]

['DET',
 'PRON',
 'AUX',
 'PRON',
 'PUNCT',
 'AUX',
 'PRON',
 'PUNCT',
 'AUX',
 'PRON',
 'VERB',
 'PUNCT',
 'ADV',
 'PUNCT']

In [68]:
# Draw the dependency parse of the short German sentence.
displacy.render(docDE, style="dep")

In [69]:
# Describe the two dependency labels, one description per line.
print(*(spacy.explain(dep_label) for dep_label in ("oc", "oa")), sep="\n")

clausal object
accusative object


In [70]:
# Draw the dependency parse of the longer German sentence.
displacy.render(docDE2, style="dep")

In [71]:
# Describe the three dependency labels, one description per line.
print(*(spacy.explain(dep_label) for dep_label in ("pd", "nk", "cj")), sep="\n")

predicate
noun kernel element
conjunct


In [72]:
# Morphological features of the token at index 2 of the longer sentence
# (presumably "waren", given the sentence text — confirm against the tokenization).
print(docDE2[2].morph)

Mood=Ind|Number=Plur|Person=1|Tense=Past|VerbForm=Fin


In [74]:
# Position of every token within its document.
[tok.i for tok in docDE]

[0, 1, 2, 3, 4]

In [82]:
# One boolean per token: True where the coarse part-of-speech label is VERB.
Is_Verb = [tok.pos_ == 'VERB' for tok in docDE]
Is_Verb

[False, False, False, True, False]

In [86]:
import pandas as pd

In [87]:
# Single-column frame "V" holding the per-token verb flags.
Cat = pd.DataFrame(Is_Verb, columns=["V"])

In [90]:
# One row per token of the short German sentence; column "cat" holds its
# coarse part-of-speech label.
satz = pd.DataFrame(
    {"cat": [tok.pos_ for tok in docDE]}
)

In [97]:
# Keep only the rows whose part-of-speech label is VERB. Bracket access is
# used for the column so the name cannot be confused with a DataFrame attribute.
satz.loc[satz["cat"] == "VERB"]

Unnamed: 0,cat
3,VERB
