In [3]:
#pip install benepar
#pip install protobuf==3.20.0

import spacy
import numpy as np

class Parser():
    
    def __init__(self,):
        model = 'fr_core_news_sm'
        if not spacy.util.is_package(model):
            spacy.cli.download(model)
    
        self.nlp = spacy.load(model)
    
    def process(self, sentence):
        doc = self.nlp(sentence)
        assert len(list(doc.sents)) == 1
        sent = list(doc.sents)[0]
        return sent
        
class DependencyParser(Parser):
    def parse(self, sentence):
        sent = self.process(sentence)
        closeds = []
        for current in range(1, len(sent)+1):
            closed = 0
            for position, word in enumerate(sent): # [:current]
                closed += self._is_closed(word, current)
            closeds.append(closed)
        
        closing = np.r_[np.diff(closeds), closeds[-1]]
        return list(zip(sent, closing))
    
    def _is_closed(self, node, position):
        if node.i > position:
            return False
        
        for child in node.children:
            if child.i > position:
                return False
            if not self._is_closed(child, position):
                return False

        return True

class ConstituentParser(Parser):
    def __init__(self):
        import os
        import benepar
        super().__init__()
        ben_model = 'benepar_fr2'
        benepar.download(ben_model)
        os.environ['PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION'] = 'python'
        self.nlp.add_pipe(benepar.BeneparComponent(ben_model))
        
    def parse(self, sentence):
        
        sent = self.process(sentence)
        tree = sent._.parse_string
        opening = 0
        labels = []
        for here in sent._.parse_string.split('('):
            split = here.split()
            if len(split):
                opening += 1
                closing = 0
                if len(split)>1:
                    word = split[1].strip(')')
                    closing = split[1].count(')')
                    labels.append((word, closing))
                    opened = 0
        return labels

#const_parser = ConstituentParser()

dep_parser = DependencyParser()

sentence = "les petits chats de Mamie suivent la souris verte."

#print(const_parser.parse(sentence))

print(dep_parser.parse(sentence))

Collecting fr-core-news-sm==3.3.0
  Downloading https://github.com/explosion/spacy-models/releases/download/fr_core_news_sm-3.3.0/fr_core_news_sm-3.3.0-py3-none-any.whl (16.3 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 16.3/16.3 MB 4.6 MB/s eta 0:00:00
Installing collected packages: fr-core-news-sm
Successfully installed fr-core-news-sm-3.3.0
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('fr_core_news_sm')


[nltk_data] Downloading package benepar_fr2 to /home/co/nltk_data...
[nltk_data]   Unzipping models/benepar_fr2.zip.


ValueError: [E966] `nlp.add_pipe` now takes the string name of the registered component factory, not a callable component. Expected string, but got <benepar.integrations.spacy_plugin.BeneparComponent object at 0x7fe024f1add0> (name: 'None').

- If you created your component with `nlp.create_pipe('name')`: remove nlp.create_pipe and call `nlp.add_pipe('name')` instead.

- If you passed in a component like `TextCategorizer()`: call `nlp.add_pipe` with the string name instead, e.g. `nlp.add_pipe('textcat')`.

- If you're using a custom component: Add the decorator `@Language.component` (for function components) or `@Language.factory` (for class components / factories) to your custom component and assign it a name, e.g. `@Language.component('your_name')`. You can then run `nlp.add_pipe('your_name')` to add it to the pipeline.

In [5]:
dep_parser = DependencyParser()

sentence = "les petits chats de Mamie suivent la souris verte."

print(dep_parser.parse(sentence))

[(les, 0), (petits, 1), (chats, 2), (de, 0), (Mamie, 1), (suivent, 0), (la, 2), (souris, 2), (verte, 0), (., 10)]
