In [1]:
# Import necessary libraries
import nltk
from nltk.parse.dependencygraph import DependencyGraph
from nltk import Tree
nltk.download('punkt')
import spacy
from spacy import displacy

# Load the English language model using spaCy
nlp = spacy.load("en_core_web_sm")

# Define the input sentence
sentence = "John saw Mary"

# Parse the input sentence with spaCy
doc = nlp(sentence)

# Render and display the dependency tree using spaCy's displacy
displacy.render(doc, style='dep', jupyter=True)

# Define a function to convert spaCy's parsed document into CoNLL format
def spacy_to_conll(doc):
    conll_format = ""
    for token in doc:
        head_id = token.head.i + 1 if token.head is not None else 0
        conll_format += f"{token.i+1}\t{token.text}\t{token.lemma_}\t{token.pos_}\t{token.tag_}\t_\t{head_id}\t{token.dep_}\t_\t_\n"
    return conll_format

# Convert the parsed document into CoNLL format
conll_output = spacy_to_conll(doc)
print(conll_output)

# Constants for transition actions
SHIFT = 'SHIFT'
LEFT_ARC = 'LEFT-ARC'
RIGHT_ARC = 'RIGHT-ARC'
REDUCE = 'REDUCE'

# Define a function to check if a reduction action is applicable
def reduce(stack, buffer, dependencies):
    if len(stack) > 1:
        top_stack, second_stack = stack[-1], stack[-2]

        if any(token[2] == top_stack[0] for token in stack[:-1]):
            return True
    return False

# Define a function to determine the next transition action
def oracle(stack, buffer, dependencies):
    if len(stack) > 1:
        top_stack, second_stack = stack[-1], stack[-2]

        if second_stack and top_stack[0] == second_stack[2]:
            return LEFT_ARC
        elif top_stack and second_stack and top_stack[2] == second_stack[0]:
            return RIGHT_ARC
        elif reduce(stack, buffer, dependencies):
            return REDUCE
    if buffer:
        return SHIFT
    return None

# Define a function to perform transition-based dependency parsing
def transition_based_dependency_parse(sentence):
    doc = nlp(sentence)
    stack = [(0, 'ROOT', None)]
    buffer = [(i+1, token.text, token.head.i+1) for i, token in enumerate(doc)]
    transitions = []
    dependencies = []

    first_shift_removed = False

    while buffer or len(stack) > 1:
        # Get the next transition action using the oracle
        transition = oracle(stack, buffer, dependencies)
        if transition is None:
            break
        if transition == SHIFT and not first_shift_removed:
            first_shift_removed = True
        else:
            transitions.append(transition)
        if transition == SHIFT and buffer:
            stack.append(buffer.pop(0))
        elif transition == LEFT_ARC and len(stack) > 1:
            dependencies.append((stack[-2][1], stack[-1][1]))
            stack.pop(-2)
        elif transition == RIGHT_ARC and len(stack) > 1:
            dependencies.append((stack[-1][1], stack[-2][1]))
            stack.pop()
        elif transition == REDUCE and len(stack) > 1:
            stack.pop()

    return dependencies, transitions

# Example usage of the transition-based dependency parsing function
dependencies, transitions = transition_based_dependency_parse(sentence)

# Print the dependencies in a readable format
print("Dependencies:")
for token in doc:
    print(f"{token.text} <--{token.dep_}-- {token.head.text}")

# Print the transition actions
print("\nTransitions:")
for trans in transitions:
    print(trans)

# Save the output to a file
from google.colab import files
output_file_path = "dependency_parse_output.txt"
with open(output_file_path, 'w') as output_file:
    output_file.write("Dependencies:\n")
    for token in doc:
        output_file.write(f"{token.text} <--{token.dep_}-- {token.head.text}\n")

    output_file.write("\nTransitions:\n")
    for trans in transitions:
        output_file.write(f"{trans}\n")

# Download the output file
files.download(output_file_path)


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


1	John	John	PROPN	NNP	_	2	nsubj	_	_
2	saw	see	VERB	VBD	_	2	ROOT	_	_
3	Mary	Mary	PROPN	NNP	_	2	dobj	_	_

Dependencies:
John <--nsubj-- saw
saw <--ROOT-- saw
Mary <--dobj-- saw

Transitions:
SHIFT
LEFT-ARC
SHIFT
RIGHT-ARC


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>