In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
import spacy
import functools
import re

In [None]:
# Load the tab-separated corpus file; it has no header row, so columns are numbered 0..n.
df = pd.read_csv(
    'data/SEM-2012-SharedTask-CD-SCO-dev-simple.v2.txt',
    sep='\t',
    header=None,
)

In [None]:
# Keep only sentence 3 of chapter 'wisteria01' (column 0 holds the chapter,
# column 1 the sentence id). Both conditions go into one boolean mask, and
# .copy() yields an independent frame so that the column edits made in later
# cells operate on their own data rather than on a slice of the full corpus.
df = df[(df[0] == 'wisteria01') & (df[1] == 3)].copy()

In [None]:
# Preview the first rows of the selected sentence; columns still carry integer labels.
df.head()

Unnamed: 0,0,1,2,3,4
53,wisteria01,3,0,He,O
54,wisteria01,3,1,made,O
55,wisteria01,3,2,no,B-NEG
56,wisteria01,3,3,remark,O
57,wisteria01,3,4,",",O


In [None]:
# Give the positional columns descriptive names. rename() returns a new frame,
# so nothing is modified in place and re-running this cell is harmless (on a
# second run none of the integer keys exist any more and the rename is a no-op).
column_dict = {0: 'Chapter', 1: 'Sent_id', 2: 'Token_id', 3: 'Token', 4: 'Gold Label'}
df = df.rename(columns=column_dict)

# Add the empty feature columns just before the last column ('Gold Label').
# DataFrame.insert raises ValueError when the column already exists, so columns
# that are already present are skipped to keep the cell re-runnable.
for feature_column in ('Dependency', 'Head', 'token-ROOT_path'):
    if feature_column not in df.columns:
        df.insert(len(df.columns) - 1, feature_column, None)

In [None]:
# Preview again to check the renamed columns and the new, still empty, feature columns.
df.head()

Unnamed: 0,Chapter,Sent_id,Token_id,Token,Dependency,Head,token-ROOT_path,Gold Label
53,wisteria01,3,0,He,,,,O
54,wisteria01,3,1,made,,,,O
55,wisteria01,3,2,no,,,,B-NEG
56,wisteria01,3,3,remark,,,,O
57,wisteria01,3,4,",",,,,O


In [None]:
@functools.lru_cache(maxsize=None)
def _load_spacy_model(name: str = 'en_core_web_sm'):
    """Load the spaCy pipeline `name` once; later calls reuse the cached object."""
    return spacy.load(name)


def get_dependency_features(df_val: pd.Series, rendering: bool = False):
    """Parse one pre-tokenized sentence with spaCy and return per-token dependency features.

    The tokens in `df_val` are handed to spaCy exactly as given (the sentence is
    not re-tokenized), so every returned list has one entry per input token and
    can be assigned straight back to the rows the tokens came from.

    Args:
        df_val: The tokens of a single sentence, in order (e.g. ``df['Token']``).
        rendering: When True, also draw the dependency tree with displaCy.

    Returns:
        A tuple ``(dependency_labels, heads, path_lengths)`` of three lists:
        each token's dependency label, the text of each token's head, and the
        number of ancestors of each token (0 for the root).
    """
    tokens = list(df_val.values)

    # Readable form of the sentence: tokens joined by single spaces, with the
    # whitespace in front of commas and full stops removed. str.join (unlike
    # reduce without an initial value) also copes with an empty token list.
    sentence = re.sub(r'\s+(?=[,.])', '', ' '.join(tokens))

    print('sentence', sentence)
    print('\n')

    # Trailing-space flag per token, mirroring the clean-up above: no space
    # after the last token or in front of a token starting with ',' or '.'.
    spaces = [
        position + 1 < len(tokens) and not tokens[position + 1].startswith((',', '.'))
        for position in range(len(tokens))
    ]

    nlp = _load_spacy_model('en_core_web_sm')
    # Build the Doc from the given tokens instead of letting spaCy tokenize the
    # joined string: spaCy's tokenizer may split or merge differently, which
    # would make the feature lists longer or shorter than the input.
    doc = spacy.tokens.Doc(nlp.vocab, words=tokens, spaces=spaces)
    # Run the pipeline components (tagger, parser, ...) on the prepared Doc.
    for _, component in nlp.pipeline:
        doc = component(doc)

    tokenization = list(doc)
    print("Tokenization: ",tokenization)
    print('\n')
    if rendering:
        print('printing dependency tree')
        spacy.displacy.render(doc, style="dep")
        print('\n')

    dependency_labels = [token.dep_ for token in doc]
    heads = [token.head.text for token in doc]
    # The number of ancestors is the length of the path from the token up to the root.
    path_lengths = [len(list(token.ancestors)) for token in doc]

    return dependency_labels, heads, path_lengths

In [None]:

# Parse the sentence (rendering=True also draws the tree), then echo each feature list.
dependency_labels, heads, path_lengths = get_dependency_features(df['Token'], rendering=True)

for title, values in (('dependency_labels', dependency_labels), ('heads', heads)):
    print(title, values)
    print('\n')
print('paths', path_lengths)


sentence He made no remark, but the matter remained in his thoughts, for he stood in front of the fire afterwards with a thoughtful face, smoking his pipe, and casting an occasional glance at the message.


Tokenization:  [He, made, no, remark, ,, but, the, matter, remained, in, his, thoughts, ,, for, he, stood, in, front, of, the, fire, afterwards, with, a, thoughtful, face, ,, smoking, his, pipe, ,, and, casting, an, occasional, glance, at, the, message, .]


printing dependency tree




dependency_labels ['nsubj', 'ROOT', 'det', 'dobj', 'punct', 'cc', 'det', 'nsubj', 'conj', 'prep', 'poss', 'pobj', 'punct', 'mark', 'nsubj', 'advcl', 'prep', 'pobj', 'prep', 'det', 'pobj', 'advmod', 'prep', 'det', 'amod', 'pobj', 'punct', 'advcl', 'poss', 'dobj', 'punct', 'cc', 'conj', 'det', 'amod', 'dobj', 'prep', 'det', 'pobj', 'punct']


heads ['made', 'made', 'remark', 'made', 'made', 'made', 'matter', 'remained', 'made', 'remained', 'thoughts', 'in', 'remained', 'stood', 'stood', 'remained', 'stood', 'in', 'front', 'fire', 'of', 'stood', 'stood', 'face', 'face', 'with', 'stood', 'stood', 'pipe', 'smoking', 'smoking', 'smoking', 'smoking', 'glance', 'glance', 'casting', 'glance', 'message', 'at', 'remained']


paths [1, 0, 2, 1, 1, 1, 3, 2, 1, 2, 4, 3, 2, 3, 3, 2, 3, 4, 5, 7, 6, 3, 3, 5, 5, 4, 3, 3, 5, 4, 4, 4, 4, 6, 6, 5, 6, 8, 7, 2]


In [None]:
# Fill the three feature columns with the values computed for this sentence.
feature_values = {
    'Dependency': dependency_labels,
    'Head': heads,
    'token-ROOT_path': path_lengths,
}
for column_name, column_values in feature_values.items():
    df[column_name] = column_values

In [None]:
# Show the sentence's rows with the dependency features filled in.
df

Unnamed: 0,Chapter,Sent_id,Token_id,Token,Dependency,Head,token-ROOT_path,Gold Label
53,wisteria01,3,0,He,nsubj,made,1,O
54,wisteria01,3,1,made,ROOT,made,0,O
55,wisteria01,3,2,no,det,remark,2,B-NEG
56,wisteria01,3,3,remark,dobj,made,1,O
57,wisteria01,3,4,",",punct,made,1,O
58,wisteria01,3,5,but,cc,made,1,O
59,wisteria01,3,6,the,det,matter,3,O
60,wisteria01,3,7,matter,nsubj,remained,2,O
61,wisteria01,3,8,remained,conj,made,1,O
62,wisteria01,3,9,in,prep,remained,2,O


# Theoretical Answer


Dependency features can be useful in a variety of NLP tasks, such as syntactic parsing, named entity recognition, and sentiment analysis, among others.

One example of a task where dependency features are useful is syntactic parsing, which involves identifying the grammatical structure of a sentence. Dependency features, such as each token's dependency label, its head, and the length of the path from the token to the root of the sentence, provide important information about the grammatical relationships between words: the label and head show, for example, which token is the subject or the object of a verb, while the path length shows how deeply a token is embedded in the sentence. This can be useful for tasks such as machine translation, text summarization, and information extraction.