<a href="https://colab.research.google.com/github/Sathishk2/Natural-Language-Processing/blob/main/Dependency_Distance.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import spacy

# Load spaCy's small English pipeline ('en_core_web_sm') into the module-level
# `nlp`, which calculate_dependency_distance below calls to parse its input.
nlp = spacy.load('en_core_web_sm')

# Function to calculate dependency distance
def calculate_dependency_distance(sentence):
    """Return one ``(token text, head text, distance)`` tuple per token.

    `sentence` is parsed with the module-level ``nlp`` pipeline. The distance
    is the absolute difference between a token's index and its head's index
    in the parsed doc, so a token that is its own head has a distance of 0.
    Tuples are returned in document order.
    """
    parsed = nlp(sentence)
    return [
        (tok.text, tok.head.text, abs(tok.i - tok.head.i))
        for tok in parsed
    ]

# Sample input for the demonstration
sentence = "The quick brown fox jumps over the lazy dog."

# One (token, head, distance) tuple per token of the sample
dependency_distances = calculate_dependency_distance(sentence)

# Report every token alongside its head and the index gap between them
for row in dependency_distances:
    token, head, distance = row
    print(f"Token: {token}, Head: {head}, Distance: {distance}")


Token: The, Head: fox, Distance: 3
Token: quick, Head: fox, Distance: 2
Token: brown, Head: fox, Distance: 1
Token: fox, Head: jumps, Distance: 1
Token: jumps, Head: jumps, Distance: 0
Token: over, Head: jumps, Distance: 1
Token: the, Head: dog, Distance: 2
Token: lazy, Head: dog, Distance: 1
Token: dog, Head: over, Distance: 3
Token: ., Head: jumps, Distance: 5


In [None]:
import spacy
!pip install textdescriptives
import textdescriptives as td
# Load the English pipeline and register the TextDescriptives
# dependency-distance component on it before processing any text.
nlp = spacy.load("en_core_web_sm")
nlp.add_pipe("textdescriptives/dependency_distance")
doc = nlp("The world is changed. I feel it in the water. I feel it in the earth. I smell it in the air. Much that once was is lost, for none now live who remember it.")

# A notebook cell only displays its LAST expression. The three lookups below
# used to be bare expressions, so they were evaluated and silently discarded;
# they are printed explicitly so their values actually appear in the output.

# all attributes are stored as a dict in the ._.dependency_distance attribute
print(doc._.dependency_distance)

# access span and token level dependency distance in the same way
print(doc[:3]._.dependency_distance)
print(doc[1]._.dependency_distance)

# extract to dataframe (kept as the final expression so it is rendered as the
# cell's rich output)
td.extract_df(doc)

Collecting textdescriptives
  Downloading textdescriptives-2.8.2-py3-none-any.whl.metadata (24 kB)
Collecting pyphen>=0.11.0 (from textdescriptives)
  Downloading pyphen-0.16.0-py3-none-any.whl.metadata (3.2 kB)
Collecting ftfy>=6.0.3 (from textdescriptives)
  Downloading ftfy-6.2.3-py3-none-any.whl.metadata (7.8 kB)
Collecting spacy-lookups-data<1.1.0,>=1.0.3 (from spacy[lookups]>=3.6.0->textdescriptives)
  Downloading spacy_lookups_data-1.0.5-py2.py3-none-any.whl.metadata (4.8 kB)
Downloading textdescriptives-2.8.2-py3-none-any.whl (254 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m254.3/254.3 kB[0m [31m6.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading ftfy-6.2.3-py3-none-any.whl (43 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.0/43.0 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyphen-0.16.0-py3-none-any.whl (2.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m41.8 MB/s[0m eta 

Unnamed: 0,text,dependency_distance_mean,dependency_distance_std,prop_adjacent_dependency_relation_mean,prop_adjacent_dependency_relation_std
0,The world is changed. I feel it in the water. ...,1.961905,0.896149,0.430476,0.105435


In [None]:
import spacy
import pandas as pd

# Load the 'en_core_web_sm' spaCy pipeline into the module-level `nlp`, which
# dependency_distance_preprocessor below calls to parse each text.
nlp = spacy.load('en_core_web_sm')

# Preprocessor function for calculating dependency distances
def dependency_distance_preprocessor(text):
    """Return the mean token-to-head index distance for `text`.

    `text` is parsed with the module-level ``nlp`` pipeline. Every token in
    the parsed doc contributes ``abs(token.i - token.head.i)`` to the mean.
    If the parsed doc contains no tokens the integer 0 is returned instead
    of dividing by zero.
    """
    parsed = nlp(text)
    gaps = [abs(tok.i - tok.head.i) for tok in parsed]

    # Guard the empty case so the division below is always well defined
    if not gaps:
        return 0
    return sum(gaps) / len(gaps)

# Two sample sentences held under a single 'text' key
data = {'text': ["The quick brown fox jumps over the lazy dog.",
                 "This is another example sentence."]}

# Build the frame and attach each row's mean dependency distance as a new
# 'dependency_distance' column in one expression
df = pd.DataFrame(data).assign(
    dependency_distance=lambda frame: frame['text'].apply(dependency_distance_preprocessor)
)

# Show the resulting table
print(df)


                                           text  dependency_distance
0  The quick brown fox jumps over the lazy dog.             1.900000
1             This is another example sentence.             1.833333


In [None]:
import spacy
import pandas as pd

# Load the 'en_core_web_sm' spaCy pipeline into the module-level `nlp`, which
# dependency_distance_columns below calls to parse each text.
nlp = spacy.load('en_core_web_sm')

# Preprocessor function for dependency distances with a column for each word
def dependency_distance_columns(text):
    """Map every token of `text` to its token-to-head index distance.

    `text` is parsed with the module-level ``nlp`` pipeline. The result is a
    dict with one entry per token, in document order:

    * key   -- ``"dep_dist_<token text>_<token index>"``; the token text is
      used verbatim (no sanitizing is applied, so punctuation such as "."
      appears in the key as-is) and the index keeps repeated words distinct.
    * value -- ``abs(token.i - token.head.i)``.
    """
    parsed = nlp(text)
    return {
        f"dep_dist_{tok.text}_{tok.i}": abs(tok.i - tok.head.i)
        for tok in parsed
    }

# Sample input: two sentences in a single 'text' column
data = {'text': ["The quick brown fox jumps over the lazy dog.",
                 "This is another example sentence."]}
df = pd.DataFrame(data)

# Compute the per-token distance dict for every row and expand the list of
# dicts straight into a DataFrame (one column per distinct key)
df_expanded = pd.DataFrame(df['text'].apply(dependency_distance_columns).tolist())

# Place the expanded columns next to the original text column
df_final = pd.concat([df, df_expanded], axis=1)

# Preview the combined frame
df_final.head()


Unnamed: 0,text,dep_dist_The_0,dep_dist_quick_1,dep_dist_brown_2,dep_dist_fox_3,dep_dist_jumps_4,dep_dist_over_5,dep_dist_the_6,dep_dist_lazy_7,dep_dist_dog_8,dep_dist_._9,dep_dist_This_0,dep_dist_is_1,dep_dist_another_2,dep_dist_example_3,dep_dist_sentence_4,dep_dist_._5
0,The quick brown fox jumps over the lazy dog.,3.0,2.0,1.0,1.0,0.0,1.0,2.0,1.0,3.0,5.0,,,,,,
1,This is another example sentence.,,,,,,,,,,,1.0,0.0,2.0,1.0,3.0,4.0
