## **TASK 08: DEPENDENCY PARSING AND ADVANCED STRUCTURES**

### Using 'clustered_documents.csv' Dataset

In [1]:
# Importing necessary libraries

import pandas as pd
import spacy
import ast
from spacy import displacy

In [3]:
# Reading the clustered documents CSV into a DataFrame

DATA_PATH = '/content/clustered_documents.csv'
data = pd.read_csv(DATA_PATH)

# Loading the spaCy English model used for parsing

SPACY_MODEL = "en_core_web_sm"
nlp = spacy.load(SPACY_MODEL)

In [28]:
# Keeping only the first 50 entries of the Tokens column

tokens = data.loc[:, 'Tokens'].head(50)

In [31]:
# Analyzing a token list with spaCy (the tokens are joined into text and re-parsed)

def analyze_tokens(tokens_list):
    """Parse a token list with spaCy and group its tokens by syntactic role.

    The tokens are joined with single spaces and passed as plain text to the
    module-level ``nlp`` pipeline. spaCy therefore tokenizes the text again,
    so the tokens of the returned doc are not guaranteed to line up
    one-to-one with ``tokens_list``.

    Args:
        tokens_list: Iterable of tokens. Items that are not strings (for
            example numbers in a parsed list) are converted with ``str``
            before joining.

    Returns:
        A ``(doc, structures)`` tuple. ``doc`` is the object returned by
        ``nlp`` for the joined text. ``structures`` maps "Subjects", "Verbs",
        "Objects", "Modifiers", "Adjectives" and "Adverbs" to lists of token
        texts in document order. The categories are not exclusive: a token
        can be listed under several keys.
    """
    structures = {
        "Subjects": [],
        "Verbs": [],
        "Objects": [],
        "Modifiers": [],
        "Adjectives": [],
        "Adverbs": []
    }

    # Build the text that spaCy parses; str() keeps non-string tokens from
    # breaking the join.
    text = " ".join(str(item) for item in tokens_list)
    doc = nlp(text)

    for token in doc:
        dep = token.dep_
        pos = token.pos_

        # Any dependency label containing "subj" counts as a subject
        if "subj" in dep:
            structures["Subjects"].append(token.text)

        # Verbs are picked by part-of-speech tag
        if pos == "VERB":
            structures["Verbs"].append(token.text)

        # Any dependency label containing "obj" counts as an object
        if "obj" in dep:
            structures["Objects"].append(token.text)

        # Any dependency label containing "mod" counts as a modifier;
        # this already includes "amod", so no separate check is needed
        if "mod" in dep:
            structures["Modifiers"].append(token.text)

        # Adjectives are picked by part-of-speech tag
        if pos == "ADJ":
            structures["Adjectives"].append(token.text)

        # Adverbs are picked by part-of-speech tag
        if pos == "ADV":
            structures["Adverbs"].append(token.text)

    return doc, structures

In [33]:
# Analyzing syntactic structures directly from tokens

# Keep the result for every row instead of only the last one; each entry is
# the (doc, structures) pair returned by analyze_tokens.
analyses = []

for i, token_str in enumerate(tokens):
    print(f"Analyzing Sentence {i + 1}:\n")
    tokens_list = ast.literal_eval(token_str)  # Safely converting stringified list to actual list
    analyses.append(analyze_tokens(tokens_list))

# The following cells inspect `doc` and `syntactic_structures`; bind them to
# the last analyzed row, which is what the loop variables held before.
if analyses:
    doc, syntactic_structures = analyses[-1]

Analyzing Sentence 1:

Analyzing Sentence 2:

Analyzing Sentence 3:

Analyzing Sentence 4:

Analyzing Sentence 5:

Analyzing Sentence 6:

Analyzing Sentence 7:

Analyzing Sentence 8:

Analyzing Sentence 9:

Analyzing Sentence 10:

Analyzing Sentence 11:

Analyzing Sentence 12:

Analyzing Sentence 13:

Analyzing Sentence 14:

Analyzing Sentence 15:

Analyzing Sentence 16:

Analyzing Sentence 17:

Analyzing Sentence 18:

Analyzing Sentence 19:

Analyzing Sentence 20:

Analyzing Sentence 21:

Analyzing Sentence 22:

Analyzing Sentence 23:

Analyzing Sentence 24:

Analyzing Sentence 25:

Analyzing Sentence 26:

Analyzing Sentence 27:

Analyzing Sentence 28:

Analyzing Sentence 29:

Analyzing Sentence 30:

Analyzing Sentence 31:

Analyzing Sentence 32:

Analyzing Sentence 33:

Analyzing Sentence 34:

Analyzing Sentence 35:

Analyzing Sentence 36:

Analyzing Sentence 37:

Analyzing Sentence 38:

Analyzing Sentence 39:

Analyzing Sentence 40:

Analyzing Sentence 41:

Analyzing Sentence 42:

A

In [36]:
# Displaying token dependencies as a fixed-width table:
# token text, dependency label, head token text, part-of-speech tag

print(f"{'Token':<15}{'Dependency':<15}{'Head':<15}{'POS':<10}")
print("-" * 50)
for token in doc:
    print(f"{token.text:<15}{token.dep_:<15}{token.head.text:<15}{token.pos_:<10}")

# Blank gap once after the table; printing it inside the loop split every
# row from the next with two empty lines.
print("\n")

Token          Dependency     Head           POS       
--------------------------------------------------
firstofitskind nmod           museum         VERB      


virtual        amod           museum         ADJ       


museum         npadvmod       dedicated      NOUN      


dedicated      amod           community      VERB      


jewish         amod           community      ADJ       


community      nsubj          launched       NOUN      


around         prep           community      ADP       


world          pobj           around         NOUN      


launched       ROOT           launched       VERB      


alliance       dobj           launched       NOUN      


kol            compound       yisrael        PROPN     


yisrael        nsubj          haverim        PROPN     


haverim        ccomp          launched       PROPN     


kiah           nsubj          combining      PROPN     


combining      ccomp          haverim        VERB      


cuttingedge    compound

In [38]:
# Printing each syntactic category with the tokens collected for it

print("Syntactic Structures:")
for label in ("Subjects", "Verbs", "Objects", "Modifiers", "Adjectives", "Adverbs"):
    print(f"{label}:", syntactic_structures[label])
print("\n")

Syntactic Structures:
Subjects: ['community', 'yisrael', 'kiah']
Verbs: ['firstofitskind', 'dedicated', 'launched', 'combining']
Objects: ['world', 'alliance', 'char']
Modifiers: ['firstofitskind', 'virtual', 'museum', 'dedicated', 'jewish', 'technology', 'rich', 'histori', '4195']
Adjectives: ['virtual', 'jewish', 'rich']
Adverbs: []




In [50]:
# Drawing the dependency tree of `doc` inline with displaCy

print("Visualization for Sentences: \n")
displacy.render(doc, style="dep", jupyter=True)

Visualization for Sentences: 

