# Natural Language Processing with spaCy

This notebook will walk through basic spaCy operations for NLP, including tokenization, POS tagging, Named Entity Recognition, dependency parsing, and more.

## Libraries and settings

In [None]:
# Libraries
import os
import subprocess
import sys

import pandas as pd
import spacy
from spacy import displacy
from spacy.language import Language

# Download spaCy's pre-trained language model
def install_spacy_model(model_name):
    """Make sure the spaCy model ``model_name`` can be loaded, downloading it if needed.

    The model is probed with ``spacy.load``. An ``OSError`` from that call is
    treated as "not installed", in which case ``spacy download`` is run as a
    subprocess of the current interpreter.

    Args:
        model_name: Name of the spaCy model package, e.g. ``"en_core_web_sm"``.

    Returns:
        None. Progress and failures are reported via ``print``.
    """
    try:
        spacy.load(model_name)
    except OSError:
        print(f"Model '{model_name}' not found. Installing...")
        # sys.executable is the interpreter running this kernel, so the model is
        # installed into the same environment that will load it. A bare "python"
        # on PATH may belong to a different environment.
        # Passing an argument list (no shell) keeps model_name from being
        # interpreted as shell syntax.
        completed = subprocess.run(
            [sys.executable, "-m", "spacy", "download", model_name],
            check=False,
        )
        if completed.returncode != 0:
            # Report the failure instead of silently ignoring the exit status.
            print(
                f"Installing model '{model_name}' failed "
                f"(exit code {completed.returncode})."
            )
    else:
        print(f"Model '{model_name}' is already installed.")

# Make sure the small English model is available before it is loaded
install_spacy_model("en_core_web_sm")

# Silence every warning for the remainder of the session
import warnings
warnings.simplefilter("ignore")

# Show the directory the notebook is running from
print(os.getcwd())

## Loading the spaCy Model
We'll load the English language model using the `spacy.load()` function.

In [2]:
# Load the English language model and bind it to `nlp`, the callable that
# later cells apply to raw text to obtain a processed document
nlp = spacy.load("en_core_web_sm")

## Basic Text Processing
Let's process some text and extract individual tokens.

In [None]:
import pandas as pd
import spacy

# Reuses the `nlp` pipeline loaded earlier in the notebook instead of loading
# the same model a second time.

# Sample Data with apartment description
data = {
    'description': [
        "Luxurious 3-bedroom apartment with mountain view and high-end finishes in the city of Zürich, Switzerland."
    ]
}
df = pd.DataFrame(data)

# Process all descriptions in one batch; nlp.pipe yields one Doc per input
# text, in input order
docs = list(nlp.pipe(df['description']))

for description, processed in zip(df['description'], docs):
    print(f"Processing description: {description}")
    # Display tokens
    for token in processed:
        print(token.text)
    print("\n")

# The following cells (POS tags, entities, dependencies, displaCy) analyse a
# single document. Bind it explicitly rather than relying on the variable the
# loop happened to leave behind.
doc = docs[-1]

## Part-of-Speech (POS) Tagging
Now, we can look at the POS tags for each word.

In [None]:
# One line per token: the text left-aligned in a 10-character column, then its
# coarse part-of-speech tag
for token in doc:
    print(format(token.text, '10'), token.pos_)

## Named Entity Recognition (NER)
Next, we will use spaCy to identify entities in a text, such as names, dates, organizations, etc.

In [None]:
# List every recognised entity span together with its label
for ent in doc.ents:
    print(ent.text, ent.label_)

## Dependency Parsing
We'll also explore dependency parsing, which analyzes the grammatical relationships between words.

In [None]:
# Three 10-character columns per token: the token, its dependency relation,
# and the head token it attaches to
for token in doc:
    print(
        token.text.ljust(10),
        token.dep_.ljust(10),
        token.head.text.ljust(10),
    )

## Visualization of entities and dependencies
We can use spaCy's `displacy` visualizer to display the entities and dependencies in a visually intuitive format.

In [None]:
# Visualize named entities
# jupyter=True is passed explicitly, matching the dependency call below, so the
# markup is displayed inline regardless of spaCy's notebook auto-detection.
displacy.render(doc, style="ent", jupyter=True)

# Visualize dependency parsing
displacy.render(doc, style="dep", jupyter=True)

### Jupyter notebook --footer info-- (please always provide this at the end of each notebook)

In [None]:
import os
import platform
import socket
from platform import python_version
from datetime import datetime

# Environment summary: OS family, platform and release, timestamp, Python version
rule = '-----------------------------------'
timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

print(rule)
print(os.name.upper())
print(f"{platform.system()} | {platform.release()}")
print(f"Datetime: {timestamp}")
print(f"Python Version: {python_version()}")
print(rule)