### Import Spacy

If you cannot import it, then open Anaconda Prompt as an Administrator (right click on Anaconda Prompt -> More -> Open as Admin) and then:

- conda install -c conda-forge spacy
- python -m spacy download en_core_web_sm

This should work.

In [None]:
import spacy

# Load the "en_core_web_sm" pipeline once; later cells reuse it through `nlp`.
nlp = spacy.load("en_core_web_sm")

In [None]:
# Sample passage to run through the pipeline.
text = "Tell me, Muse, of that man of many resources, who wandered far and wide, after sacking the holy citadel of Troy. Many the men whose cities he saw, whose ways he learned. Many the sorrows he suffered at sea, while trying to bring himself and his friends back alive. Yet despite his wishes he failed to save them, because of their own un-wisdom, foolishly eating the cattle of Helios, the Sun, so the god denied them their return. Tell us of these things, beginning where you will, Goddess, Daughter of Zeus."
doc = nlp(text)

# One output line per token: surface text, lemma, part-of-speech tag, stop-word flag.
for token in doc:
    fields = (token.text, token.lemma_, token.pos_, token.is_stop)
    print(*fields)

### Reformatting the spaCy parse of that sentence as a pandas dataframe

In [None]:
import pandas as pd

# Column labels for the per-token table.
cols = ("text", "lemma", "POS", "explain", "stopword")

# One record per token of `doc`; spacy.explain() supplies the description
# shown in the "explain" column for the token's POS tag.
rows = [
    [t.text, t.lemma_, t.pos_, spacy.explain(t.pos_), t.is_stop]
    for t in doc
]

df = pd.DataFrame(rows, columns=cols)

# Trailing expression so the notebook displays the table.
df

### Visualize the Parse Tree

In [None]:
from spacy import displacy

# Render the current `doc` with displaCy's "dep" style (the parse tree this section visualizes).
displacy.render(doc, style="dep")

### Sentence Boundary Detection (SBD) – also known as Sentence Segmentation 

In [None]:
# Passage used for sentence segmentation.
# NOTE: each trailing backslash joins the next physical line into the same
# string literal, so the leading spaces of the continuation lines are part of
# `text`. The hard-coded token positions used in the following cells
# (doc[25:52], doc[45]) index into the parse of exactly this string.
# NOTE(review): those embedded space runs presumably affect tokenization —
# verify before reformatting the literal.
text = "Now, all the others, who had escaped destruction, had reached their homes, and were free of sea and war. \
        He alone, longing for wife and home, Calypso, the Nymph, kept in her echoing cavern, desiring him for a husband. \
        Not even when the changing seasons brought the year the gods had chosen for his return to Ithaca was he free from danger, \
        and among friends. \
        Yet all the gods pitied him, except Poseidon, \
        who continued his relentless anger against godlike Odysseus until he reached his own land at last."

doc = nlp(text)

# Print every sentence span found in `doc`, one per line, prefixed with ">".
for sent in doc.sents:
    print(">", sent)

### Non-Destructive Tokenization - Indexes

In [None]:
# Show where each sentence sits inside `doc`: its start and end token indices.
for sent in doc.sents:
    bounds = (sent.start, sent.end)
    print(">", *bounds)

In [None]:
# Slice `doc` by token index: tokens 25 up to, but not including, 52.
# NOTE(review): the bounds are hard-coded — presumably one of the start/end
# pairs printed by the previous cell; re-check them if `text` changes.
doc[25:52]

In [None]:
# Pick a single token by position and show its text, lemma and POS tag.
token = doc[45]
fields = (token.text, token.lemma_, token.pos_)
print(*fields)

### Acquiring Text

In [None]:
import sys
import warnings

# Ignore all warnings from here on (no message, category or module filter is given).
warnings.filterwarnings("ignore")

In [None]:
from bs4 import BeautifulSoup
import requests
import traceback

def get_text(url, timeout=30):
    """Download a web page and return the text of its paragraphs.

    Fetches ``url``, parses the response body as HTML and joins the text of
    every ``<p>`` element with newline characters.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up, so that a
            stalled connection cannot hang the notebook indefinitely.

    Returns:
        The paragraph texts joined by newlines; an empty string when the page
        contains no ``<p>`` elements.

    Note:
        On any error (network failure, HTTP error status, parse failure) the
        traceback is printed and ``sys.exit(-1)`` is called, as before.
    """
    try:
        response = requests.get(url, timeout=timeout)
        # Fail loudly on 4xx/5xx instead of parsing an error page as content.
        response.raise_for_status()

        soup = BeautifulSoup(response.text, "html.parser")
        return "\n".join(p.get_text() for p in soup.find_all("p"))
    except Exception:
        # Catch Exception rather than using a bare ``except`` so that
        # KeyboardInterrupt and SystemExit are not intercepted here.
        print(traceback.format_exc())
        sys.exit(-1)

In [None]:
# Download each licence page and parse it, keyed by a short licence name.
lic = {
    "mit": nlp(get_text("https://opensource.org/licenses/MIT")),
    "asl": nlp(get_text("https://opensource.org/licenses/Apache-2.0")),
    "bsd": nlp(get_text("https://opensource.org/licenses/BSD-3-Clause")),
}

# Show the sentences found in the BSD licence text.
bsd_doc = lic["bsd"]
for sent in bsd_doc.sents:
    print(">", sent)

### Compare Pairs

In [None]:
# Licence pairs to compare, as [left, right] keys into `lic`.
pairs = [["mit", "asl"], ["asl", "bsd"], ["bsd", "mit"]]

# Print both keys followed by the similarity score of the two parsed documents.
for a, b in pairs:
    score = lic[a].similarity(lic[b])
    print(a, b, score)

### Natural Language Understanding

In [None]:
# Re-parse the passage (written here as a single-line literal) and list its noun chunks.
text = "Now, all the others, who had escaped destruction, had reached their homes, and were free of sea and war. He alone, longing for wife and home, Calypso, the Nymph, kept in her echoing cavern, desiring him for a husband. Not even when the changing seasons brought the year the gods had chosen for his return to Ithaca was he free from danger, and among friends. Yet all the gods pitied him, except Poseidon, who continued his relentless anger against godlike Odysseus until he reached his own land at last."
doc = nlp(text)

for noun_chunk in doc.noun_chunks:
    phrase = noun_chunk.text
    print(phrase)

### Named Entities

In [None]:
# Print each entity found in `doc` together with its label.
for ent in doc.ents:
    entity_text, entity_label = ent.text, ent.label_
    print(entity_text, entity_label)

### Visualize Named Entities

In [None]:
# Render the current `doc` with displaCy's "ent" style (the entities listed in the previous cell).
displacy.render(doc, style="ent")

### NLTK

In [None]:
import nltk

# Fetch the NLTK "wordnet" resource.
# NOTE(review): presumably needed by the spacy-wordnet annotator used below — confirm.
nltk.download("wordnet")

If importing `spacy_wordnet` fails, install the package first:
    
    pip install spacy-wordnet

In [None]:
from spacy_wordnet.wordnet_annotator import WordnetAnnotator

# Show the pipeline before and after so the effect of the registration is visible.
print("before", nlp.pipe_names)

# Guard against adding the annotator twice when the cell is re-run.
already_registered = "WordnetAnnotator" in nlp.pipe_names
if not already_registered:
    annotator = WordnetAnnotator(nlp.lang)
    nlp.add_pipe(annotator, after="tagger")

print("after", nlp.pipe_names)

### Perform Automatic Lookup

In [None]:
# Parse the single word "withdraw" and keep its first (only) token for the lookups below.
withdraw_doc = nlp("withdraw")
token = withdraw_doc[0]

# Display the synsets the WordNet extension reports for that token.
token._.wordnet.synsets()

In [None]:
# Display the result of the WordNet extension's lemmas() lookup for `token`.
token._.wordnet.lemmas()

In [None]:
# Display the result of the WordNet extension's wordnet_domains() lookup for `token`.
# NOTE(review): presumably the domain labels later passed to
# wordnet_synsets_for_domain() come from this vocabulary — confirm.
token._.wordnet.wordnet_domains()

### Particular Domain or Set of Topics

In [None]:
# Rewrite a sentence so that every word with senses in the chosen WordNet
# domains is replaced by one "(lemma1|lemma2|...)" group; other words are kept.
domains = ["finance", "banking"]
sentence = nlp(u"I want to withdraw 5.000 euros.")

enriched_sent = []

for token in sentence:
    # Synsets of this token restricted to the desired domains.
    synsets = token._.wordnet.wordnet_synsets_for_domain(domains)

    if not synsets:
        # No in-domain sense: keep the word exactly as written.
        enriched_sent.append(token.text)
        continue

    # Gather the lemma names of all matching synsets. dict.fromkeys removes
    # duplicates while keeping first-seen order, so the output is the same
    # on every run (a set would not guarantee that).
    lemmas_for_synset = list(dict.fromkeys(
        lemma for s in synsets for lemma in s.lemma_names()
    ))

    # Append exactly ONE group per token. Appending inside the synset loop
    # would emit a partial, growing group for every synset of the same word.
    enriched_sent.append("({})".format("|".join(lemmas_for_synset)))

print(" ".join(enriched_sent))

### Analyze Text Data

In [None]:
import scattertext as st

# Add the two merge components to `nlp`, skipping any that are already in the pipeline.
for component_name in ("merge_entities", "merge_noun_chunks"):
    if component_name in nlp.pipe_names:
        continue
    nlp.add_pipe(nlp.create_pipe(component_name))

# Build a scattertext corpus from the bundled 2012 convention data set,
# categorised by the "party" column and reading text from the "text" column.
# Note that the corpus is parsed with scattertext's
# whitespace_nlp_with_sentences, not with `nlp`.
convention_df = st.SampleCorpora.ConventionData2012.get_data()
corpus_builder = st.CorpusFromPandas(
    convention_df,
    category_col="party",
    text_col="text",
    nlp=st.whitespace_nlp_with_sentences,
)
corpus = corpus_builder.build()

In [None]:
# Options for the explorer: "democrat" is the focus category, labelled
# "Democratic" against "Republican"; the speaker column is passed as metadata.
explorer_options = {
    "category": "democrat",
    "category_name": "Democratic",
    "not_category_name": "Republican",
    "width_in_pixels": 1000,
    "metadata": convention_df["speaker"],
}

# Produce the explorer page for `corpus`; the result is written to a file in the next cell.
html = st.produce_scattertext_explorer(corpus, **explorer_options)

In [None]:
from IPython.display import IFrame

# Save the generated page next to the notebook, then embed it inline.
file_name = "foo.html"

# Encode up front and write raw bytes, so the file is UTF-8 on every platform.
encoded_page = html.encode("utf-8")
with open(file_name, "wb") as f:
    f.write(encoded_page)

# Trailing expression so the notebook displays the frame.
IFrame(src=file_name, width=1200, height=700)