![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)

# Hardcore DL by Spark NLP

## Explain Documents with Deep Learning

In [2]:
import sys
import time

#Spark ML and SQL
from pyspark.ml import Pipeline, PipelineModel
from pyspark.sql.functions import array_contains
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, IntegerType, StringType
#Spark NLP
import sparknlp
from sparknlp.pretrained import PretrainedPipeline
from sparknlp.annotator import *
from sparknlp.common import RegexRule
from sparknlp.base import *

### Let's create a Spark Session for our app

Let's take a look at what's behind the `sparknlp.start()` function:

In [3]:
# Create the Spark session that the rest of the notebook uses.
spark = sparknlp.start()

# Read both version strings first, then report them, so the environment that
# produced the outputs in this notebook is on record.
spark_nlp_version = sparknlp.version()
apache_spark_version = spark.version
print("Spark NLP version: ", spark_nlp_version)
print("Apache Spark version: ", apache_spark_version)

Spark NLP version:  2.4.0
Apache Spark version:  2.4.4


In [4]:
# Identify the pretrained pipeline to fetch: its published name and language.
PIPELINE_NAME = 'explain_document_dl'
PIPELINE_LANG = 'en'

# Loading the pipeline triggers a download (size is reported in the output).
pipeline = PretrainedPipeline(PIPELINE_NAME, lang=PIPELINE_LANG)

explain_document_dl download started this may take some time.
Approx size to download 168.4 MB
[OK!]


We simply send the text we want to transform and the pipeline does the work.

In [5]:
# Sample input. The misspellings ("beautful", "wth") look deliberate: they give
# the pipeline's 'checked' output something to correct.
# The second sentence ends at "Germany." so the input matches the outputs
# recorded in the cells below ('sentence', 'document' and 'entities' contain
# "Germany" only); the previous text ended "...Germany or Pakistan.", which
# those recorded outputs do not reflect.
text = 'He would love to visit many beautful cities wth you. He lives in an amazing country like Germany.'

# annotate() runs the whole pretrained pipeline on the string and returns a
# dict with one entry per output (see the keys listed in the next cell).
result = pipeline.annotate(text)

We can see the output of each annotator below. This one is doing so many things at once!

In [6]:
# Iterating a dict yields its keys, so this lists every output the pipeline produced.
list(result)

['entities',
 'stem',
 'checked',
 'lemma',
 'document',
 'pos',
 'token',
 'ner',
 'embeddings',
 'sentence']

In [7]:
# Show the sentences the pipeline split the input text into.
result['sentence']

['He would love to visit many beautful cities wth you.',
 'He lives in an amazing country like Germany.']

In [11]:
# Show the 'lemma' output: one entry per token, in token order.
result['lemma']

['He',
 'would',
 'love',
 'to',
 'visit',
 'many',
 'beautiful',
 'city',
 'wth',
 'you',
 '.',
 'He',
 'life',
 'in',
 'an',
 'amazing',
 'country',
 'like',
 'Germany',
 '.']

In [9]:
# Pair each spell-checked token with its part-of-speech tag, position by position.
[(checked_token, pos_tag) for checked_token, pos_tag in zip(result['checked'], result['pos'])]

[('He', 'PRP'),
 ('would', 'MD'),
 ('love', 'VB'),
 ('to', 'TO'),
 ('visit', 'VB'),
 ('many', 'JJ'),
 ('beautiful', 'JJ'),
 ('cities', 'NNS'),
 ('wth', 'NN'),
 ('you', 'PRP'),
 ('.', '.'),
 ('He', 'PRP'),
 ('lives', 'VBZ'),
 ('in', 'IN'),
 ('an', 'DT'),
 ('amazing', 'JJ'),
 ('country', 'NN'),
 ('like', 'IN'),
 ('Germany', 'NNP'),
 ('.', '.')]

In [10]:
# Display the full annotation dict: every output key with its list of values.
result

{'entities': ['Germany'],
 'stem': ['he',
  'would',
  'love',
  'to',
  'visit',
  'mani',
  'beauti',
  'citi',
  'wth',
  'you',
  '.',
  'he',
  'live',
  'in',
  'an',
  'amaz',
  'countri',
  'like',
  'germani',
  '.'],
 'checked': ['He',
  'would',
  'love',
  'to',
  'visit',
  'many',
  'beautiful',
  'cities',
  'wth',
  'you',
  '.',
  'He',
  'lives',
  'in',
  'an',
  'amazing',
  'country',
  'like',
  'Germany',
  '.'],
 'lemma': ['He',
  'would',
  'love',
  'to',
  'visit',
  'many',
  'beautiful',
  'city',
  'wth',
  'you',
  '.',
  'He',
  'life',
  'in',
  'an',
  'amazing',
  'country',
  'like',
  'Germany',
  '.'],
 'document': ['He would love to visit many beautful cities wth you. He lives in an amazing country like Germany.'],
 'pos': ['PRP',
  'MD',
  'VB',
  'TO',
  'VB',
  'JJ',
  'JJ',
  'NNS',
  'NN',
  'PRP',
  '.',
  'PRP',
  'VBZ',
  'IN',
  'DT',
  'JJ',
  'NN',
  'IN',
  'NNP',
  '.'],
 'token': ['He',
  'would',
  'love',
  'to',
  'visit',
  'many