In [None]:
#Imports
import sys
sys.path.append('../../')

from pyspark.sql import SparkSession
from pyspark.ml import Pipeline

from sparknlp.annotator import *
from sparknlp.common import RegexRule
from sparknlp.base import DocumentAssembler

In [None]:
# Create (or reuse) the Spark session explicitly. Nothing in this notebook
# defines `spark`, so on a kernel that does not pre-inject it this cell would
# fail with a NameError. `getOrCreate()` returns the already-running session
# when there is one, so kernels that do provide `spark` are unaffected.
# NOTE(review): the Spark NLP jar must still be on the session classpath
# (e.g. via spark-defaults or submit args) — confirm for your environment.
spark = SparkSession.builder \
    .appName("dictionary-sentiment-example") \
    .getOrCreate()

# Load the sample sentiment data set, capped at 10,000 rows.
# The path is resolved relative to the notebook's working directory and uses
# the same `../../../src/test/resources` tree as the lemma dictionary loaded
# further down, instead of climbing one level higher and re-entering a
# directory that has to be named exactly `spark-nlp`.
data = (
    spark.read
    .parquet("../../../src/test/resources/sentiment.parquet")
    .limit(10000)
)

# Cache the frame and force materialisation with an action; the row count is
# displayed as the cell output.
data.cache()
data.count()

In [None]:
# Hand-written sentiment lexicon: maps a word or phrase to one of the labels
# "positive", "negative", "revert", "increment" or "decrement".
# Entries are grouped by label; the resulting key order is the same as
# listing every pair individually.
# NOTE(review): no later cell visible in this notebook references
# `sentiment_dict` — confirm whether it is meant to be handed to the
# sentiment detector.
sentiment_dict = {
    term: label
    for label, terms in (
        ("positive", ("good", "nice", "awesome", "cool", "superb")),
        ("negative", (
            "bad",
            "uninspired",
            "expensive",
            "disappointed",
            "recommend others to avoid",
            "sad",
            "missed",
        )),
        ("revert", ("lack of", "not")),
        ("increment", ("too", "very", "so", "sorely")),
        ("decrement", ("barely", "little")),
    )
    for term in terms
}

In [None]:
# Build the individual annotator stages. Each stage names the column(s) it
# reads and the column it writes, so the stages chain by column name.

# Reads the raw "text" column.
# NOTE(review): no output column is set here, so the assembler's default is
# used — presumably "document", which the sentence detector reads; confirm.
document_assembler = DocumentAssembler().setInputCol("text")

# "document" -> "sentence"
sentence_detector = (
    SentenceDetectorModel()
    .setInputCols(["document"])
    .setOutputCol("sentence")
)

# "sentence" -> "token"
tokenizer = (
    RegexTokenizer()
    .setInputCols(["sentence"])
    .setOutputCol("token")
)

# "token" -> "lemma", using the lemma dictionary file from the test resources
# (path is relative to the notebook's working directory).
lemmatizer = (
    Lemmatizer()
    .setInputCols(["token"])
    .setOutputCol("lemma")
    .setDictionary("../../../src/test/resources/lemma-corpus/AntBNC_lemmas_ver_001.txt")
)

# "lemma" + "sentence" -> "sentiment_score"
sentiment_detector = (
    SentimentDetectorModel()
    .setInputCols(["lemma", "sentence"])
    .setOutputCol("sentiment_score")
)

In [None]:
# Assemble the stages in the order their input/output columns depend on each
# other, fit the pipeline on the loaded data, then annotate that same data.
pipeline = Pipeline(
    stages=[
        document_assembler,
        sentence_detector,
        tokenizer,
        lemmatizer,
        sentiment_detector,
    ]
)
model = pipeline.fit(data)
result = model.transform(data)

In [None]:
# Show the "sentence" column of the first annotated row as a quick sanity check.
result.select("sentence").limit(1).collect()

In [None]:
# For the first five rows, pair the whitespace-stripped input text with the
# "sentiment" entry found in the metadata of the row's first
# `sentiment_score` annotation.
[
    (
        row["text"].strip(),
        row["sentiment_score"][0]["metadata"]["sentiment"],
    )
    for row in result.select("text", "sentiment_score").take(5)
]

In [None]:
# Print a tabular preview of the annotated frame (first 20 rows, long cell
# values truncated — the same as calling `show()` with no arguments).
result.show(n=20, truncate=True)