<a href="https://colab.research.google.com/github/Brand-Sentiment-Tracking/dev-sentiment-package/blob/main/johnsnow/Evaluate_Sentiment_Predictions.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Evaluate Sentiment Predictions


## Colab Setup

In [1]:
# Install PySpark and Spark NLP
! pip install -q pyspark==3.1.2 spark-nlp

# Install Spark NLP Display lib
! pip install --upgrade -q spark-nlp-display

In [2]:
### Imports from prod package

import sparknlp
from sparknlp.base import DocumentAssembler
from sparknlp.annotator import Tokenizer, BertForSequenceClassification
from sparknlp.pretrained import PretrainedPipeline

# Import functions to manipulate dataframe
from pyspark.sql.functions import array_join
from pyspark.sql.functions import col, explode
from pyspark.sql.window import Window
from pyspark.sql.functions import monotonically_increasing_id, row_number
from pyspark.sql.types import StringType, ArrayType
from pyspark.ml import Pipeline
import pyspark.sql.functions as F

import pandas as pd

In [3]:
import sparknlp
import pandas as pd
import random
import time
from pyspark.ml import Pipeline
from pyspark.sql import SparkSession
from pyspark.sql.types import StringType
import pyspark.sql.functions as F
from tabulate import tabulate
import sparknlp
from sparknlp.annotator import *
from sparknlp.base import *
from sparknlp.pretrained import PretrainedPipeline
from sparknlp_display import NerVisualizer

# Spark dataframe manipulation imports
from pyspark.sql.functions import array_join
from pyspark.sql.functions import col, explode, expr, greatest
from pyspark.sql.window import Window
from pyspark.sql.functions import monotonically_increasing_id, row_number

# Start the Spark session with GPU support requested.
# CPU-only alternative: spark = sparknlp.start(gpu=False)
spark = sparknlp.start(gpu=True)

# Report the library versions used by this run
for _label, _version in (
    ("Spark NLP version: ", sparknlp.version()),
    ("Apache Spark version: ", spark.version),
):
    print(_label, _version)

Spark NLP version:  3.4.3
Apache Spark version:  3.1.2


## Define Sentiment Prediction Class

In [4]:
# Define the spark udf function outside the class
def append_sentiment(pair_list, sentiment):
    """Append sentiment to each entry in pred brand list.

    Args:
      pair_list: List of lists (one inner list per predicted entity), or
        None.
      sentiment: Value appended to the end of every inner list.

    Returns:
      A new list of new inner lists, each ending with ``sentiment``. The
      input is left untouched. ``None`` is returned unchanged so a missing
      list does not raise.
    """
    if pair_list is None:
        return None

    # Build fresh lists instead of calling .append() on the caller's data
    return [list(pair) + [sentiment] for pair in pair_list]


class SentimentIdentification:
    """Sentiment classifier backed by a Spark NLP pipeline.

    ``MODEL_NAME == "custom_pipeline"`` builds a FinBERT sequence
    classification pipeline in ``__init__``; any other value is loaded as a
    Spark NLP ``PretrainedPipeline`` of that name.

    NOTE(review): ``predict_dataframe`` always reads the ``class`` output
    column, whereas ``predict_and_evaluate`` reads ``sentiment`` for models
    other than the custom and bertwiki finance pipelines -- confirm that
    ``predict_dataframe`` is only meant for the ``class``-based pipelines.
    """

    def __init__(self, MODEL_NAME):
        """Creates a class for sentiment identification using specified model.

        Args:
          MODEL_NAME: Name of the Spark NLP pretrained pipeline, or
            "custom_pipeline" to build the FinBERT pipeline defined here.
        """

        # Create the pipeline instance
        self.MODEL_NAME = MODEL_NAME
        spark = sparknlp.start()

        # Create a custom pipeline if requested
        if self.MODEL_NAME == "custom_pipeline":  # https://nlp.johnsnowlabs.com/2021/11/03/bert_sequence_classifier_finbert_en.html
            document_assembler = DocumentAssembler() \
                .setInputCol('text') \
                .setOutputCol('document')

            tokenizer = Tokenizer() \
                .setInputCols(['document']) \
                .setOutputCol('token')

            sequenceClassifier = BertForSequenceClassification \
                .pretrained('bert_sequence_classifier_finbert', 'en') \
                .setInputCols(['token', 'document']) \
                .setOutputCol('class') \
                .setCaseSensitive(True) \
                .setMaxSentenceLength(512)

            pipeline = Pipeline(stages=[
                document_assembler,
                tokenizer,
                sequenceClassifier
            ])

            # Every stage is pretrained/rule-based, so fitting on a one-row
            # dataframe with an empty "text" is enough to obtain a model.
            self.pipeline_model = pipeline.fit(spark.createDataFrame([['']]).toDF("text"))

        else:
            self.pipeline_model = PretrainedPipeline(self.MODEL_NAME, lang='en')

    def predict_dataframe(self, df):
        """Annotates the input dataframe with the classification results.

        Args:
          df : Pandas or Spark dataframe to classify. Must contain the
            columns "text", "source_domain", "date_publish", "language" and
            "Predicted_Entity".

        Returns:
          Spark dataframe with the columns "text", "source_domain",
          "date_publish", "language", "positive", "neutral", "negative",
          "score" (positive minus negative) and
          "Predicted_Entity_and_Sentiment".
        """
        spark = sparknlp.start()

        if isinstance(df, pd.DataFrame):
            # Convert to spark dataframe for faster prediction
            df_spark = spark.createDataFrame(df)
        else:
            df_spark = df

        # Annotate dataframe with classification results
        df_spark = self.pipeline_model.transform(df_spark)

        # The custom pipeline stores its class probabilities under
        # "Some(<label>)" metadata keys; the pretrained one uses "<label>".
        if self.MODEL_NAME == "custom_pipeline":
            label_keys = {"positive": "Some(positive)",
                          "neutral": "Some(neutral)",
                          "negative": "Some(negative)"}
        else:
            label_keys = {"positive": "positive",
                          "neutral": "neutral",
                          "negative": "negative"}

        # Read the scores from the first annotation of each row, in the same
        # row as the prediction. This replaces an explode + row_number() join
        # over monotonically_increasing_id(), which needed a global
        # (un-partitioned) window and could pair a prediction with another
        # row's scores whenever explode changed the number of rows.
        metadata = col("class.metadata").getItem(0)
        score_columns = [metadata.getItem(key).alias(label)
                         for label, key in label_keys.items()]

        # Keep the target columns, the predicted label (joined from a list
        # into a string) and the per-class scores
        df_spark_combined = df_spark.select(
            "text", "source_domain", "date_publish", "language",
            "Predicted_Entity",
            array_join(col("class.result"), "").alias("Predicted_Sentiment"),
            *score_columns
        )

        df_spark_combined = df_spark_combined.withColumn("score", col("positive") - col("negative"))

        # Append sentiment to each entry in pred brand list
        append_sent = F.udf(append_sentiment, ArrayType(ArrayType(StringType())))  # Output a list of lists
        df_spark_combined = df_spark_combined.withColumn(
            'Predicted_Entity_and_Sentiment',
            append_sent('Predicted_Entity', 'Predicted_Sentiment'))

        # Keep positive/neutral/negative probabilities in the output spark df
        df_spark_combined = df_spark_combined.drop('Predicted_Entity', 'Predicted_Sentiment')

        return df_spark_combined

    def predict_and_evaluate(self, df_spark):
        """Computes accuracy by comparing labels of input dataframe.

        Args:
          df_spark: spark dataframe containing "text" and "True_Sentiment" column

        Returns:
          Tuple ``(accuracy, report, df_pandas_postprocessed)``: the accuracy
          as a percentage, the sklearn classification report, and a pandas
          dataframe with the columns "text", "True_Sentiment" and
          "Predicted_Sentiment".
        """

        from sklearn.metrics import classification_report, accuracy_score

        # Annotate dataframe with classification results
        df_spark = self.pipeline_model.transform(df_spark)

        # These two pipelines write their prediction to "class"; the others
        # handled here write it to "sentiment".
        if self.MODEL_NAME in ("custom_pipeline", "classifierdl_bertwiki_finance_sentiment_pipeline"):
            result_column = "class.result"
        else:
            result_column = "sentiment.result"

        # Extract only necessary columns, converting the predicted sentiment
        # from a list to a string
        df_spark = df_spark.select(
            "text",
            "True_Sentiment",
            array_join(col(result_column), "").alias("Predicted_Sentiment"))

        # Convert to pandas to use sklearn functions
        df_pandas_postprocessed = df_spark.toPandas()

        # Replace if abbreviated
        # Modify predicted labels to match with true labels
        df_pandas_postprocessed = df_pandas_postprocessed.replace({'Predicted_Sentiment': {'pos' : 'positive', 'neg' : 'negative'}})

        true_labels = df_pandas_postprocessed["True_Sentiment"]
        predicted_labels = df_pandas_postprocessed["Predicted_Sentiment"]

        # Compute the accuracy (as a percentage) and the per-class report.
        # The report gets its own name so the imported sklearn function is
        # not shadowed by its result.
        accuracy = accuracy_score(true_labels, predicted_labels) * 100
        report = classification_report(true_labels, predicted_labels)

        return accuracy, report, df_pandas_postprocessed


## Read Extracted Dataset

In [5]:
sentiment_url_2 = 'https://raw.githubusercontent.com/Brand-Sentiment-Tracking/dev-sentiment-package/main/data/unlabelled_test_en_10_labels.csv'  # our extracted data
sentiment_url = 'https://raw.githubusercontent.com/Brand-Sentiment-Tracking/dev-sentiment-package/main/data/labelled_1.csv'

# Maximum number of rows to keep (a smaller dataset gives a faster runtime)
num_sentences = 500

# Columns read from the labelled CSV
cols_to_read = ['text', "sentiment (Max's take)"]

# Load, rename, truncate and decode the labels in one non-mutating chain.
# Assigning the replaced column back (instead of calling
# df["col"].replace(..., inplace=True)) keeps this working under pandas
# Copy-on-Write, where the chained in-place form does not update the frame.
df_pandas = (
    pd.read_csv(sentiment_url, usecols=cols_to_read)
    # Rename sentiment to True_Sentiment
    .rename(columns={"sentiment (Max's take)": "True_Sentiment"})
    # Make dataset smaller for faster runtime
    .head(num_sentences)
    # Replace 1, 2, 3 with negative, neutral, positive
    .assign(True_Sentiment=lambda frame: frame["True_Sentiment"].replace(
        {1.0: "negative", 2.0: "neutral", 3.0: "positive"}))
)

display(df_pandas)

# Convert to spark dataframe
df_spark = spark.createDataFrame(df_pandas)

# df_spark.show()

# # # Create a preprocessed spark dataframe
# from pyspark import SparkFiles
# spark.sparkContext.addFile(sentiment_url)

# # Read raw dataframe
# df_spark = spark.read.option("header","true").option("multiline","true").csv("file://"+SparkFiles.get("unlabelled_test_en_10_labels.csv"))

# df_spark = df_spark.select("text", "sentiment")

# # Rename columns
# df_spark = df_spark.withColumnRenamed("sentiment", "True_Sentiment")

# df_spark = df_spark.limit(num_sentences)

# df_spark.show()


Unnamed: 0,text,True_Sentiment
0,RYU Apparel Brings Customer Care Services In-h...,positive
1,AMREP Co. (NYSE:AXR) Director Edward B. Cloues...,neutral
2,"Johnson arrives in India to meet Modi, seek ec...",neutral
3,Remembering Bearden High School,neutral
4,"‘My body is here, but my soul, mind and everyt...",neutral
...,...,...
495,I’m a McDonald’s superfan so tried out ALL the...,negative
496,It's too early to compare Afena-Gyan to me - A...,neutral
497,US-Saudi relations nearing 'breaking point' --...,negative
498,2022 BMW 7 Series revealed with electric drive...,neutral


## Predict the sentiment

### Create the sentiment classifier object

In [18]:
# Model under evaluation. To compare another model, pass one of these
# pretrained pipeline names instead of "custom_pipeline":
#   "classifierdl_bertwiki_finance_sentiment_pipeline"
#   "analyze_sentimentdl_glove_imdb"
#   "analyze_sentimentdl_use_imdb"
#   "analyze_sentimentdl_use_twitter"
identifier_pretrained = SentimentIdentification("custom_pipeline")

# Classification reports

# Custom_pipeline:

# 75.4
#               precision    recall  f1-score   support

#     negative       0.48      0.51      0.49        65
#      neutral       0.83      0.87      0.85       385
#     positive       0.34      0.20      0.25        50

#     accuracy                           0.75       500
#    macro avg       0.55      0.53      0.53       500
# weighted avg       0.74      0.75      0.74       500

# classifierdl_bertwiki_finance_sentiment_pipeline:

# 76.2
#               precision    recall  f1-score   support

#     negative       0.51      0.34      0.41        65
#      neutral       0.83      0.88      0.86       385
#     positive       0.39      0.40      0.40        50

#     accuracy                           0.76       500
#    macro avg       0.58      0.54      0.55       500
# weighted avg       0.75      0.76      0.75       500

# analyze_sentimentdl_glove_imdb:

# 18.6
#               precision    recall  f1-score   support

#     negative       0.22      0.65      0.32        65
#      neutral       0.76      0.03      0.06       385
#     positive       0.13      0.76      0.22        50

#     accuracy                           0.19       500
#    macro avg       0.37      0.48      0.20       500
# weighted avg       0.63      0.19      0.11       500

# analyze_sentimentdl_use_imdb:

# 15.8
#               precision    recall  f1-score   support

#     negative       0.30      0.62      0.40        65
#      neutral       0.00      0.00      0.00       385
#     positive       0.11      0.78      0.19        50

#     accuracy                           0.16       500
#    macro avg       0.13      0.47      0.20       500
# weighted avg       0.05      0.16      0.07       500

# analyze_sentimentdl_use_twitter:

# 18.6
#               precision    recall  f1-score   support

#     negative       0.21      0.83      0.34        65
#      neutral       0.92      0.03      0.06       385
#     positive       0.12      0.56      0.20        50

#     accuracy                           0.19       500
#    macro avg       0.42      0.47      0.20       500
# weighted avg       0.75      0.19      0.11       500

bert_sequence_classifier_finbert download started this may take some time.
Approximate size to download 390.9 MB
[OK!]


### Compute evaluation metrics




In [20]:
# Time one full predict-and-evaluate pass over the labelled dataframe
start = time.time()
evaluation = identifier_pretrained.predict_and_evaluate(df_spark)
end = time.time()

# Unpack: accuracy (in percent), classification report, per-row predictions
accuracy, report, df_pandas_postprocessed = evaluation

display(df_pandas_postprocessed)

print(f"{end-start} seconds elapsed to predict and evaluate {num_sentences} sentences.")

# Print accuracy metrics
for metric in (accuracy, report):
    print(metric)

Unnamed: 0,text,True_Sentiment,Predicted_Sentiment
0,RYU Apparel Brings Customer Care Services In-h...,positive,neutral
1,AMREP Co. (NYSE:AXR) Director Edward B. Cloues...,neutral,neutral
2,"Johnson arrives in India to meet Modi, seek ec...",neutral,neutral
3,Remembering Bearden High School,neutral,neutral
4,"‘My body is here, but my soul, mind and everyt...",neutral,neutral
...,...,...,...
495,I’m a McDonald’s superfan so tried out ALL the...,negative,neutral
496,It's too early to compare Afena-Gyan to me - A...,neutral,neutral
497,US-Saudi relations nearing 'breaking point' --...,negative,neutral
498,2022 BMW 7 Series revealed with electric drive...,neutral,neutral


17.832666158676147 seconds elapsed to predict and evaluate 500 sentences.
75.4
              precision    recall  f1-score   support

    negative       0.48      0.51      0.49        65
     neutral       0.83      0.87      0.85       385
    positive       0.34      0.20      0.25        50

    accuracy                           0.75       500
   macro avg       0.55      0.53      0.53       500
weighted avg       0.74      0.75      0.74       500



In [8]:
# Write postprocessed dataframe to csv file.
# index=False leaves the pandas row index out of the file; otherwise it is
# written as an unnamed first column and comes back as "Unnamed: 0" when the
# file is read again with pd.read_csv.
df_pandas_postprocessed.to_csv('./postprocessed_data.csv', index=False)

### Evaluate from file

In [9]:
from sklearn.metrics import classification_report, accuracy_score

# Read csv file with both labels and prediction and compute evaluation metrics
df_pandas_labelled = pd.read_csv('./postprocessed_data.csv')

# Rename columns if necessary: a file whose prediction header is spelled
# "Predcited_Sentiment" is mapped to the name used below
df_pandas_labelled = df_pandas_labelled.rename(
    columns={"Predcited_Sentiment": "Predicted_Sentiment"})

true_labels = df_pandas_labelled["True_Sentiment"]
predicted_labels = df_pandas_labelled["Predicted_Sentiment"]

# Compute the evaluation metrics (accuracy as a percentage).
# The report text is kept in its own variable so that the name
# `classification_report` keeps referring to the sklearn function.
accuracy = accuracy_score(true_labels, predicted_labels) * 100
report_from_file = classification_report(true_labels, predicted_labels)

print(f"Accuracy: {accuracy}")
print(report_from_file)

Accuracy: 18.6
              precision    recall  f1-score   support

    negative       0.21      0.83      0.34        65
     neutral       0.92      0.03      0.06       385
    positive       0.12      0.56      0.20        50

    accuracy                           0.19       500
   macro avg       0.42      0.47      0.20       500
weighted avg       0.75      0.19      0.11       500

