## Multilabel Classification of GoEmotions Dataset

Dataset Source: https://www.kaggle.com/datasets/debarshichanda/goemotions

#### Import Necessary Libraries

In [0]:
import pyspark

from pyspark.ml import Pipeline
import pyspark.sql.functions as F
from pyspark.sql.types import StringType, StructType, StructField, IntegerType, ArrayType, DoubleType
from pyspark.ml.feature import StringIndexer, IndexToString

import sparknlp
from sparknlp.base import *
from sparknlp.annotator import *

#### Display Library Versions

In [0]:
print(f"Apache Spark version:".rjust(24), spark.version)
print("Spark NLP version:".rjust(24), sparknlp.version())

   Apache Spark version: 3.3.0
      Spark NLP version: 4.3.1


#### Function to Ingest Dataset

In [0]:
def ingest_data(file_location: str, 
                schema: StructType, 
                delimiter: str = ',' 
               ) -> pyspark.sql.dataframe.DataFrame:
    '''
    This function reads in the dataset that is passed to it
    and fits the schema that is passed in to the dataset.
    '''
    file_type = "csv"
    infer_schema = "false"
    first_row_is_header = "true"
    
    dataset = spark.read.format(file_type) \
        .option("inferSchema", infer_schema) \
        .option("header", first_row_is_header) \
        .option("sep", delimiter) \
        .schema(schema)\
        .csv(file_location)
    
    return dataset

#### Function to Ingest Multiple Data Files as One Dataset (in Addition to Original Dataset Ingestion Function)

In [0]:
def ingest_multiple_datasets(datasets: [str],
                             schema: StructType
                            ) -> pyspark.sql.dataframe.DataFrame:
    '''
    This function ingests multiple datasets that 
    are fit to a schema and have a literal value
    added into a new feature/class.
    '''
    temp_df =  spark.createDataFrame([], schema)
    new_df =  spark.createDataFrame([], schema)
    
    for dataset in range(len(datasets)):
        # extract df of only the desired labels
        temp_df = ingest_data(datasets[dataset], schema)
        
        # concatenate it to the new_df
        new_df = new_df.union(temp_df)
    
    return new_df

#### Function to Evaluate Multilabel Classification Models

In [0]:
def evaluate_multilabel_model(dataset: pyspark.sql.dataframe.DataFrame, 
                              metrics: [str], 
                              model_name: str 
                             ) -> None:
    '''
    This function calculates & displays metrics for a multilabel 
    classification analysis.
    '''
    from pyspark.ml.evaluation import MultilabelClassificationEvaluator
    
    print("+---------------------------------------------+")
    print("|  " + model_name.center(41) + "  |")
    print("+---------------------------------------------+")
    print("|   %s  |  %s   |" % ("Metric".rjust(20), "Value".ljust(14)))
    print("+---------------------------------------------+")
    
    for x in metrics:
        evaluator = MultilabelClassificationEvaluator(labelCol="label", \
                                                      predictionCol="prediction", \
                                                      metricName=x) 
        score = evaluator.evaluate(dataset)
        print("|   %s  |  %s   |" % (x.rjust(20), str(round(score, 6)).ljust(14)))
        print("+---------------------------------------------+")

#### Ingest Dataset

In [0]:
data_files = ["/FileStore/tables/GoEmotions/goemotions_1.csv",
              "/FileStore/tables/GoEmotions/goemotions_2.csv"]

orig_schema = StructType([
    StructField("text", StringType(), True),
    StructField("id", IntegerType(), True),
    StructField("author", IntegerType(), True),
    StructField("subreddit", StringType(), True),
    StructField("link_id", StringType(), True),
    StructField("parent_id", StringType(), True),
    StructField("created_utc", StringType(), True),
    StructField("rater_id", StringType(), True),
    StructField("example_very_unclear", StringType(), True),
    StructField("admiration", IntegerType(), True),
    StructField("amusement", IntegerType(), True),
    StructField("anger", IntegerType(), True),
    StructField("annoyance", IntegerType(), True),
    StructField("approval", IntegerType(), True),
    StructField("caring", IntegerType(), True),
    StructField("confusion", IntegerType(), True),
    StructField("curiosity", IntegerType(), True),
    StructField("desire", IntegerType(), True),
    StructField("disappointment", IntegerType(), True),
    StructField("disapproval", IntegerType(), True),
    StructField("disgust", IntegerType(), True),
    StructField("embarrassment", IntegerType(), True),
    StructField("excitement", IntegerType(), True),
    StructField("fear", IntegerType(), True),
    StructField("gratitude", IntegerType(), True),
    StructField("grief", IntegerType(), True),
    StructField("joy", IntegerType(), True),
    StructField("love", IntegerType(), True),
    StructField("nervousness", IntegerType(), True),
    StructField("optimism", IntegerType(), True),
    StructField("pride", IntegerType(), True),
    StructField("realization", IntegerType(), True),
    StructField("relief", IntegerType(), True),
    StructField("remorse", IntegerType(), True),
    StructField("sadness", IntegerType(), True),
    StructField("surprise", IntegerType(), True),
    StructField("neutral", IntegerType(), True),
])

data = ingest_multiple_datasets(data_files, orig_schema)

data = data.drop("id", "author", "subreddit", "link_id", "parent_id", "created_utc", "rater_id")

data = data.na.drop(how='any')

data = data.dropDuplicates()

display(data)

text,example_very_unclear,admiration,amusement,anger,annoyance,approval,caring,confusion,curiosity,desire,disappointment,disapproval,disgust,embarrassment,excitement,fear,gratitude,grief,joy,love,nervousness,optimism,pride,realization,relief,remorse,sadness,surprise,neutral
"That's ironic. Because [NAME] isn't getting the Wall funded, we are pulling more resources off the border blockade effort?",False,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
"We've had so many stories about [NAME], that we do NOT need any about Florida lady. LOL",False,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0
Why did I get a boner reading this? That Magnesium supplement is doing its job methinks,False,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
"Nobody forced [NAME] to take [NAME] over [NAME], if he liked him that much then he should’ve just taken him.",False,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0
This video doesn't even show the shoes he was wearing...,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
What kind of insider only has 250 followers?,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
Same. Even though he’s dad-dicked us 3 years in a row I will always cheer for [NAME].,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
It's good worrying about things that can be helped. Lol,False,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
If she loves the place she’s at why leave? There’s something to be said for that. Idk your all’s financial situation tho,False,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
Basically. My step dad didn't choose to break his back and his knees. The whale chose to overeat and become obese,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1


#### More Preprocessing

In [0]:
data = data.filter(data.example_very_unclear == False)

data = data.drop("example_very_unclear")

display(data)

text,admiration,amusement,anger,annoyance,approval,caring,confusion,curiosity,desire,disappointment,disapproval,disgust,embarrassment,excitement,fear,gratitude,grief,joy,love,nervousness,optimism,pride,realization,relief,remorse,sadness,surprise,neutral
"That's ironic. Because [NAME] isn't getting the Wall funded, we are pulling more resources off the border blockade effort?",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
"We've had so many stories about [NAME], that we do NOT need any about Florida lady. LOL",0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0
Why did I get a boner reading this? That Magnesium supplement is doing its job methinks,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
"Nobody forced [NAME] to take [NAME] over [NAME], if he liked him that much then he should’ve just taken him.",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0
This video doesn't even show the shoes he was wearing...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
What kind of insider only has 250 followers?,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
Same. Even though he’s dad-dicked us 3 years in a row I will always cheer for [NAME].,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
It's good worrying about things that can be helped. Lol,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
If she loves the place she’s at why leave? There’s something to be said for that. Idk your all’s financial situation tho,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
Basically. My step dad didn't choose to break his back and his knees. The whale chose to overeat and become obese,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1


#### Descriptive Statistics for Each Feature

In [0]:
for col in data:
    print(f"{data.select(col)} : {data.select(col).distinct().count()}")

Out[8]: 'for col in data:\n    print(f"{data.select(col)} : {data.select(col).distinct().count()}")'

#### Convert label Columns into Single Label Column of ArrayType

In [0]:
label_cols = [field.name for field in data.schema.fields]
label_cols.remove('text')

for x in label_cols:
    data = data.withColumn(x, F.when(F.col(x)==1, F.lit(x)).otherwise(F.concat(F.lit(x), F.lit("Not"))))

cols_to_make_array_type = [data[x] for x in label_cols]

data = data.withColumn("labels", F.array(cols_to_make_array_type).cast(ArrayType(StringType())))

for x in label_cols:
    data = data.drop(x)

display(data)

text,labels
"That's ironic. Because [NAME] isn't getting the Wall funded, we are pulling more resources off the border blockade effort?","List(admirationNot, amusementNot, angerNot, annoyanceNot, approvalNot, caringNot, confusionNot, curiosityNot, desireNot, disappointmentNot, disapprovalNot, disgustNot, embarrassmentNot, excitementNot, fearNot, gratitudeNot, griefNot, joyNot, loveNot, nervousnessNot, optimismNot, prideNot, realizationNot, reliefNot, remorseNot, sadnessNot, surpriseNot, neutral)"
"We've had so many stories about [NAME], that we do NOT need any about Florida lady. LOL","List(admirationNot, amusement, angerNot, annoyanceNot, approval, caringNot, confusionNot, curiosityNot, desireNot, disappointmentNot, disapprovalNot, disgustNot, embarrassmentNot, excitementNot, fearNot, gratitudeNot, griefNot, joyNot, loveNot, nervousnessNot, optimismNot, prideNot, realization, reliefNot, remorseNot, sadnessNot, surpriseNot, neutralNot)"
Why did I get a boner reading this? That Magnesium supplement is doing its job methinks,"List(admirationNot, amusementNot, angerNot, annoyanceNot, approvalNot, caringNot, confusion, curiosityNot, desireNot, disappointmentNot, disapprovalNot, disgustNot, embarrassmentNot, excitementNot, fearNot, gratitudeNot, griefNot, joyNot, loveNot, nervousnessNot, optimismNot, prideNot, realizationNot, reliefNot, remorseNot, sadnessNot, surpriseNot, neutralNot)"
"Nobody forced [NAME] to take [NAME] over [NAME], if he liked him that much then he should’ve just taken him.","List(admirationNot, amusementNot, angerNot, annoyanceNot, approvalNot, caringNot, confusionNot, curiosityNot, desireNot, disappointmentNot, disapprovalNot, disgustNot, embarrassmentNot, excitementNot, fearNot, gratitudeNot, griefNot, joyNot, loveNot, nervousnessNot, optimism, prideNot, realizationNot, reliefNot, remorseNot, sadnessNot, surpriseNot, neutralNot)"
This video doesn't even show the shoes he was wearing...,"List(admirationNot, amusementNot, angerNot, annoyanceNot, approvalNot, caringNot, confusionNot, curiosityNot, desireNot, disappointmentNot, disapprovalNot, disgustNot, embarrassmentNot, excitementNot, fearNot, gratitudeNot, griefNot, joyNot, loveNot, nervousnessNot, optimismNot, prideNot, realizationNot, reliefNot, remorseNot, sadnessNot, surpriseNot, neutral)"
What kind of insider only has 250 followers?,"List(admirationNot, amusementNot, angerNot, annoyanceNot, approvalNot, caringNot, confusionNot, curiosityNot, desireNot, disappointmentNot, disapprovalNot, disgustNot, embarrassmentNot, excitementNot, fearNot, gratitudeNot, griefNot, joyNot, loveNot, nervousnessNot, optimismNot, prideNot, realizationNot, reliefNot, remorseNot, sadnessNot, surpriseNot, neutral)"
Same. Even though he’s dad-dicked us 3 years in a row I will always cheer for [NAME].,"List(admirationNot, amusementNot, angerNot, annoyanceNot, approvalNot, caringNot, confusionNot, curiosityNot, desireNot, disappointmentNot, disapprovalNot, disgustNot, embarrassmentNot, excitementNot, fearNot, gratitudeNot, griefNot, joyNot, loveNot, nervousnessNot, optimismNot, prideNot, realizationNot, reliefNot, remorseNot, sadnessNot, surpriseNot, neutral)"
It's good worrying about things that can be helped. Lol,"List(admiration, amusementNot, angerNot, annoyanceNot, approvalNot, caringNot, confusionNot, curiosityNot, desireNot, disappointmentNot, disapprovalNot, disgustNot, embarrassmentNot, excitementNot, fearNot, gratitudeNot, griefNot, joyNot, loveNot, nervousnessNot, optimismNot, prideNot, realizationNot, reliefNot, remorseNot, sadnessNot, surpriseNot, neutralNot)"
If she loves the place she’s at why leave? There’s something to be said for that. Idk your all’s financial situation tho,"List(admirationNot, amusementNot, angerNot, annoyanceNot, approvalNot, caringNot, confusionNot, curiosity, desireNot, disappointmentNot, disapprovalNot, disgustNot, embarrassmentNot, excitementNot, fearNot, gratitudeNot, griefNot, joyNot, loveNot, nervousnessNot, optimismNot, prideNot, realizationNot, reliefNot, remorseNot, sadnessNot, surpriseNot, neutralNot)"
Basically. My step dad didn't choose to break his back and his knees. The whale chose to overeat and become obese,"List(admirationNot, amusementNot, angerNot, annoyanceNot, approvalNot, caringNot, confusionNot, curiosityNot, desireNot, disappointmentNot, disapprovalNot, disgustNot, embarrassmentNot, excitementNot, fearNot, gratitudeNot, griefNot, joyNot, loveNot, nervousnessNot, optimismNot, prideNot, realizationNot, reliefNot, remorseNot, sadnessNot, surpriseNot, neutral)"


#### Split Dataset into Training & Evaluation Datasets

In [0]:
train_ds, test_ds = data.randomSplit([0.80, 0.20], seed=42)

train_ds = train_ds.persist()
test_ds = test_ds.persist()

print(f"There are {train_ds.count()} samples in the training dataset.")
print(f"There are {test_ds.count()} samples in the testing dataset.")

There are 88184 samples in the training dataset.
There are 21965 samples in the testing dataset.


#### Define Pipeline Stages

In [0]:
# document assembler
doc = DocumentAssembler() \
        .setInputCol("text") \
        .setOutputCol("document") \
        .setCleanupMode("shrink")

# Universal Sentence Encoder
use = UniversalSentenceEncoder.pretrained() \
        .setInputCols(["document"]) \
        .setOutputCol("sentences")

# Sentiment Analysis Deep Learning Classifier
ml_clf = MultiClassifierDLApproach() \
        .setInputCols("sentences") \
        .setOutputCol("class") \
        .setLabelColumn("labels") \
        .setBatchSize(16) \
        .setMaxEpochs(15) \
        .setLr(1e-3) \
        .setThreshold(0.5) \
        .setShufflePerEpoch(False) \
        .setEnableOutputLogs(True)

tfhub_use download started this may take some time.
Approximate size to download 923.7 MB
[ | ][ / ][ — ][ \ ][ | ][ / ][ — ][ \ ][ | ][ / ][ — ][ \ ][ | ][ / ][ — ][ \ ][ | ][ / ][ — ][ \ ][ | ][ / ][ — ][OK!]


#### Build Pipeline

In [0]:
ml_pipe = Pipeline().setStages([
    doc,
    use,
    ml_clf
])

#### Fit/Train Model

In [0]:
ml_model = ml_pipe.fit(train_ds)

#### Generate Predictions Using Testing Dataset

In [0]:
predictions = ml_model.transform(test_ds)

#### Prepare Predictions for Metrics Function (Part 1)

In [0]:
preds = predictions.select(F.col('labels').alias("label"),
                                F.col('class.result').cast(ArrayType(StringType())).alias("prediction"))

train_ds = train_ds.unpersist()
test_ds = test_ds.unpersist()

preds = preds.persist()

preds = preds.withColumn("label", F.array_sort(F.col("label")).cast(ArrayType(StringType()))) \
            .withColumn("prediction", F.array_sort(F.col("prediction").cast(ArrayType(StringType()))))

display(preds)

label,prediction
"List(admirationNot, amusementNot, angerNot, annoyanceNot, approvalNot, caringNot, confusionNot, curiosity, desire, disappointmentNot, disapprovalNot, disgustNot, embarrassmentNot, excitementNot, fearNot, gratitudeNot, griefNot, joyNot, loveNot, nervousnessNot, neutralNot, optimismNot, prideNot, realizationNot, reliefNot, remorseNot, sadnessNot, surpriseNot)","List(admirationNot, amusementNot, angerNot, annoyanceNot, approvalNot, caringNot, confusionNot, curiosityNot, desireNot, disappointmentNot, disapprovalNot, disgustNot, embarrassmentNot, excitementNot, fearNot, gratitudeNot, griefNot, joyNot, loveNot, nervousnessNot, neutralNot, optimismNot, prideNot, realizationNot, reliefNot, remorseNot, sadnessNot, surpriseNot)"
"List(admirationNot, amusementNot, angerNot, annoyanceNot, approvalNot, caringNot, confusionNot, curiosityNot, desireNot, disappointmentNot, disapprovalNot, disgustNot, embarrassmentNot, excitementNot, fearNot, gratitudeNot, griefNot, joyNot, loveNot, nervousnessNot, neutral, optimismNot, prideNot, realizationNot, reliefNot, remorseNot, sadnessNot, surpriseNot)","List(admirationNot, amusementNot, angerNot, annoyanceNot, approvalNot, caringNot, confusionNot, curiosityNot, desireNot, disappointmentNot, disapprovalNot, disgustNot, embarrassmentNot, excitementNot, fearNot, gratitudeNot, griefNot, joyNot, loveNot, nervousnessNot, neutralNot, optimismNot, prideNot, realizationNot, reliefNot, remorseNot, sadnessNot, surpriseNot)"
"List(admirationNot, amusementNot, angerNot, annoyanceNot, approvalNot, caringNot, confusionNot, curiosityNot, desireNot, disappointmentNot, disapprovalNot, disgustNot, embarrassmentNot, excitementNot, fearNot, gratitudeNot, griefNot, joyNot, loveNot, nervousnessNot, neutralNot, optimismNot, prideNot, realization, reliefNot, remorseNot, sadnessNot, surpriseNot)","List(admirationNot, amusementNot, angerNot, annoyanceNot, approvalNot, caringNot, confusionNot, curiosityNot, desireNot, disappointmentNot, disapprovalNot, disgustNot, embarrassmentNot, excitementNot, fearNot, gratitudeNot, griefNot, joyNot, loveNot, nervousnessNot, neutralNot, optimismNot, prideNot, realizationNot, reliefNot, remorseNot, sadnessNot, surpriseNot)"
"List(admirationNot, amusementNot, angerNot, annoyanceNot, approvalNot, caringNot, confusionNot, curiosityNot, desireNot, disappointmentNot, disapproval, disgustNot, embarrassmentNot, excitementNot, fearNot, gratitudeNot, griefNot, joyNot, loveNot, nervousnessNot, neutralNot, optimismNot, prideNot, realizationNot, reliefNot, remorseNot, sadnessNot, surpriseNot)","List(admirationNot, amusementNot, angerNot, annoyanceNot, approvalNot, caringNot, confusionNot, curiosityNot, desireNot, disappointmentNot, disapprovalNot, disgustNot, embarrassmentNot, excitementNot, fearNot, gratitudeNot, griefNot, joyNot, loveNot, nervousnessNot, neutralNot, optimismNot, prideNot, realizationNot, reliefNot, remorseNot, sadnessNot, surpriseNot)"
"List(admirationNot, amusementNot, angerNot, annoyanceNot, approvalNot, caringNot, confusionNot, curiosityNot, desireNot, disappointmentNot, disapprovalNot, disgustNot, embarrassmentNot, excitementNot, fearNot, gratitudeNot, griefNot, joyNot, loveNot, nervousnessNot, neutral, optimismNot, prideNot, realizationNot, reliefNot, remorseNot, sadnessNot, surpriseNot)","List(admirationNot, amusementNot, angerNot, annoyanceNot, approvalNot, caringNot, confusionNot, curiosityNot, desireNot, disappointmentNot, disapprovalNot, disgustNot, embarrassmentNot, excitementNot, fearNot, gratitudeNot, griefNot, joyNot, loveNot, nervousnessNot, neutralNot, optimismNot, prideNot, realizationNot, reliefNot, remorseNot, sadnessNot, surpriseNot)"
"List(admirationNot, amusementNot, angerNot, annoyanceNot, approvalNot, caringNot, confusionNot, curiosityNot, desireNot, disappointmentNot, disapprovalNot, disgustNot, embarrassmentNot, excitementNot, fearNot, gratitudeNot, griefNot, joyNot, loveNot, nervousnessNot, neutralNot, optimismNot, prideNot, realizationNot, reliefNot, remorseNot, sadness, surpriseNot)","List(admirationNot, amusementNot, angerNot, annoyanceNot, approvalNot, caringNot, confusionNot, curiosityNot, desireNot, disappointmentNot, disapprovalNot, disgustNot, embarrassmentNot, excitementNot, fearNot, gratitudeNot, griefNot, joyNot, loveNot, nervousnessNot, neutralNot, optimismNot, prideNot, realizationNot, reliefNot, remorseNot, sadnessNot, surpriseNot)"
"List(admirationNot, amusementNot, angerNot, annoyanceNot, approvalNot, caringNot, confusionNot, curiosityNot, desireNot, disappointmentNot, disapprovalNot, disgustNot, embarrassmentNot, excitementNot, fearNot, gratitudeNot, griefNot, joyNot, loveNot, nervousnessNot, neutral, optimismNot, prideNot, realizationNot, reliefNot, remorseNot, sadnessNot, surpriseNot)","List(admirationNot, amusementNot, angerNot, annoyanceNot, approvalNot, caringNot, confusionNot, curiosityNot, desireNot, disappointmentNot, disapprovalNot, disgustNot, embarrassmentNot, excitementNot, fearNot, gratitudeNot, griefNot, joyNot, loveNot, nervousnessNot, neutralNot, optimismNot, prideNot, realizationNot, reliefNot, remorseNot, sadnessNot, surpriseNot)"
"List(admirationNot, amusement, angerNot, annoyanceNot, approvalNot, caringNot, confusionNot, curiosityNot, desireNot, disappointmentNot, disapprovalNot, disgustNot, embarrassmentNot, excitementNot, fearNot, gratitudeNot, griefNot, joyNot, loveNot, nervousnessNot, neutralNot, optimismNot, prideNot, realizationNot, reliefNot, remorseNot, sadnessNot, surpriseNot)","List(admirationNot, amusementNot, angerNot, annoyanceNot, approvalNot, caringNot, confusionNot, curiosityNot, desireNot, disappointmentNot, disapprovalNot, disgustNot, embarrassmentNot, excitementNot, fearNot, gratitudeNot, griefNot, joyNot, loveNot, nervousnessNot, neutralNot, optimismNot, prideNot, realizationNot, reliefNot, remorseNot, sadnessNot, surpriseNot)"
"List(admirationNot, amusementNot, angerNot, annoyanceNot, approval, caringNot, confusionNot, curiosityNot, desireNot, disappointmentNot, disapprovalNot, disgustNot, embarrassmentNot, excitementNot, fearNot, gratitudeNot, griefNot, joyNot, loveNot, nervousnessNot, neutralNot, optimismNot, prideNot, realizationNot, reliefNot, remorseNot, sadnessNot, surpriseNot)","List(admirationNot, amusementNot, angerNot, annoyanceNot, approvalNot, caringNot, confusionNot, curiosityNot, desireNot, disappointmentNot, disapprovalNot, disgustNot, embarrassmentNot, excitementNot, fearNot, gratitudeNot, griefNot, joyNot, loveNot, nervousnessNot, neutralNot, optimismNot, prideNot, realizationNot, reliefNot, remorseNot, sadnessNot, surpriseNot)"
"List(admirationNot, amusementNot, angerNot, annoyanceNot, approvalNot, caring, confusionNot, curiosityNot, desireNot, disappointmentNot, disapprovalNot, disgustNot, embarrassmentNot, excitementNot, fearNot, gratitudeNot, griefNot, joyNot, loveNot, nervousnessNot, neutralNot, optimismNot, prideNot, realizationNot, reliefNot, remorseNot, sadnessNot, surpriseNot)","List(admirationNot, amusementNot, angerNot, annoyanceNot, approvalNot, caringNot, confusionNot, curiosityNot, desireNot, disappointmentNot, disapprovalNot, disgustNot, embarrassmentNot, excitementNot, fearNot, gratitudeNot, griefNot, joyNot, loveNot, nervousnessNot, neutralNot, optimismNot, prideNot, realizationNot, reliefNot, remorseNot, sadnessNot, surpriseNot)"


#### Prepare Predictions for Metrics Function (Part 2)

In [0]:
### Dictionary to convert the ones
convert_to_ones = {}

for x in label_cols:
    key = x
    value = 1
    convert_to_ones[key] = value 
print(convert_to_ones) 

### Dictionary to convert the zeros
not_label_cols = [x + "Not" for x in label_cols]

convert_to_zeros = {}

for x in not_label_cols:
    key = x
    value = 0
    convert_to_zeros[key] = value 
print(convert_to_zeros) 

{'admiration': 1, 'amusement': 1, 'anger': 1, 'annoyance': 1, 'approval': 1, 'caring': 1, 'confusion': 1, 'curiosity': 1, 'desire': 1, 'disappointment': 1, 'disapproval': 1, 'disgust': 1, 'embarrassment': 1, 'excitement': 1, 'fear': 1, 'gratitude': 1, 'grief': 1, 'joy': 1, 'love': 1, 'nervousness': 1, 'optimism': 1, 'pride': 1, 'realization': 1, 'relief': 1, 'remorse': 1, 'sadness': 1, 'surprise': 1, 'neutral': 1}
{'admirationNot': 0, 'amusementNot': 0, 'angerNot': 0, 'annoyanceNot': 0, 'approvalNot': 0, 'caringNot': 0, 'confusionNot': 0, 'curiosityNot': 0, 'desireNot': 0, 'disappointmentNot': 0, 'disapprovalNot': 0, 'disgustNot': 0, 'embarrassmentNot': 0, 'excitementNot': 0, 'fearNot': 0, 'gratitudeNot': 0, 'griefNot': 0, 'joyNot': 0, 'loveNot': 0, 'nervousnessNot': 0, 'optimismNot': 0, 'prideNot': 0, 'realizationNot': 0, 'reliefNot': 0, 'remorseNot': 0, 'sadnessNot': 0, 'surpriseNot': 0, 'neutralNot': 0}


#### Prepare Predictions for Metrics Function (Part 3)

In [0]:
### For the 'label' Column

# Convert values in 'label' column that end with "Not" to "0"
def replace_with_zeros(x):
    return [convert_to_zeros.get(i,i) for i in x]
zero_converter = F.udf(replace_with_zeros)
preds = preds.withColumn("label", zero_converter(F.col("label")))

# Convert values in 'label' column that do not end with "Not" to "1"
def replace_with_ones(x):
    return [convert_to_ones.get(i,i) for i in x]
one_converter = F.udf(replace_with_ones)
preds = preds.withColumn("label", one_converter(F.col("label")))

### For the 'prediction' Column

# Convert values in 'prediction' column that end with "Not" to "0"
def replace_with_zeros(x):
    return [convert_to_zeros.get(i,i) for i in x]
zero_converter = F.udf(replace_with_zeros)
preds = preds.withColumn("prediction", zero_converter(F.col("prediction")))

# Convert values in 'prediction' column that do not end with "Not" to "1"
def replace_with_ones(x):
    return [convert_to_ones.get(i,i) for i in x]
one_converter = F.udf(replace_with_ones)
preds = preds.withColumn("prediction", one_converter(F.col("prediction")))

display(preds)

label,prediction
"[0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]"
"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]"
"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]"
"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]"
"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]"
"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]"
"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]"
"[0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]"
"[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]"
"[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]"


#### Prepare Predictions for Metrics Function (Part 4)

In [0]:
preds = preds.withColumn("label", F.split(F.col("label"),","))\
            .withColumn("prediction", F.split(F.col("prediction"),","))

preds = preds.withColumn("label", F.col("label").cast(ArrayType(DoubleType())))\
            .withColumn("prediction", F.col("prediction").cast(ArrayType(DoubleType())))

#### Evaluation Model/Results

In [0]:
metrics_to_eval = ["accuracy", "f1Measure", 
                    "precision", "recall", 
                    "microPrecision", "microRecall", 
                    "microF1Measure", "subsetAccuracy", 
                    "hammingLoss"]

evaluate_multilabel_model(preds, 
                          metrics_to_eval, 
                          "Multi-Label of GoEmotions Dataset")

+---------------------------------------------+
|      Multi-Label of GoEmotions Dataset      |
+---------------------------------------------+
|                 Metric  |  Value            |
+---------------------------------------------+
|               accuracy  |  0.933747         |
+---------------------------------------------+
|              f1Measure  |  0.965262         |
+---------------------------------------------+
|              precision  |  0.965277         |
+---------------------------------------------+
|                 recall  |  0.965252         |
+---------------------------------------------+
|         microPrecision  |  0.965272         |
+---------------------------------------------+
|            microRecall  |  0.965252         |
+---------------------------------------------+
|         microF1Measure  |  0.965262         |
+---------------------------------------------+
|         subsetAccuracy  |  0.125108         |
+---------------------------------------

### Notes & Other Takeaways From This Project
****
- The reason why the subset accuracy is so low compared to the accuracy (and most other metrics shown) is because there are 28 different (binary) label values for each sample. So, the accuracy metric shows that (on average) 26 of the 28 labels for each sample are correct. The F1 score puts that value closer to 27 of the 28 labels for each sample are correct. The subset only counts a sample as correct if all labels (in this case, 28 labels) are correct for that sample.
****