In [1]:
# The results are appended to this text file. Rename it before running the entire notebook again,
# otherwise the results of the new notebook run end up behind the old ones in the same file.
testFile = "fewPerAnnotatorResultsV2re.txt"
# Upper bound of the run loops, which iterate over range(1, methodRuns): setting it to n means n-1 runs.
# As LLMs give different results each time, the method should be run multiple times to get more robust results.
methodRuns = 11

In [2]:
import ollama
import csv
import pandas as pd
import numpy

# read the unannotated data file to a dataframe
unannotatedDataFrame = pd.read_csv('notAnnotatedData.csv', sep=';', header=0)
# All labels in one ';'-separated string. This string is sent to the LLM and is split into the label list
# that is used to find the labels in the answers.
# The labels are plain names without quote characters: the matching code already looks for a label directly
# behind its opening JSON quote, which is what keeps "disapproval" from being counted as "approval".
# An extra quote in front of approval makes that search look for two quotes in a row, so approval is never found.
labelsToAnnotate: str = "admiration;amusement;anger;annoyance;approval;caring;confusion;curiosity;desire;disappointment;disapproval;disgust;embarrassment;excitement;fear;gratitude;grief;joy;love;nervousness;optimism;pride;realization;relief;remorse;sadness;suprise;neutral"

# read the annotated files to dataframes
# 5 different annotators annotated the data, this has to be aggregated
# NOTE(review): files 1-3 are read with ';' as separator and files 4-5 with ',' - confirm this matches the files
annotationSources = [
    ('annotatedData/Emotion-1.csv', ';'),
    ('annotatedData/Emotion-2.csv', ';'),
    ('annotatedData/Emotion-3.csv', ';'),
    ('annotatedData/Emotion-4.csv', ','),
    ('annotatedData/Emotion-5.csv', ','),
]
annotated1, annotated2, annotated3, annotated4, annotated5 = [
    pd.read_csv(path, sep=separator, header=0) for path, separator in annotationSources
]

# drop the not needed columns, so only the label columns are left
dropList = ["id", "question_id", "participant_id", "frisian", "dutch", "english"]
annotated1Clean, annotated2Clean, annotated3Clean, annotated4Clean, annotated5Clean = [
    frame.drop(dropList, axis=1)
    for frame in (annotated1, annotated2, annotated3, annotated4, annotated5)
]

# replace the NaN with 0, this is needed in order to calculate the F1 score, precision and recall later
(
    annotatedDataFrameNoNaN1,
    annotatedDataFrameNoNaN2,
    annotatedDataFrameNoNaN3,
    annotatedDataFrameNoNaN4,
    annotatedDataFrameNoNaN5,
) = [
    frame.fillna(0)
    for frame in (annotated1Clean, annotated2Clean, annotated3Clean, annotated4Clean, annotated5Clean)
]

# display(annotated1.iloc[0:50])
# transform the annotated data to numpy arrays to do the evaluations with
numAnnotated1, numAnnotated2, numAnnotated3, numAnnotated4, numAnnotated5 = [
    frame.to_numpy()
    for frame in (
        annotatedDataFrameNoNaN1,
        annotatedDataFrameNoNaN2,
        annotatedDataFrameNoNaN3,
        annotatedDataFrameNoNaN4,
        annotatedDataFrameNoNaN5,
    )
]
print(numAnnotated1)
# number of cells with a value of at least 1, per annotator
numOf1, numOf2, numOf3, numOf4, numOf5 = [
    numpy.count_nonzero(labels >= 1)
    for labels in (numAnnotated1, numAnnotated2, numAnnotated3, numAnnotated4, numAnnotated5)
]

# print(numOf1)
# print(numOf2)
# print(numOf3)
# print(numOf4)
# print(numOf5)


FileNotFoundError: [Errno 2] No such file or directory: 'annotatedData/Emotion-2.csv'

In [None]:
from tqdm import tqdm
tqdm.pandas()

def fewPerAnnotator(data) -> pd.Series:
    """Few-shot method for detecting the emotions per annotator in one text.

    The LLM ('llama3', called through ollama.chat with format='json') receives a system instruction,
    three worked examples (a user message with a text and the labels, followed by the assistant answer
    with the labels of the five annotators) and finally the text to annotate.

    Parameters:
        data: the row to annotate; only its `english` attribute is read.

    Returns:
        A pandas Series holding the content of the message the LLM answered with.
    """
    response = ollama.chat(model='llama3', format='json',messages=[
    {
        'role': 'system',
        'content': """  Your task is to identify the top [1-3] key emotions that are most relevant to each annotator's perspective using the provided labels. 
                    From the perspective of each annotator, identify the key emotions that are most strongly represented in their thoughts and feelings. 
                    If no emotion is detected, use the label neutral.
                    Answer with the identified emotions in JSON format, without explanation.
                   """
    },
    {
        'role': 'user',
        'content': """  Text: I would like to see a large field of solar panels, worn by the inhabitants themselves, paid for by the inhabitants themselves, the moment a resident leaves for the area he should be able to sell his part back, new residents should also be able to buy a part. In this way, you as a municipality become self-sufficient while it is affordable for the vast majority of residents. Residents who would not be able to participate financially, must be able to use the solar energy, as they currently purchase electricity from their supplier.
                        \n Labels: """ + labelsToAnnotate 
    },
    {
        'role': 'assistant',
        'content': "Annotator 1: [optimism, desire, caring], Annotator 2: [approval, desire, optimism], Annotator 3: [caring, optimism], Annotator 4: [caring, excitement, optimism], Annotator 5: [approval, caring, desire, excitement, optimism]"
    },
    {
        'role': 'user',
        'content': """  Text: Given the impact of the measures, right behind the front door, it seems better to put the control in the hands of municipalities and residents. The market can go out.
                        \n Labels: """ + labelsToAnnotate 
    },
    {
        'role': 'assistant',
        'content': " Annotator 1: [approval, optimism], Annotator 2: [desire], Annotator 3: [approval, caring, realization], Annotator 4: [nervousness, pride], Annotator 5: [approval, fear, nervousness]"
    },
    {
        'role': 'user',
        'content': """  Text: Here I have given 10 points. A fairly large wind farm is already coming near me, between Zurich and Witmarsum.  I think that's enough for SÃºdwest-FryslÃ¢n.  The idea of making SÃºdwest-FryslÃ¢n the supplier of the Netherlands seems to me to be a bad idea.  We have wind here, so let's limit ourselves to windmills. Then other parts of the country can invest in large-scale solar parks with less wind. Please share the burden.
                        \n Labels: """ + labelsToAnnotate 
    },
    {
        'role': 'assistant',
        'content': " Annotator 1: [annoyance, caring, disapproval], Annotator 2: [annoyance, disapproval], Annotator 3: [annoyance, disapproval], Annotator 4: [anger, annoyance, disapproval], Annotator 5: [annoyance, disappointment, disapproval]"
    },
    {
        'role': 'user',
        'content': "Text: " + data.english + "\n Labels: " + labelsToAnnotate
    },
    ])
    # response is a mapping of: model, created at, message, done, total duration, load duration, prompt eval duration, eval count and eval duration
    # only the content of the message is returned, wrapped in a Series so that applying this method per row gives a dataframe
    return pd.Series(response['message']['content'])

# Rows 4, 6 and 21 are the examples that are given to the LLM inside the prompt, so they are left out of the test set.
# These examples got clear labels from the annotators and provide a good overall understanding of the different emotions.
test = unannotatedDataFrame.iloc[0:50]
smallTest = test.drop(index=[4, 6, 21])

# The method is called once per row of the dataframe and reads the english text of that row.
# progress_apply is used instead of apply, as the LLM takes some time and it is nice to see the progress.
# To create more robust results the method is run several times; the number of runs is controlled by
# methodRuns, which is set at the top of the document (n means n-1 runs).
runs = {}
for n in range(1, methodRuns):
    runKey = "response" + str(n)
    runs[runKey] = smallTest.progress_apply(fewPerAnnotator, axis=1)
    # the single result column gets a name, so it can be addressed later on
    runs[runKey].columns = ["labels"]
print(runs["response1"])

In [None]:
# This cell reads runs that were saved to csv files, instead of using the runs of the cell above.
runs = {}
# n means n-1 runs: with 11 the files run1.csv up to and including run10.csv are read
methodRuns = 11

# this part reads each csv to a dataframe and ensures only the labels are copied
current = 'runs/fewPerAnnotatorRuns/run'
# the loop follows methodRuns, so the number of files read stays in line with the loops in the other cells
for n in range(1, methodRuns):
    csvPath = current + str(n) + '.csv'
    # '{' is used as separator: everything in front of the '{' ends up in "rows", the answer of the LLM in "text"
    res = pd.read_csv(csvPath, sep='{', header=None)
    res.columns = ["rows", "text"]
    # the part behind the closing '}' is split off, so "labels" only holds the content between the braces
    res[["labels", "num"]] = res["text"].str.split("}", expand=True)
    runs["response" + str(n)] = res[["labels"]].copy()

display(runs["response2"])

In [None]:
# From the response we create, per annotator and per run, a dataframe in the same format as our annotated labels
# First we need a list of labels
labelsList = labelsToAnnotate.split(";")
annotator1 = {}
annotator2 = {}
annotator3 = {}
annotator4 = {}
annotator5 = {}
# the column that holds the text of an annotator, next to the dictionary that collects the frames of that annotator
annotatorColumns = ["A1", "A2", "A3", "A4", "A5"]
annotatorRuns = [annotator1, annotator2, annotator3, annotator4, annotator5]


def _labelIndicator(text: str, label: str) -> int:
    """Return 1 if `label` is given as an emotion in `text` and 0 otherwise.

    The label is searched directly behind its opening quote, so "disapproval" is not counted as "approval".
    A label that the LLM gave followed by :null counts as not given.
    """
    # a quote character that is part of the label itself is removed, otherwise two quotes in a row are searched for
    quotedLabel = "\"" + label.strip("\"")
    if quotedLabel + "\":null," in text:
        return 0
    return 1 if quotedLabel in text else 0


for n in range(1, methodRuns):
    response = runs["response" + str(n)]
    # Split the answer on "Annotator": piece 0 is the text in front of the first annotator, pieces 1-5 belong to the annotators.
    # reindex makes sure there are always exactly 6 pieces: missing pieces become empty, pieces past the fifth annotator are dropped.
    pieces = response["labels"].str.split("Annotator", expand=True).reindex(columns=range(6))
    response[["A0"] + annotatorColumns] = pieces
    for column, perAnnotator in zip(annotatorColumns, annotatorRuns):
        # double brackets are needed to make sure a dataframe is selected
        # there are a few instances where the emotions are not given per annotator, those rows are replaced with ""
        frame = response[[column]].fillna("")
        frame.columns = ["labels"]
        # for each label a column is added that holds 1 if the label shows up in the text and 0 if it does not
        for label in labelsList:
            frame[label] = [_labelIndicator(text, label) for text in frame["labels"]]
        perAnnotator["run" + str(n)] = frame
numpy.set_printoptions(threshold=30000)

# This shows the results of the first run for annotator 5
display(annotator5["run1"])

In [None]:
# transform the result data to numpy arrays to do evaluation on
frameResults1, frameResults2, frameResults3, frameResults4, frameResults5 = {}, {}, {}, {}, {}
numResults1, numResults2, numResults3, numResults4, numResults5 = {}, {}, {}, {}, {}
# per annotator: the frames per run, the dictionary for the cleaned frames and the dictionary for the arrays
perAnnotator = [
    (annotator1, frameResults1, numResults1),
    (annotator2, frameResults2, numResults2),
    (annotator3, frameResults3, numResults3),
    (annotator4, frameResults4, numResults4),
    (annotator5, frameResults5, numResults5),
]

# For each method run
for n in range(1, methodRuns):
    for annotatorFrames, cleanFrames, arrays in perAnnotator:
        # the text given by the LLM is dropped, only the label columns are left
        cleanFrames["response" + str(n)] = annotatorFrames["run" + str(n)].drop(columns=['labels'])
        # then the dataframe is turned into a numpy array
        arrays["results" + str(n)] = cleanFrames["response" + str(n)].to_numpy()

# This prints the cleaned up numpy array results of the first method run of annotator 1
print(numResults1["results1"])

# now we create and write the results to a file
# !! rename the file when new results are created !!
# The name of the file is set at the top of the document.
# The file is not closed here: the cell with the aggregated results writes to it as well and closes it.
f = open(testFile, "a")
for n in range(1, methodRuns):
    f.write("Results " + str(n) + "\n")
    f.write(runs["response" + str(n)].to_string() + "\n")
    for annotatorNumber, (_, _, arrays) in enumerate(perAnnotator, start=1):
        f.write("Annotator " + str(annotatorNumber) + "\n")
        f.write(numpy.array_str(arrays["results" + str(n)]) + "\n")
    

Below is the code used to evaluate the results

In [None]:
import numpy as np
from statsmodels.stats import inter_rater as irr
from sklearn.metrics import f1_score
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
# First the methods needed for the evaluation
# This method can be applied to a dataframe to get the actual labels instead of just numbers
def getLabels(data):
    """Return the names of the labels that have a value of at least 1 in `data`.

    `data` is indexed by label name, for example a row of a dataframe with a column per label.
    Each label that is found is added as ", " followed by the name, so a result that is not empty
    starts with ", ". When no label is found an empty string is returned.
    """
    labelNames = "admiration;amusement;anger;annoyance;approval;caring;confusion;curiosity;desire;disappointment;disapproval;disgust;embarrassment;excitement;fear;gratitude;grief;joy;love;nervousness;optimism;pride;realization;relief;remorse;sadness;suprise;neutral".split(";")
    return "".join(", " + name for name in labelNames if data[name] >= 1)

def getNumLabels(resultArray, n):
    """Count the cells of `resultArray` that hold a value of at least `n`."""
    reachesThreshold = resultArray >= n
    return np.count_nonzero(reachesThreshold)

def calculateAgreement(resultArray):
    """Return the Fleiss' kappa of `resultArray`.

    The array is first aggregated with aggregate_raters, which returns a tuple (data, categories);
    the kappa is calculated over the data part.
    """
    table, _categories = irr.aggregate_raters(resultArray)
    return irr.fleiss_kappa(table, method='fleiss')

def calcF1RecallPrecision(annoArray, predictArray):
    """Return a numpy array with the micro F1 score, recall and precision, in that order.

    `annoArray` holds the labels of the annotator, `predictArray` the labels that were predicted.
    """
    scores = [
        f1_score(annoArray, predictArray, average="micro"),
        recall_score(annoArray, predictArray, average="micro"),
        precision_score(annoArray, predictArray, average="micro"),
    ]
    # appending to an empty array without an axis gives a flat array of the three scores
    return np.append(np.empty(shape=(0, 3)), scores)

def calcF1(annoArray, predictArray):
    """Return the micro averaged F1 score of the predicted labels against the annotated labels."""
    microF1 = f1_score(y_true=annoArray, y_pred=predictArray, average="micro")
    return microF1

def calcRecall(annoArray, predictArray):
    """Return the micro averaged recall of the predicted labels against the annotated labels."""
    microRecall = recall_score(y_true=annoArray, y_pred=predictArray, average="micro")
    return microRecall

def calcPrecision(annoArray, predictArray):
    """Return the micro averaged precision of the predicted labels against the annotated labels."""
    microPrecision = precision_score(y_true=annoArray, y_pred=predictArray, average="micro")
    return microPrecision

# The evaluation metrics are calculated over the rows the LLM was run on.
# These are the first 50 rows without row 4, 6 and 21, as those rows are used to give examples to the LLM.
exampleRows = [4, 6, 21]
numOnlyAnno1, numOnlyAnno2, numOnlyAnno3, numOnlyAnno4, numOnlyAnno5 = [
    numpy.delete(annotations[0:50], exampleRows, axis=0)
    for annotations in (numAnnotated1, numAnnotated2, numAnnotated3, numAnnotated4, numAnnotated5)
]

In [None]:
# This code is used to evaluate the individual runs

import statistics
# current annotator and results, change both to evaluate another annotator
currentAnno = numOnlyAnno5
currentResults = numResults5
# currentResults holds one array per run; methodRuns set to n means n-1 runs
numberOfRuns = methodRuns - 1
# one row per run with the f1, recall and precision of that run
evalTotal = np.empty(shape=(numberOfRuns, 3))
f1Labels = []
recallLabels = []
precLabels = []
numLabels = []
for n in range(1, methodRuns):
    runResult = currentResults["results" + str(n)]
    # for each run calculate the f1, recall and precision
    f1Labels.append(calcF1(currentAnno, runResult))
    recallLabels.append(calcRecall(currentAnno, runResult))
    precLabels.append(calcPrecision(currentAnno, runResult))
    evalTotal[n - 1] = [f1Labels[-1], recallLabels[-1], precLabels[-1]]
    # also calculate the number of labels that were given in this run
    numLabels.append(getNumLabels(runResult, 1))
# With the precision and total labels the correct and incorrect labels can be calculated
correctLabels = np.multiply(precLabels, numLabels)
incorrectLabels = np.subtract(numLabels, correctLabels)
f1Mean = statistics.mean(f1Labels)
f1Dev = np.std(f1Labels)
recMean = statistics.mean(recallLabels)
recDev = np.std(recallLabels)
precMean = statistics.mean(precLabels)
precDev = np.std(precLabels)
print("f1")
print(f1Mean)
print(f1Dev)
print("recall")
print(recMean)
print(recDev)
print("precision")
print(precMean)
print(precDev)
# Next is calculating the mean and the standard deviation of the number of labels
meanLabels = statistics.mean(numLabels)
stDev = np.std(numLabels)
meanLabelsCor = statistics.mean(correctLabels)
stDevCor = np.std(correctLabels)
meanLabelsIncor = statistics.mean(incorrectLabels)
stDevinCor = np.std(incorrectLabels)
print("total labels")
print(meanLabels)
print(stDev)

In [None]:
# The code below is used to evaluate the aggregated results

# First the results from the different method runs are aggregated: per annotator all the numpy arrays
# are added together, so each cell holds the number of runs that gave that label to that text.
runResultsPerAnnotator = [numResults1, numResults2, numResults3, numResults4, numResults5]
annotationsPerAnnotator = [numOnlyAnno1, numOnlyAnno2, numOnlyAnno3, numOnlyAnno4, numOnlyAnno5]
summedPerAnnotator = []
for runResults in runResultsPerAnnotator:
    summed = runResults["results1"]
    for n in range(2, methodRuns):
        summed = numpy.add(summed, runResults["results" + str(n)])
    summedPerAnnotator.append(summed)
# the separate names are kept, as the cell that counts the occurrences reads them
sumResults1, sumResults2, sumResults3, sumResults4, sumResults5 = summedPerAnnotator

print(sumResults1)

# f is the results file that was opened when the individual runs were written.
# When this cell is run again the file is already closed, in that case it is opened again to append.
if f.closed:
    f = open(testFile, "a")
try:
    # each array ends with a newline, so the next header starts on its own line
    for annotatorNumber, summed in enumerate(summedPerAnnotator, start=1):
        f.write("Aggregated results " + str(annotatorNumber) + "\n" + numpy.array_str(summed) + "\n")

    # methodRuns set to n means n-1 runs, so the number of runs that were actually done is methodRuns - 1
    f.write("Number of runs: " + str(methodRuns - 1))
    # once they are summed, a threshold is applied
    # This threshold represents the amount of runs that need to have given a label for a piece of text in order to be considered.
    for n in range(1, methodRuns):
        print("Threshold: " + str(n))
        f.write("\nThreshold: " + str(n))
        for annotatorNumber, (annotations, summed) in enumerate(zip(annotationsPerAnnotator, summedPerAnnotator), start=1):
            # 1 for each label that was given in at least n runs, 0 otherwise
            thresholded = numpy.where(summed < n, 0, 1)
            # Now that all of the data is properly prepared, the micro F1 score, recall and precision are calculated
            f1Score = f1_score(annotations, thresholded, average="micro")
            recall = recall_score(annotations, thresholded, average="micro")
            precision = precision_score(annotations, thresholded, average="micro")
            who = "annotator " + str(annotatorNumber)
            print("micro f1 score " + who + ": " + str(f1Score))
            print("recall " + who + ": " + str(recall))
            print("precision " + who + ": " + str(precision))
            f.write("\nMicro F1 " + who + ": " + str(f1Score))
            f.write("\nMicro recall " + who + ": " + str(recall))
            f.write("\nMicro precision " + who + ": " + str(precision))
    # end the last line, so results that are appended later start on a new line
    f.write("\n")
finally:
    # the file is closed as well when one of the calculations fails
    f.close()


In [None]:
# counting occurrences
import numpy as np
# the summed results of the five annotators, in the order annotator 1 to annotator 5
annotatorsSummed = [sumResults1, sumResults2, sumResults3, sumResults4, sumResults5]

# For each annotator and each threshold the number of labels that reach the threshold is printed.
# The thresholds follow methodRuns (n means n-1 runs), so the highest threshold is the number of runs.
# The counts are printed directly, so the counts of the annotated data (numOf1 to numOf5) are not overwritten.
for annotatorNumber, summed in enumerate(annotatorsSummed, start=1):
    # without this header the five lists of counts can not be told apart
    print("Annotator " + str(annotatorNumber))
    for threshold in range(1, methodRuns):
        print("Labels for threshold " + str(threshold) + ": " + str(np.count_nonzero(summed >= threshold)))



The code below can be used to calculate the same evaluation results for the zero-shot method, i.e. when the LLM is given no examples, so they can be compared with the results above.

In [None]:
from sklearn.metrics import f1_score
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
# Hard-coded zero-shot results: 50 rows of 28 values each.
# NOTE(review): presumably one row per text and one column per label in the order of the label list,
# each value being the number of runs that gave the label - confirm against the zero-shot notebook.
zeroShotArray = numpy.array([[1, 1, 4, 0, 4, 1, 2, 0, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4]
, [0, 0, 0, 0, 4, 4, 6, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6]
, [1, 0, 2, 0, 4, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5]
, [3, 0, 0, 0, 7, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]
, [7, 0, 0, 0, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2]
, [0, 0, 3, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6]
, [0, 0, 10, 0, 8, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3]
, [0, 0, 9, 1, 7, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4]
, [9, 0, 0, 0, 10, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]
, [10, 0, 0, 0, 10, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4]
, [0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9]
, [0, 0, 9, 0, 2, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
, [1, 0, 1, 0, 8, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3]
, [0, 0, 0, 0, 7, 2, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3]
, [1, 0, 2, 0, 5, 0, 0, 1, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5]
, [0, 0, 1, 0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9]
, [0, 0, 0, 0, 6, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3]
, [9, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
, [0, 0, 9, 1, 8, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]
, [3, 0, 3, 0, 4, 0, 4, 2, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7]
, [3, 0, 6, 0, 8, 0, 0, 0, 0, 0, 7, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
, [1, 0, 1, 0, 8, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4]
, [0, 0, 6, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2]
, [0, 0, 10, 0, 2, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
, [3, 0, 1, 0, 5, 2, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4]
, [0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7]
, [0, 0, 1, 0, 5, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]
, [0, 0, 8, 3, 10, 0, 0, 0, 0, 0, 10, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2]
, [0, 0, 8, 0, 5, 0, 0, 0, 0, 1, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
, [7, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
, [7, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]
, [1, 1, 4, 1, 8, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 2]
, [1, 0, 0, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5]
, [5, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]
, [0, 0, 8, 0, 9, 1, 0, 0, 0, 0, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
, [9, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]
, [1, 0, 1, 0, 8, 7, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]
, [0, 0, 7, 3, 4, 0, 1, 0, 0, 2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]
, [1, 0, 2, 0, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 2]
, [8, 0, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3]
, [5, 0, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 4]
, [0, 0, 9, 0, 3, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3]
, [4, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
, [5, 0, 0, 10, 4, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
, [1, 0, 2, 7, 1, 0, 2, 3, 0, 6, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2]
, [4, 0, 0, 0, 9, 1, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6]
, [3, 0, 0, 0, 9, 2, 0, 0, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 3]
, [0, 0, 7, 0, 2, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6]
, [1, 0, 1, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9]
, [9, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]])
# The zero-shot results are not given per annotator, so the same array is compared with each annotator.
# NOTE: this replaces the summed results and the annotated rows of the cells above. Run the evaluation
# cells above again before using them for the few-shot results.
sumResults1 = zeroShotArray
sumResults2 = zeroShotArray
sumResults3 = zeroShotArray
sumResults4 = zeroShotArray
sumResults5 = zeroShotArray

# all of the first 50 rows are used here, the rows with the examples are not removed
numOnlyAnno1 = numAnnotated1[0:50]
numOnlyAnno2 = numAnnotated2[0:50]
numOnlyAnno3 = numAnnotated3[0:50]
numOnlyAnno4 = numAnnotated4[0:50]
numOnlyAnno5 = numAnnotated5[0:50]

zeroShotPairs = [
    (numOnlyAnno1, sumResults1),
    (numOnlyAnno2, sumResults2),
    (numOnlyAnno3, sumResults3),
    (numOnlyAnno4, sumResults4),
    (numOnlyAnno5, sumResults5),
]

# control: the same threshold evaluation as for the few-shot results, only printed and not written to the file
for n in range(1, methodRuns):
    print("Threshold: " + str(n))
    for annotatorNumber, (annotations, summed) in enumerate(zeroShotPairs, start=1):
        # 1 for each label that was given in at least n runs, 0 otherwise
        thresholded = numpy.where(summed < n, 0, 1)
        # Now that all of the data is properly prepared, the micro F1 score, recall and precision are calculated
        f1Score = f1_score(annotations, thresholded, average="micro")
        recall = recall_score(annotations, thresholded, average="micro")
        precision = precision_score(annotations, thresholded, average="micro")
        who = "annotator " + str(annotatorNumber)
        print("micro f1 score " + who + ": " + str(f1Score))
        print("recall " + who + ": " + str(recall))
        print("precision " + who + ": " + str(precision))
  


In [None]:
# This part finds and shows the labels that were annotated by annotator 3 for the first 50 rows
rowLabels = annotatedDataFrameNoNaN3.iloc[0:50]
labelsToAnnotate: str = "admiration;amusement;anger;annoyance;approval;caring;confusion;curiosity;desire;disappointment;disapproval;disgust;embarrassment;excitement;fear;gratitude;grief;joy;love;nervousness;optimism;pride;realization;relief;remorse;sadness;suprise;neutral"
labelsList = labelsToAnnotate.split(";")

def getLabels(data):
    """Return the names of the labels in labelsList that have a value of at least 1 in `data`.

    Each label that is found is added as ", " followed by the name, so a result that is not empty
    starts with ", ".
    """
    return "".join(", " + label for label in labelsList if data[label] >= 1)

# the method is applied per row, which gives one string of labels for each text
labelsNeeded = rowLabels.apply(getLabels, axis=1)
# the strings are shown completely instead of being cut off
pd.set_option('display.max_colwidth', None)
display(labelsNeeded)