Here I analyze the correlation between speech acts and sentiment. More precisely, I compute the Phi coefficient between assertives/expressives and neutral/non-neutral sentiment.

In [8]:
from context import speechact
import speechact.classifier.embedding as emb
import speechact.classifier.base as b
import speechact.evaluation as evaluation
import speechact.corpus as corp
import speechact.annotate as anno
from speechact.annotate import SpeechActLabels
from speechact import Sentiment
import matplotlib.pyplot as plt

In [14]:
import numpy as np
import scipy.stats as st

class ContingencyTable:

    def __init__(self) -> None:
        self.neutral_expressives = 0
        self.non_neutral_expressives = 0
        self.neutral_assertives = 0
        self.non_neutral_assertives = 0
    
    def to_matrix(self) -> np.ndarray:
        return np.array([[self.non_neutral_expressives, self.non_neutral_assertives], 
                         [self.neutral_expressives, self.neutral_assertives]])

def phi_coefficient(observed: np.ndarray):
    chi2, p, _, _ = st.chi2_contingency(observed)
    n = observed.sum()
    phi = np.sqrt(chi2 / n)
    return phi, p

#phi = phi_coefficient(observed)
#print("Phi coefficient:", phi)

# Examine the Data

In [16]:
corpus = corp.Corpus('../data/annotated data/dev-set-sentiment.conllu.bz2')
observations = ContingencyTable()

for sentence in corpus.sentences():
    sentiment_label = sentence.get_meta_data('sentiment_label')
    speech_act = sentence.speech_act

    if speech_act == SpeechActLabels.ASSERTION:
        if sentiment_label == Sentiment.NEUTRAL:
            observations.neutral_assertives += 1
        else:
            observations.non_neutral_assertives += 1
    elif speech_act == SpeechActLabels.EXPRESSIVE:
        if sentiment_label == Sentiment.NEUTRAL:
            observations.neutral_expressives += 1
        else:
            observations.non_neutral_expressives += 1


phi, p = phi_coefficient(observations.to_matrix())
print(observations.to_matrix())
print(f'Phi coefficient = {phi}, and p = {p}')

[[ 242  161]
 [ 255 1194]]
Phi coefficient = 0.39382598550763187, and p = 1.9826824244355964e-64
