In [None]:
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
#####################################################################
#             Generalising on Multi Label Level                     #
#####################################################################

"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
import pandas as pd
import numpy as np
from collections import Counter

from utilities.preprocess import Preproccesor
from utilities.attention_layer import Attention
from utilities.helping_functions import create_embedding_matrix

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.metrics import f1_score, balanced_accuracy_score, confusion_matrix
from sklearn.utils import shuffle
from sklearn.model_selection import KFold

from keras.preprocessing.sequence import pad_sequences
from keras.layers import GlobalAveragePooling1D, GlobalMaxPooling1D, SpatialDropout1D, Bidirectional, Dense, \
    LSTM, Conv1D, Dropout, concatenate
from keras import Input, Model
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.optimizers import Adam
from keras.layers.embeddings import Embedding
from keras.preprocessing.text import Tokenizer

import nltk
nltk.download('wordnet')
nltk.download('stopwords')

In [2]:
from sklearn.metrics import f1_score, accuracy_score, hamming_loss, make_scorer, fbeta_score, multilabel_confusion_matrix,\
    average_precision_score, precision_score, recall_score


def average_precision_wrapper(y, y_pred, view):
    return average_precision_score(y, y_pred.toarray(), average=view)


hamm_scorer = make_scorer(hamming_loss, greater_is_better=False)
ftwo_scorer = make_scorer(fbeta_score, beta=2)

Loading ETHOS and the dataset D2: Ousidhoum, Nedjma, et al. "Multilingual and multi-aspect hate speech analysis." arXiv preprint arXiv:1908.11049 (2019).

We will load our data without preprocessing them!

In [3]:
X, yt, y = Preproccesor.load_multi_label_data(True, False)  
# yt has continuous data, y has binary
V, v_s, v_d, v_a, v_t, v_g, v_n = Preproccesor.load_mlma(True, False)
label_names = ["violence", "directed_vs_generalized", "gender", "race",
               "national_origin", "disability", "religion", "sexual_orientation"]

print("Targets contained:", set(v_t))

Targets contained: {'origin', 'religion', 'disability', 'sexual_orientation', 'other', 'gender'}


In [4]:
"Hostility:", set(v_s)

('Hostility:',
 {'abusive',
  'abusive_disrespectful',
  'abusive_disrespectful_hateful_normal',
  'abusive_hateful',
  'abusive_normal',
  'abusive_offensive',
  'abusive_offensive_disrespectful_hateful_normal',
  'abusive_offensive_disrespectful_normal',
  'abusive_offensive_hateful_disrespectful',
  'abusive_offensive_hateful_disrespectful_normal',
  'abusive_offensive_hateful_normal',
  'abusive_offensive_normal',
  'disrespectful',
  'disrespectful_hateful',
  'disrespectful_normal',
  'fearful',
  'fearful_abusive',
  'fearful_abusive_disrespectful_hateful_normal',
  'fearful_abusive_disrespectful_normal',
  'fearful_abusive_hateful_disrespectful',
  'fearful_abusive_hateful_disrespectful_normal',
  'fearful_abusive_hateful_normal',
  'fearful_abusive_offensive_disrespectful',
  'fearful_abusive_offensive_disrespectful_normal',
  'fearful_abusive_offensive_hateful',
  'fearful_abusive_offensive_hateful_disrespectful',
  'fearful_abusive_offensive_hateful_normal',
  'fearful_abusi

We create our labels for the D2, based on the following rules. Of course sadly, it possible contains errors

In [5]:
y_v = []
for i in range(len(V)):
    temp_y = []
    for j in range(len(label_names)):
        if j == 0:
            if 'abusive' in v_s[i] or 'hateful' in v_s[i] or 'fearful' in v_s[i]:
                temp_y.append(1)
            else:
                temp_y.append(0)
        elif j == 1:
            if 'indirect' in v_d[i]:
                temp_y.append(0)
            else:
                temp_y.append(1)
    temp_c = [0]*6
    if 'gender' in v_t[i]:
        temp_c[0] = 1
    if 'race' in v_t[i] or 'race' in v_g[i] or 'asians' in v_g[i] or 'african_descent' in v_g[i]:
        temp_c[1] = 1
    if 'origin' in v_t[i]:
        temp_c[2] = 1
    if 'disability' in v_t[i]:
        temp_c[3] = 1
    if 'religion' in v_t[i]:
        temp_c[4] = 1
    if 'sexual_orientation' in v_t[i]:
        temp_c[5] = 1
    for k in temp_c:
        temp_y.append(k)
    y_v.append(temp_y)
y_valid = np.array(y_v)

Let's build the embedding matrix

In [6]:
max_features = 15000
max_len = 150
emb_ma = 1
embed_size = 150

tk = Tokenizer(lower=True, filters='', num_words=max_features, oov_token=True)
tk.fit_on_texts(np.concatenate((X, V)))
tokenized = tk.texts_to_sequences(X)
x_train = pad_sequences(tokenized, maxlen=max_len)
tokenized = tk.texts_to_sequences(V)
x_valid = pad_sequences(tokenized, maxlen=max_len)
embedding_matrix = create_embedding_matrix(emb_ma, tk, max_features)
embedding_matrix.shape

(15001, 300)

We create our model:

In [7]:
file_path = "final.hdf5"
check_point = ModelCheckpoint(
    file_path, monitor="val_loss", verbose=1, save_best_only=True, mode="min")
early_stop = EarlyStopping(monitor="val_loss", mode="min", patience=50)
main_input1 = Input(shape=(max_len,), name='main_input1')
x1 = (Embedding(max_features + 1, 300, input_length=max_len,
                weights=[embedding_matrix], trainable=False))(main_input1)
x1 = SpatialDropout1D(0.4)(x1)
x2 = Bidirectional(LSTM(75, dropout=0.5, return_sequences=True))(x1)
x = Dropout(0.55)(x2)
x = Bidirectional(LSTM(50, dropout=0.5, return_sequences=True))(x)
hidden = concatenate([
    Attention(max_len)(x1),
    Attention(max_len)(x2),
    Attention(max_len)(x)
])
hidden = Dense(32, activation='selu')(hidden)
hidden = Dropout(0.5)(hidden)
hidden = Dense(16, activation='selu')(hidden)
hidden = Dropout(0.5)(hidden)
output_lay1 = Dense(8, activation='sigmoid')(hidden)
model = Model(inputs=[main_input1], outputs=output_lay1)
model.compile(loss="binary_crossentropy", optimizer=Adam(),
              metrics=['binary_accuracy'])

We train it

In [8]:
#model.fit(x_train, y, validation_data=(x_valid, y_valid),
#          batch_size=64, epochs=150, verbose=1, shuffle=True, callbacks=[check_point,early_stop])

If the model is trained already, we can just load our weights:

In [9]:
model.load_weights(filepath='weights/ethos_multi_label.hdf5')
model.compile(loss="binary_crossentropy", optimizer=Adam(), metrics=['binary_accuracy'])

Finally, we predict the instances of D2

In [10]:
y_pr = model.predict(x_train)
vpp = model.predict(x_valid)

# Let's evaluate the results:

In [None]:
y_results = {}
y_results["Data"] = []
y_results["F1 Samples"] = []
y_results["F1 Macro"] = []
y_results["F1 Micro"] = []
y_results["Pr Samples"] = []
y_results["Pr Macro"] = []
y_results["Pr Micro"] = []
y_results["Re Samples"] = []
y_results["Re Macro"] = []
y_results["Re Micro"] = []
y_results["Accuracy"] = []
y_results["Hamming"] = []

def results(y_t, y_p, y_results, name):
    thr = 0.367
    y_pr = []
    for i in y_p:
        temp_y = []
        cou = 0
        for j in i:
            if j > thr:
                temp_y.append(1)
            else:
                temp_y.append(0)
            cou = cou + 1
        y_pr.append(temp_y)
    y_p = np.array(y_pr)
    y_results["Data"].append(name)
    y_results["F1 Samples"].append(f1_score(y_t, y_p, average='samples'))
    y_results["F1 Macro"].append(f1_score(y_t, y_p, average='macro'))
    y_results["F1 Micro"].append(f1_score(y_t, y_p, average='micro'))
    y_results["Pr Samples"].append(
        precision_score(y_t, y_p, average='samples'))
    y_results["Pr Macro"].append(precision_score(y_t, y_p, average='macro'))
    y_results["Pr Micro"].append(precision_score(y_t, y_p, average='micro'))
    y_results["Re Samples"].append(recall_score(y_t, y_p, average='samples'))
    y_results["Re Macro"].append(recall_score(y_t, y_p, average='macro'))
    y_results["Re Micro"].append(recall_score(y_t, y_p, average='micro'))
    y_results["Accuracy"].append(accuracy_score(y_t, y_p))
    y_results["Hamming"].append(hamming_loss(y_t, y_p))
    return y_results

y_results = results(y, y_pr, y_results, 'Train')
y_results = results(y_valid, vpp, y_results, 'Valid')

Here are the results:

In [13]:
for k, v in y_results.items():
    if k == 'Data':
        print("{}\t{}\t{}".format(k+'      ', v[0], v[1]))
    elif k == 'Hamming':
        print("{}\t{}\t{}".format(k+' ', round(v[0], 3), round(v[1], 3)))
    else:
        print("{}\t{}\t{}".format(k, round(v[0], 3), round(v[1], 3)))

Data      	Train	Valid
F1 Samples	0.891	0.385
F1 Macro	0.92	0.406
F1 Micro	0.89	0.416
Pr Samples	0.931	0.401
Pr Macro	0.931	0.405
Pr Micro	0.905	0.378
Re Samples	0.894	0.426
Re Macro	0.909	0.517
Re Micro	0.875	0.461
Accuracy	0.67	0.175
Hamming 	0.045	0.206


Let's see few examples:

In [14]:
for i in range(3):
    print('Instance:', V[i])
    print('  Sentiment:', v_s[i])
    print('    Incites Violence:', vpp[i][0])
    print('    No Violence:', 1-vpp[i][0])
    print('  Scope:', v_d[i])
    print('    Direct:', vpp[i][1])
    print('    Generalized:', 1-vpp[i][1])
    print('  Hate Group:', v_t[i], v_g[i])
    print('    Gender:', vpp[i][2])
    print('    Race:', vpp[i][3])
    print('    Origin:', vpp[i][4])
    print('    Disability:', vpp[i][5])
    print('    Religion:', vpp[i][6])
    print('    Sexual Orientation:', vpp[i][7])
    print()

Instance: if america had another years of obama ideology via hillary we would be well on our way to being shithole country
  Sentiment: fearful_abusive_hateful_disrespectful_normal
    Incites Violence: 0.12612055
    No Violence: 0.8738794475793839
  Scope: indirect
    Direct: 0.047181424
    Generalized: 0.9528185762465
  Hate Group: origin other
    Gender: 0.0036667627
    Race: 0.014548816
    Origin: 0.84192735
    Disability: 0.01104671
    Religion: 0.03923865
    Sexual Orientation: 0.0065618805

Instance: most canadians have never met seen or associated with person who are currently labelled as retarded plan u2026 url
  Sentiment: offensive
    Incites Violence: 0.15779072
    No Violence: 0.8422092795372009
  Scope: indirect
    Direct: 0.11329159
    Generalized: 0.8867084085941315
  Hate Group: disability special_needs
    Gender: 0.0217287
    Race: 0.026790421
    Origin: 0.71264493
    Disability: 0.16527565
    Religion: 0.019782986
    Sexual Orientation: 0.01995425


The results per label:

In [15]:
for l in range(len(label_names)):
    print(label_names[l])
    y_t = y_valid[:, l:l+1]
    y_t = np.array([k[0] for k in y_t])
    y_p = vpp[:, l:l+1]
    y_p = np.array([1 if k[0] > 0.34375 else 0 for k in y_p])
    print(' MLMA Accuracy', round(balanced_accuracy_score(y_t, y_p), 4))
    print(' MLMA F1', round(f1_score(y_t, y_p, average='weighted'), 4))
    print(' Train F1 NH:', round(f1_score(y_t, y_p, average=None)[
          0], 4), 'H:', round(f1_score(y_t, y_p, average=None)[1], 4))

violence
 MLMA Accuracy 0.5086
 MLMA F1 0.5948
 Train F1 NH: 0.725 H: 0.2909
directed_vs_generalized
 MLMA Accuracy 0.5528
 MLMA F1 0.5539
 Train F1 NH: 0.5936 H: 0.1998
gender
 MLMA Accuracy 0.7034
 MLMA F1 0.8771
 Train F1 NH: 0.9294 H: 0.4659
race
 MLMA Accuracy 0.7597
 MLMA F1 0.9278
 Train F1 NH: 0.9461 H: 0.2406
national_origin
 MLMA Accuracy 0.6788
 MLMA F1 0.6897
 Train F1 NH: 0.7489 H: 0.6123
disability
 MLMA Accuracy 0.6964
 MLMA F1 0.838
 Train F1 NH: 0.9106 H: 0.5344
religion
 MLMA Accuracy 0.7165
 MLMA F1 0.9765
 Train F1 NH: 0.9851 H: 0.2731
sexual_orientation
 MLMA Accuracy 0.8983
 MLMA F1 0.9421
 Train F1 NH: 0.965 H: 0.7129


# Explanations on low performing 'race' label:

Data ethos did not classify as race, but they contained hate speech towards race (according D2)

In [16]:
for i in range(len(y)):
    if y_valid[i][3] == 1 and vpp[i][3] < 0.36:
        print(i, V[i])

96  well my parents like carikla ching chong guy in your college
166 yay kelas ching chong today
254  okay ching chong
335 remember it was some ching chong hoe on here that was flexin on him years ago found out they was fuckin smh


Date ethos predicted as hate speech content towards race. However, D2 did not had the race label.

In [17]:
cou = 0
for i in range(len(y)):
    if y_valid[i][3] == 0 and vpp[i][3] > 0.36:
        print(i, V[i])
        cou = cou + 1
cou

44  so many dead and illegal aliens will vote blue in november dems aren worried
56 literally waited at red light for five minutes am such fucking retard
89 just found out that everyones origins are from africa am aloud to say nigger nohatejustasking
121 lol you know what mean nigger got caught again url
124  nigga it started lagging so we left retard
126 remember negro my open mind got skin door wayne
128   love you negro
132 keep in mind negro so my open mind got screen door impressive carterv
159 when white says wassup nigger because you made video calling yourself nigger url
175 it is okay to be white auspol url
192 black youtubers will you be demonetized if you say nigger really wanna say nigger on my channel
198  this shooting is totally leftist setup and an attack on our second amendment rights find the perps u2026 url
200 it okay to be white motion suspiciously delayed url
212 my baby told me was beautiful with my bare face and nigger twist url
216 in pedo speak hotdogs means l

35