In [1]:
import os
import sys
import numpy as np
from sklearn.metrics import hamming_loss, accuracy_score, precision_score, recall_score, f1_score

In [2]:
# Load the saved test-set predictions and their matching labels
# (both files carry the tag "silvery-plant-35").
preds, labels = (
    np.load(npy_path)
    for npy_path in (
        './npy_files/test_preds_silvery-plant-35.npy',
        './npy_files/test_labels_silvery-plant-35.npy',
    )
)

In [3]:
def _sigmoid(values):
    """Elementwise logistic function 1 / (1 + exp(-values))."""
    return 1/(1 + np.exp(-values))


# NOTE: this rebinds `preds`, so re-running the cell applies the sigmoid a
# second time; reload the arrays before re-running.
preds = _sigmoid(preds)

In [4]:
def hamming_score(y_true, y_pred):
    """Mean per-sample intersection-over-union of the active label sets.

    For every row, the positions of the nonzero entries in ``y_true`` and
    ``y_pred`` are collected as sets and the row is scored as
    ``|true & pred| / |true | pred|``. A row where both sets are empty
    scores 1.

    Parameters
    ----------
    y_true, y_pred : array-like indexable by row, with a ``shape`` on ``y_true``
        Indicator arrays; only ``y_true.shape[0]`` rows are visited.

    Returns
    -------
    The ``np.mean`` of the per-row scores.
    """
    per_sample = []
    for row in range(y_true.shape[0]):
        true_positions = set(np.where(y_true[row])[0])
        pred_positions = set(np.where(y_pred[row])[0])
        combined = true_positions | pred_positions
        if not combined:
            # Neither side has an active label: treat as a perfect match.
            per_sample.append(1)
            continue
        overlap = true_positions & pred_positions
        per_sample.append(len(overlap) / float(len(combined)))

    return np.mean(per_sample)

In [5]:
# Sweep 11 evenly spaced decision thresholds in [0, 1] and report the
# Hamming loss and Hamming score of the binarized predictions at each one.
for thresh in np.linspace(0, 1, 11):
    # Binarize with a single comparison against the untouched scores. The
    # earlier form wrote 1s in place and then re-masked the already-modified
    # array, which only works while every score lies in [0, 1].
    copy_preds = (preds >= thresh).astype(preds.dtype)
    print("Thresh:", thresh, "\tHamming Loss:", hamming_loss(labels, copy_preds), "\tHamming Score:", hamming_score(labels, copy_preds))

Thresh: 0.0 	Hamming Loss: 0.7951690821256039 	Hamming Score: 0.20483091787439614
Thresh: 0.1 	Hamming Loss: 0.1391304347826087 	Hamming Score: 0.7004830917874396
Thresh: 0.2 	Hamming Loss: 0.1294685990338164 	Hamming Score: 0.7037037037037038
Thresh: 0.30000000000000004 	Hamming Loss: 0.1178743961352657 	Hamming Score: 0.7157809983896941
Thresh: 0.4 	Hamming Loss: 0.1111111111111111 	Hamming Score: 0.7238325281803542
Thresh: 0.5 	Hamming Loss: 0.10434782608695652 	Hamming Score: 0.7286634460547503
Thresh: 0.6000000000000001 	Hamming Loss: 0.10821256038647344 	Hamming Score: 0.6972624798711754
Thresh: 0.7000000000000001 	Hamming Loss: 0.10917874396135266 	Hamming Score: 0.6634460547504025
Thresh: 0.8 	Hamming Loss: 0.10917874396135266 	Hamming Score: 0.6344605475040257
Thresh: 0.9 	Hamming Loss: 0.10917874396135266 	Hamming Score: 0.5821256038647343
Thresh: 1.0 	Hamming Loss: 0.20483091787439614 	Hamming Score: 0.0


In [6]:
# Binarize at 0.5, the threshold with the lowest Hamming loss and highest
# Hamming score in the sweep output above. A single comparison replaces the
# two sequential in-place masked writes, whose second mask depended on the
# values written by the first.
# NOTE: `preds` now holds 0/1 values, so re-running the threshold sweep after
# this cell no longer sees the original scores.
preds = (preds >= 0.5).astype(preds.dtype)

In [7]:
# Label column index -> class name.
mapping = {0: "Organisation", 1: "Location", 2: "Individual", 3: "Community", 4: "None"}

# Report precision, recall and F1 for each label column on its own.
for class_idx, class_name in mapping.items():
    true_col = labels[:, class_idx]
    pred_col = preds[:, class_idx]
    precision = precision_score(true_col, pred_col)
    recall = recall_score(true_col, pred_col)
    f1 = f1_score(true_col, pred_col)
    print(f"{class_name} Precision: {precision} Recall: {recall} F1 Score: {f1}")

Organisation Precision: 0.6666666666666666 Recall: 0.36363636363636365 F1 Score: 0.4705882352941177
Location Precision: 0.4444444444444444 Recall: 0.3333333333333333 F1 Score: 0.380952380952381
Individual Precision: 0.6666666666666666 Recall: 0.6538461538461539 F1 Score: 0.6601941747572816
Community Precision: 0.47058823529411764 Recall: 0.45714285714285713 F1 Score: 0.46376811594202894
None Precision: 0.9230769230769231 Recall: 0.9411764705882353 F1 Score: 0.9320388349514563


In [13]:
# Row positions where the binarized prediction differs from the label in at
# least one column (kept as a plain list of ints for later indexing).
row_has_error = (preds != labels).any(axis=1)
idx = np.flatnonzero(row_has_error).tolist()

In [18]:
import pandas as pd

In [22]:
# Load the test split and keep only the rows whose prediction was wrong
# (positions collected in `idx`). The explicit .copy() makes `df` an
# independent frame, so adding columns to it later does not trigger
# pandas' SettingWithCopyWarning.
df = pd.read_csv('data/hate_towards_whom_aug/test.csv').iloc[idx].copy()

In [27]:
# Ground-truth label vectors of the misclassified rows, as nested lists.
labels[idx].tolist()

[[0.0, 0.0, 1.0, 0.0, 0.0],
 [0.0, 1.0, 0.0, 0.0, 0.0],
 [1.0, 0.0, 0.0, 0.0, 0.0],
 [0.0, 0.0, 0.0, 1.0, 0.0],
 [0.0, 0.0, 0.0, 1.0, 0.0],
 [1.0, 0.0, 0.0, 0.0, 0.0],
 [0.0, 0.0, 1.0, 0.0, 0.0],
 [0.0, 0.0, 1.0, 0.0, 0.0],
 [0.0, 0.0, 0.0, 1.0, 0.0],
 [0.0, 0.0, 0.0, 0.0, 1.0],
 [1.0, 0.0, 0.0, 1.0, 0.0],
 [0.0, 0.0, 0.0, 1.0, 0.0],
 [0.0, 0.0, 1.0, 0.0, 0.0],
 [0.0, 0.0, 0.0, 1.0, 0.0],
 [0.0, 0.0, 0.0, 1.0, 0.0],
 [0.0, 0.0, 0.0, 0.0, 1.0],
 [1.0, 0.0, 0.0, 0.0, 0.0],
 [0.0, 0.0, 1.0, 1.0, 0.0],
 [0.0, 0.0, 1.0, 0.0, 0.0],
 [0.0, 1.0, 0.0, 0.0, 0.0],
 [0.0, 0.0, 1.0, 0.0, 0.0],
 [1.0, 0.0, 0.0, 0.0, 0.0],
 [0.0, 0.0, 1.0, 0.0, 0.0],
 [0.0, 0.0, 1.0, 0.0, 0.0],
 [0.0, 1.0, 0.0, 0.0, 0.0],
 [0.0, 0.0, 1.0, 0.0, 0.0],
 [0.0, 0.0, 0.0, 1.0, 0.0],
 [0.0, 0.0, 1.0, 0.0, 0.0],
 [1.0, 0.0, 0.0, 0.0, 0.0],
 [0.0, 0.0, 0.0, 1.0, 0.0],
 [1.0, 0.0, 0.0, 0.0, 0.0],
 [1.0, 0.0, 0.0, 0.0, 0.0],
 [0.0, 0.0, 0.0, 1.0, 0.0],
 [0.0, 1.0, 0.0, 0.0, 0.0],
 [0.0, 0.0, 1.0, 0.0, 0.0],
 [0.0, 0.0, 1.0, 0.0

In [31]:
# Build one comma-separated string of predicted class names per
# misclassified row and attach it as a new column.
predicted_names = []
for pred_row in preds[idx].tolist():
    active = [mapping[col] for col, flag in enumerate(pred_row) if flag == 1]
    predicted_names.append(",".join(active))
df['prediction'] = predicted_names

In [34]:
# Display the last 50 rows of the misclassified-rows frame.
df.tail(50)

Unnamed: 0,url,category,comment,label,target,prediction
63,https://www.bitchute.com/video/IqAY5vQy4IE4/,news & politics,Now that's how railroads of developed country ...,yes,Location,"Location,Community"
69,https://www.youtube.com/watch?v=VH0rNhUKOto,sports,This what happens when you just inherited mone...,yes,Individual,Community
71,https://www.youtube.com/watch?v=dExrKx7hE6A,health,The truth will shine it’s brightness through t...,yes,Organisation,Individual
74,https://www.youtube.com/watch?v=mLTBW7E6q4w,news & politics,She Mark Zuckerberg dressed in Drag. Don’t be ...,yes,Individual,"Individual,Community"
75,https://www.youtube.com/watch?v=PCVXj4g10j8,sports,aight mate I'm on my way to your house with an...,yes,Individual,Location
77,https://www.youtube.com/watch?v=W6AnCcxsRSk,religion,"This is absolutely shame, Indians giving z plu...",yes,Location,"Individual,Community"
83,https://www.bitchute.com/video/2HGLRJWmq7U/,news & politics,That Asian chinese woman has to go back too.,yes,Individual,Community
84,https://www.youtube.com/watch?v=iNxJf6V6nds,entertainment,Because of westerners,yes,Community,Individual
97,https://www.youtube.com/watch?v=duY9YfA-kuI&ab...,entertainment,John Oliver is living proof that the English c...,yes,Individual,"Individual,Community"
103,https://www.youtube.com/watch?v=mnQBiEw_W1g,news & politics,These opposition states will drive us to Slavery,yes,Organisation,Location
