In [1]:
import os
import sys
import numpy as np
from sklearn.metrics import hamming_loss, accuracy_score, precision_score, recall_score, f1_score

In [2]:
NPY_FILE_PATH = "../npy_files/7Jun_vision"

filenames = os.listdir(path=NPY_FILE_PATH)
filenames = sorted(filenames, key=lambda x: x.split('_')[-1])
filenames = list(set([filename.split('_')[2] for filename in filenames]))

In [3]:
def hamming_score(y_true, y_pred):
    acc_list = []
    for i in range(y_true.shape[0]):
        set_true = set( np.where(y_true[i])[0] )
        set_pred = set( np.where(y_pred[i])[0] )
        tmp_a = None
        if len(set_true) == 0 and len(set_pred) == 0:
            tmp_a = 1
        else:
            tmp_a = len(set_true.intersection(set_pred))/\
                    float( len(set_true.union(set_pred)) )
        acc_list.append(tmp_a)
    
    return np.mean(acc_list)

In [13]:
eval_labels_data = np.load(f'{NPY_FILE_PATH}/eval_labels_winter-plant-155.npy', allow_pickle=True)

In [14]:
eval_labels_data[0]

array([[0.0, 0.0, 0.0, 1.0, 0.0, 1.0],
       [0.0, 1.0, 0.0, 1.0, 0.0, 1.0],
       [0.0, 0.0, 1.0, 1.0, 0.0, 1.0],
       [0.0, 0.0, 1.0, 1.0, 0.0, 1.0],
       [0.0, 0.0, 0.0, 1.0, 0.0, 1.0],
       [0.0, 0.0, 0.0, 1.0, 0.0, 1.0],
       [0.0, 0.0, 0.0, 1.0, 0.0, 1.0],
       [0.0, 0.0, 1.0, 1.0, 0.0, 1.0],
       [0.0, 0.0, 1.0, 1.0, 0.0, 1.0],
       [0.0, 0.0, 1.0, 0.0, 0.0, 1.0],
       [0.0, 0.0, 1.0, 0.0, 0.0, 1.0],
       [0.0, 0.0, 1.0, 1.0, 0.0, 1.0],
       [0.0, 0.0, 0.0, 1.0, 0.0, 1.0],
       [0.0, 0.0, 1.0, 0.0, 0.0, 1.0],
       [0.0, 0.0, 0.0, 1.0, 0.0, 1.0],
       [0.0, 0.0, 0.0, 1.0, 0.0, 1.0],
       [0.0, 0.0, 0.0, 1.0, 0.0, 1.0],
       [0.0, 0.0, 0.0, 1.0, 0.0, 1.0],
       [0.0, 1.0, 0.0, 0.0, 0.0, 1.0],
       [0.0, 0.0, 0.0, 1.0, 0.0, 1.0],
       [0.0, 0.0, 0.0, 1.0, 0.0, 1.0],
       [0.0, 0.0, 0.0, 1.0, 0.0, 1.0],
       [0.0, 0.0, 0.0, 1.0, 0.0, 1.0],
       [0.0, 0.0, 0.0, 1.0, 0.0, 1.0],
       [0.0, 1.0, 0.0, 0.0, 0.0, 1.0],
       [1.0, 0.0, 0.0, 0.

In [22]:
for filename in filenames:
        run_name = filename.split('.')[0]
        print(f"The metrics for {run_name} run are:")
        labels_data = np.load(f'{NPY_FILE_PATH}/test_labels_{filename}', allow_pickle=True)
        preds_data = np.load(f'{NPY_FILE_PATH}/test_preds_{filename}', allow_pickle=True)
        eval_labels_data = np.load(f'{NPY_FILE_PATH}/eval_labels_{filename}', allow_pickle=True)
        eval_preds_data = np.load(f'{NPY_FILE_PATH}/eval_preds_{filename}', allow_pickle=True)
        labels = []
        preds = []
        eval_labels = []
        eval_preds = []
        for i in range(len(labels_data)):
                for j in range(len(labels_data[i])):
                        labels.append(labels_data[i][j][:5])
                        preds.append(preds_data[i][j][:5])
        for i in range(len(eval_labels_data)):
                for j in range(len(eval_labels_data[i])):
                        eval_labels.append(eval_labels_data[i][j][:5])
                        eval_preds.append(eval_preds_data[i][j][:5])
        labels = np.array(labels)
        preds = np.array(preds)
        eval_labels = np.array(eval_labels)
        eval_preds = np.array(eval_preds)
        max_hamming_score = 0
        max_thres = 0
        for thresh in np.linspace(0, 1, 11):
                copy_preds = eval_preds.copy()
                copy_preds[copy_preds >= thresh] = 1
                copy_preds[copy_preds < thresh] = 0
                if max_hamming_score < hamming_score(eval_labels, copy_preds):
                        max_hamming_score = hamming_score(eval_labels, copy_preds)
                        max_thres = thresh
                        # max_hamming_loss = hamming_loss(eval_labels, copy_preds)
        copy_preds = preds.copy()
        copy_preds[copy_preds >= max_thres] = 1
        copy_preds[copy_preds < max_thres] = 0                
        
        print("Thresh:", max_thres, "\tHamming Loss:", hamming_loss(copy_preds, labels), "\tHamming Score:", hamming_score(copy_preds, labels))
        preds[preds >= 0.5] = 1
        preds[preds < 0.5] = 0
        
        mapping = {0: "Organisation", 1: "Location", 2: "Individual", 3: "Community", 4: "None"}
        for i in range(0, 5):
                print(f"{mapping[i]} Precision: {precision_score(labels[:, i], preds[:, i])} Recall: {recall_score(labels[:, i], preds[:, i])} F1 Score: {f1_score(labels[:, i], preds[:, i])}")
        print()

The metrics for pleasant-deluge-145 run are:
Thresh: 0.30000000000000004 	Hamming Loss: 0.20042372881355933 	Hamming Score: 0.5296610169491526
Organisation Precision: 0.0 Recall: 0.0 F1 Score: 0.0
Location Precision: 0.0 Recall: 0.0 F1 Score: 0.0
Individual Precision: 0.6363636363636364 Recall: 0.04666666666666667 F1 Score: 0.08695652173913045
Community Precision: 0.3333333333333333 Recall: 0.06060606060606061 F1 Score: 0.10256410256410256
None Precision: 0.6401673640167364 Recall: 0.827027027027027 F1 Score: 0.7216981132075472

The metrics for eager-durian-150 run are:
Thresh: 0.30000000000000004 	Hamming Loss: 0.2055084745762712 	Hamming Score: 0.5010593220338984
Organisation Precision: 0.0 Recall: 0.0 F1 Score: 0.0
Location Precision: 0.0 Recall: 0.0 F1 Score: 0.0
Individual Precision: 0.7692307692307693 Recall: 0.06666666666666667 F1 Score: 0.1226993865030675
Community Precision: 0.48 Recall: 0.24242424242424243 F1 Score: 0.3221476510067114
None Precision: 0.779874213836478 Recall:

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Community Precision: 0.2891566265060241 Recall: 0.24242424242424243 F1 Score: 0.2637362637362637
None Precision: 0.6428571428571429 Recall: 0.7297297297297297 F1 Score: 0.6835443037974683

The metrics for dashing-morning-143 run are:
Thresh: 0.2 	Hamming Loss: 0.25254237288135595 	Hamming Score: 0.46204096045197734
Organisation Precision: 0.0 Recall: 0.0 F1 Score: 0.0
Location Precision: 0.2727272727272727 Recall: 0.09375 F1 Score: 0.13953488372093023
Individual Precision: 0.5588235294117647 Recall: 0.12666666666666668 F1 Score: 0.20652173913043478
Community Precision: 0.5 Recall: 0.050505050505050504 F1 Score: 0.09174311926605505
None Precision: 0.5677655677655677 Recall: 0.8378378378378378 F1 Score: 0.6768558951965065

The metrics for driven-silence-145 run are:
Thresh: 0.30000000000000004 	Hamming Loss: 0.2572033898305085 	Hamming Score: 0.41066384180790955
Organisation Precision: 0.0 Recall: 0.0 F1 Score: 0.0
Location Precision: 0.1875 Recall: 0.1875 F1 Score: 0.1875
Individual Pre

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [29]:
preds[preds >= 0.5] = 1
preds[preds < 0.5] = 0

In [30]:
mapping = {0: "Organisation", 1: "Location", 2: "Individual", 3: "Community", 4: "None"}
for i in range(0, 5):
    print(f"{mapping[i]} Precision: {precision_score(labels[:, i], preds[:, i])} Recall: {recall_score(labels[:, i], preds[:, i])} F1 Score: {f1_score(labels[:, i], preds[:, i])}")

Organisation Precision: 0.0 Recall: 0.0 F1 Score: 0.0
Location Precision: 0.0 Recall: 0.0 F1 Score: 0.0
Individual Precision: 0.5288461538461539 Recall: 0.36666666666666664 F1 Score: 0.4330708661417323
Community Precision: 0.36923076923076925 Recall: 0.24242424242424243 F1 Score: 0.29268292682926833
None Precision: 0.5925925925925926 Recall: 0.7783783783783784 F1 Score: 0.6728971962616822


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [21]:
preds

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       ...,
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])

In [13]:
idx = []
for i in range(len(preds)):
    if (preds[i] != labels[i]).any():
        idx.append(i)

In [18]:
import pandas as pd

In [22]:
df = pd.read_csv('data/hate_towards_whom_aug/test.csv')
df = df.iloc[idx]

In [None]:
[label for label in labels[idx].tolist()]

In [31]:
df['prediction'] = [",".join([mapping[i] for i, _pred in enumerate(pred) if _pred == 1]) for pred in preds[idx].tolist()]

In [34]:
df.tail(50)

Unnamed: 0,url,category,comment,label,target,prediction
63,https://www.bitchute.com/video/IqAY5vQy4IE4/,news & politics,Now that's how railroads of developed country ...,yes,Location,"Location,Community"
69,https://www.youtube.com/watch?v=VH0rNhUKOto,sports,This what happens when you just inherited mone...,yes,Individual,Community
71,https://www.youtube.com/watch?v=dExrKx7hE6A,health,The truth will shine it’s brightness through t...,yes,Organisation,Individual
74,https://www.youtube.com/watch?v=mLTBW7E6q4w,news & politics,She Mark Zuckerberg dressed in Drag. Don’t be ...,yes,Individual,"Individual,Community"
75,https://www.youtube.com/watch?v=PCVXj4g10j8,sports,aight mate I'm on my way to your house with an...,yes,Individual,Location
77,https://www.youtube.com/watch?v=W6AnCcxsRSk,religion,"This is absolutely shame, Indians giving z plu...",yes,Location,"Individual,Community"
83,https://www.bitchute.com/video/2HGLRJWmq7U/,news & politics,That Asian chinese woman has to go back too.,yes,Individual,Community
84,https://www.youtube.com/watch?v=iNxJf6V6nds,entertainment,Because of westerners,yes,Community,Individual
97,https://www.youtube.com/watch?v=duY9YfA-kuI&ab...,entertainment,John Oliver is living proof that the English c...,yes,Individual,"Individual,Community"
103,https://www.youtube.com/watch?v=mnQBiEw_W1g,news & politics,These opposition states will drive us to Slavery,yes,Organisation,Location
