In [3]:
import os
import json
import pandas as pd

In [4]:
lh_filepath = "/mnt/data1/datasets/hatespeech/latent_hatred/truncated/explanations-revised/train-explanations.jsonl"

misogynistic_filepath = "/mnt/data1/datasets/memes/Misogynistic_MEME/annotations/explanation.jsonl"
misogynistic_features_dir = "/mnt/data1/datasets/memes/cmtl-rag/mmhs/embeddings/clip-ViT-B-32"

In [5]:
def load_caption(filename, caption_dir):
    caption_filepath = os.path.join(caption_dir, filename)
    with open(caption_filepath) as f:
        d = json.load(f)

    return d['caption']


def load_features(features_dir, filename):
    # Iterate over all files in the folder
    file_path = os.path.join(features_dir, filename)
    with open(file_path, 'rb') as file:
        data = pickle.load(file)

    return data

def get_majority_voting(lst):
    from collections import Counter
    count = Counter(lst)
    most_common = count.most_common(1)
    return most_common[0][0] if most_common else None

In [6]:
with open("lh_target_mapping.json") as f:
    target_mapping = json.load(f)

print("Number of Targets:", len(target_mapping))
print("Number of Targets Mappings:", len(set(target_mapping.values())))
print("Targets Mappings:", set(target_mapping.values()))

Number of Targets: 326
Number of Targets Mappings: 8
Targets Mappings: {'political', 'religion', 'disability', 'organisations', 'race', 'others', 'nationality', 'sex'}


In [7]:
target2label = {
    None: 0
}

print(target2label)
for idx, v in enumerate(set(target_mapping.values())):
    target2label[v] = idx + 1

print(target2label)

{None: 0}
{None: 0, 'political': 1, 'religion': 2, 'disability': 3, 'organisations': 4, 'race': 5, 'others': 6, 'nationality': 7, 'sex': 8}


In [8]:
lh_annots = []
with open(lh_filepath) as f:
    for line in f:
        tmp = json.loads(line)
        obj = {
            "id": f"lh_{tmp['ID']}",
            "img": None,
            "caption": None,
            "text": tmp['post'],
            "label": tmp['class'],
            "implicit_class": tmp['implicit_class'],
            "target": tmp['target'],
            "target_category": target_mapping[tmp['target'].lower().replace('"', "'")] if tmp['target'] else None,
            "rationale": tmp["mistral_instruct_statement"],
            "features": "N/A"
        }
        obj["stratified_label"] = target2label[obj['target_category']]
        lh_annots.append(obj)

lh_annots[-690]

{'id': 'lh_916944251635752960',
 'img': None,
 'caption': None,
 'text': ' i hate to break to you  but hey all you  " white " " and " " black " " people ? you are the immigrant . have a great day . "',
 'label': 1,
 'implicit_class': None,
 'target': 'Whites and Blacks',
 'target_category': 'race',
 'rationale': '1. Targeted Group: The post targets all White and Black people by referring to them as "you" and "hey all you" without any consideration for individual differences. This lumping of people into categories based on their race is a broad generalization and perpetuates a harmful divide between races.\n2. Derogatory Imagery or Language: Although not explicitly stated, the use of "hey all you \'white\' \'black\' people" implies that both white and black people are inferior to someone else. This is a form of divisive language that fuels racism and contributes to a hostile environment.\n3. Impact on Bias, Stereotypes, Prejudice, Discrimination, or Inciting Harm: The post\'s hateful un

In [9]:
target_list = set([x['target'] for x in lh_annots])
target_list.remove(None)
target_list = set([x.lower() for x in target_list])
print("Number of Targets:", len(target_list))

Number of Targets: 326


In [10]:
labels = set([x['label'] for x in lh_annots])
labels

{0, 1, 2}

In [11]:
misogynistic_annots = []
with open(misogynistic_filepath) as f:
    for idx, line in enumerate(f):
        tmp = json.loads(line)
        tmp['content_for_retrieval'] = tmp['content']
        tmp['target'] = "misogyny"
        tmp["target_category"] = "sex"
        misogynistic_annots.append(tmp)

misogynistic_annots[0]

{'id': 'res_ENG01',
 'img': '/mnt/data1/datasets/memes/Misogynistic_MEME/images/img/combined/res_ENG01.jpg',
 'caption': 'a man and a woman in a red convertible car',
 'web_entities': 'Car, Misogyny, Meme, Shelby Mustang, Ford Mustang, Internet meme, Text, Driving, Automatic Transmission, Manual Transmission',
 'text': 'The way every man feels when a woman is driving',
 'content': 'The way every man feels when a woman is driving a man and a woman in a red convertible car',
 'label': 1,
 'rationale': '</s> Targeted Group: Despite the lack of explicit reference to any specific group, the meme conveys a negative perception of women drivers, and as such, implicitly targets women as a group. \n\nDerogatory Imagery/Language: The statement "The way every man feels when a woman is driving" implies that women drivers cause negative feelings in men, creating an unnecessary and derogatory association between gender and driving ability.\n\nImpact on Bias/Stereotypes: The meme reinforces the harmfu

In [12]:
labels = set([x['label'] for x in misogynistic_annots])
labels

{0, 1}

In [13]:
# for k, v in meme_templates.items():
#     print(k, v[-1])

### 1 - Rationale Similarity Matching

Use TF-IDF to find tweets that are of similar explanations

In [14]:
annots = lh_annots + misogynistic_annots
corpus, labels = [], []
for a in annots:
    corpus.append(a['rationale'].strip())
    labels.append(1 if a['label'] >= 1 else 0)

print("Annots:", len(annots))
print("Corpus:", len(corpus))
print("Labels:", len(labels))

Annots: 10800
Corpus: 10800
Labels: 10800


In [15]:
from matching.tfidf_wrapper import compute_corpus_matrix # , get_top_k_similar, get_top_k_dissimilar
from sklearn.metrics.pairwise import cosine_similarity

In [16]:
corpus_matrix, vectorizer = compute_corpus_matrix(corpus)
print("Corpus Matrix:", corpus_matrix.shape)

Corpus Matrix: (10800, 15962)


In [17]:
sim_matrix = cosine_similarity(corpus_matrix, corpus_matrix)
print("Similarity Matrices:", sim_matrix.shape)

Similarity Matrices: (10800, 10800)


### 2 - Stage 1 Training (Tweets Dataset)

In [18]:
df = pd.DataFrame(lh_annots)
X = df
y = df['stratified_label']

In [19]:
# Stratified Split Records for Train/Dev
from sklearn.model_selection import StratifiedShuffleSplit
split = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)

for train_index, test_index in split.split(X, y):
    strat_train_set = df.loc[train_index]
    strat_test_set = df.loc[test_index]

# Output the split datasets
print("Train Set:")

print(strat_train_set['stratified_label'].value_counts())
print("\nTest Set:")
print(strat_test_set['stratified_label'].value_counts())

Train Set:
stratified_label
0    5271
5    1117
7     952
2     313
1     190
8      69
4      50
6      36
3       2
Name: count, dtype: int64

Test Set:
stratified_label
0    1318
5     280
7     238
2      78
1      47
8      17
4      12
6       9
3       1
Name: count, dtype: int64


In [38]:
lh_annots[0]

{'id': 'lh_803579998141452288',
 'img': None,
 'caption': None,
 'text': 'hacking is a big danger  computers are not full proof and e-payment is complete hoax even e-mails are not safe .',
 'label': 0,
 'implicit_class': None,
 'target': None,
 'target_category': None,
 'rationale': "1. Targeted Group: The post doesn't target any specific group of people. It raises a general concern about security risks, particularly in relation to digital technology and online transactions.\n\n2. Derogatory Imagery or Language: The post contains no explicit derogatory imagery or language towards any particular group or individual.\n\n3. Impact on Bias, Stereotypes, Prejudice, Discrimination, or Inciting Harm: The post doesn't contribute to perpetuating bias, stereotypes, or inciting harm against any specific group. Instead, it expresses a security concern that affects everyone who uses digital technology and engages in online transactions.\n\nIn summary, the post is not hateful.",
 'features': 'N/A',


In [39]:
def get_strong_positives(record, sim_vector, annots, k):
    positives = []
    indices = sim_vector.argsort()[::-1] # High confidence first
    for ind in indices:
        ind = int(ind)
        confid = round(sim_vector[ind], 5)
        annot = annots[ind]

        obj = {
            "id": annot["id"],
            "content": annot["text"],
            "rationale": annot["rationale"],
            "confid": confid
        }

        if record['target_category']:
            if record['target_category'] == annot['target_category']:
                positives.append(obj)
        else:
            if record['label'] == annot['label']:
                positives.append(obj)

        if len(positives) == k:
            break
            
    return {"positives": positives}

def get_strong_negatives(record, sim_vector, annots, k):
    negatives = []
    indices = sim_vector.argsort()[::] # Low confidence first
    for ind in indices:
        ind = int(ind)
        confid = round(sim_vector[ind], 5)
        annot = annots[ind]

        obj = {
            "id": annot["id"],
            "content": annot["text"],
            "rationale": annot["rationale"],
            "confid": confid
        }


        if record['target_category']:
            if record['target_category'] != annot['target_category']:
                negatives.append(obj)
        else:
            if record['label'] != annot['label']:
                negatives.append(obj)

        if len(negatives) == k:
            break
    return {"negatives": negatives}

In [40]:
# Locating records with mismatched extracted rationales
import tqdm
import numpy as np

for k in [1]:
    training_records = []
    for idx, record in tqdm.tqdm(enumerate(lh_annots)):
        sim_vector = sim_matrix[idx]
        sim_vector = np.concatenate((sim_vector[0:idx], sim_vector[idx + 1:10000]))
        sim_annots = lh_annots[0:idx] + lh_annots[idx + 1:10000]
        
        pos = get_strong_positives(record, sim_vector, sim_annots, k)
        negs = get_strong_negatives(record, sim_vector, sim_annots, k)
        obj = {
            "id": record["id"],
            "query": record["text"],
            "label": record["label"],
            "target_category": record["target_category"],
        }
        obj = {**obj, **pos, **negs}

        
        key = record['id']
        training_records.append(obj)
    
    print(len(training_records))
    print(json.dumps(record, indent=2))
    print(json.dumps(training_records, indent=2))
    
    with open(f"../data/lh_training_{k}_demonstrations.json", "w+") as f:
        json.dump(training_records, f)    

10000it [00:13, 727.00it/s]
IOPub data rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_data_rate_limit`.

Current values:
ServerApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
ServerApp.rate_limit_window=3.0 (secs)



In [18]:
# Locating records with mismatched extracted rationales
import tqdm
import numpy as np

for k in [1]:
    training_records = []
    for idx, record in tqdm.tqdm(enumerate(lh_annots[:1])):
        sim_vector = sim_matrix[idx]
        sim_vector = np.concatenate((sim_vector[0:idx], sim_vector[idx + 1:10000]))
        sim_annots = lh_annots[0:idx] + lh_annots[idx + 1:10000]
        
        pos = get_strong_positives(record, sim_vector, sim_annots, k)
        negs = get_strong_negatives(record, sim_vector, sim_annots, k)
        obj = {
            "id": record["id"],
            "query": record["text"],
            "label": record["label"],
            "target_category": record["target_category"],
        }
        obj = {**obj, **pos, **negs}
    
        key = record['id']
        training_records.append(obj)
    
    print(len(training_records))
    print(json.dumps(record, indent=2))
    print(json.dumps(training_records, indent=2))
    
    with open(f"../datasets/lh_training_{k}_demonstrations.json", "w+") as f:
        json.dump(training_records, f)    

1it [00:00, 625.46it/s]

1
{
  "id": "lh_803579998141452288",
  "img": null,
  "caption": null,
  "text": "hacking is a big danger  computers are not full proof and e-payment is complete hoax even e-mails are not safe .",
  "label": 0,
  "implicit_class": null,
  "target": null,
  "target_category": null,
  "rationale": "1. Targeted Group: The post doesn't target any specific group of people. It raises a general concern about security risks, particularly in relation to digital technology and online transactions.\n\n2. Derogatory Imagery or Language: The post contains no explicit derogatory imagery or language towards any particular group or individual.\n\n3. Impact on Bias, Stereotypes, Prejudice, Discrimination, or Inciting Harm: The post doesn't contribute to perpetuating bias, stereotypes, or inciting harm against any specific group. Instead, it expresses a security concern that affects everyone who uses digital technology and engages in online transactions.\n\nIn summary, the post is not hateful.",
  "feat




FileNotFoundError: [Errno 2] No such file or directory: '../datasets/lh_training_1_demonstrations.json'

### Data for Facebook's GitHub Repository

In [None]:
def get_strong_positives(record, sim_vector, annots, k):
    positives = []
    indices = sim_vector.argsort()[::-1] # High confidence first
    for ind in indices:
        ind = int(ind)
        confid = round(sim_vector[ind], 5)
        annot = annots[ind]

        obj = {
            "text": record["rationale"]
        }

        if record['target_category']:
            if record['target_category'] == annot['target_category']:
                positives.append(obj)
        else:
            if record['label'] == annot['label']:
                positives.append(obj)

        if len(positives) == k:
            break
            
    return {"positive_ctxs": positives}

def get_strong_negatives(record, sim_vector, annots, k):
    negatives = []
    indices = sim_vector.argsort()[::] # Low confidence first
    for ind in indices:
        ind = int(ind)
        confid = round(sim_vector[ind], 5)
        annot = annots[ind]

        obj = {
            "passage": record["rationale"]
        }


        if record['target_category']:
            if record['target_category'] != annot['target_category']:
                negatives.append(obj)
        else:
            if record['label'] != annot['label']:
                negatives.append(obj)

        if len(negatives) == k:
            break
    return {"negative_ctxs": negatives}

# Locating records with mismatched extracted rationales
import tqdm
import numpy as np

for k in [1]:
    training_records = []
    for idx, record in tqdm.tqdm(enumerate(lh_annots[:1])):
        sim_vector = sim_matrix[idx]
        sim_vector = np.concatenate((sim_vector[0:idx], sim_vector[idx + 1:10000]))
        sim_annots = lh_annots[0:idx] + lh_annots[idx + 1:10000]
        
        pos = get_strong_positives(record, sim_vector, sim_annots, k)
        negs = get_strong_negatives(record, sim_vector, sim_annots, k)
        obj = {
            "id": record["id"],
            "query": record["text"],
            "label": record["label"],
            "target_category": record["target_category"],
        }
        obj = {**obj, **pos, **negs}
    
        key = record['id']
        training_records.append(obj)
    
    print(len(training_records))
    print(json.dumps(record, indent=2))
    print(json.dumps(training_records, indent=2))
    
    with open(f"../datasets/lh_training_{k}_demonstrations.json", "w+") as f:
        json.dump(training_records, f)    