In [1]:
import os
import json
import pandas as pd

In [2]:
lh_filepath = "/mnt/data1/datasets/hatespeech/latent_hatred/truncated/explanations-revised/train-explanations.jsonl"

misogynistic_filepath = "/mnt/data1/datasets/memes/Misogynistic_MEME/annotations/explanation.jsonl"
misogynistic_features_dir = "/mnt/data1/datasets/memes/cmtl-rag/mmhs/embeddings/clip-ViT-B-32"

In [3]:
def load_caption(filename, caption_dir):
    """Return the ``caption`` field of a JSON caption file.

    Args:
        filename: Name of the caption file inside ``caption_dir``.
        caption_dir: Directory that holds the caption JSON files.

    Returns:
        The value stored under the top-level ``"caption"`` key.

    Raises:
        KeyError: If the JSON document has no ``"caption"`` key.
    """
    with open(os.path.join(caption_dir, filename)) as handle:
        return json.load(handle)['caption']


def load_features(features_dir, filename):
    """Load one pickled feature object from ``features_dir/filename``.

    Args:
        features_dir: Directory that holds the pickled feature files.
        filename: Name of the pickle file inside ``features_dir``.

    Returns:
        Whatever object was pickled into the file.
    """
    # Imported here so the function works on a fresh kernel: pickle is not
    # among the notebook's top-level imports.
    import pickle

    file_path = os.path.join(features_dir, filename)
    # SECURITY: unpickling can execute arbitrary code. Only point this at
    # feature files produced by a trusted pipeline.
    with open(file_path, 'rb') as file:
        return pickle.load(file)

def get_majority_voting(lst):
    """Return the most frequent element of ``lst``, or ``None`` if it is empty.

    Ties are resolved by ``Counter.most_common``.
    """
    from collections import Counter

    top = Counter(lst).most_common(1)
    if not top:
        return None
    (winner, _votes), = top
    return winner

In [4]:
# Lookup table used later to translate a record's raw target into a
# target category.
with open("lh_target_mapping.json") as mapping_file:
    target_mapping = json.load(mapping_file)

# Distinct categories the raw targets collapse into.
target_categories = set(target_mapping.values())

print("Number of Targets:", len(target_mapping))
print("Number of Targets Mappings:", len(target_categories))
print("Targets Mappings:", target_categories)

Number of Targets: 326
Number of Targets Mappings: 8
Targets Mappings: {'political', 'religion', 'sex', 'others', 'nationality', 'organisations', 'race', 'disability'}


In [5]:
def _to_lh_record(raw):
    """Convert one raw Latent Hatred JSONL row into the notebook's record schema."""
    target = raw['target']
    if target:
        # Keys are looked up lower-cased with double quotes swapped for single
        # quotes (presumably how lh_target_mapping.json was keyed — verify).
        category = target_mapping[target.lower().replace('"', "'")]
    else:
        category = None

    return {
        "id": f"lh_{raw['ID']}",
        "img": None,
        "caption": None,
        "text": raw['post'],
        "label": raw['class'],
        "implicit_class": raw['implicit_class'],
        "target": target,
        "target_category": category,
        "rationale": raw["mistral_instruct_statement"],
        "features": "N/A"
    }


lh_annots = []
with open(lh_filepath) as f:
    # One JSON document per line.
    lh_annots.extend(_to_lh_record(json.loads(line)) for line in f)

lh_annots[0]

{'id': 'lh_803579998141452288',
 'img': None,
 'caption': None,
 'text': 'hacking is a big danger  computers are not full proof and e-payment is complete hoax even e-mails are not safe .',
 'label': 0,
 'implicit_class': None,
 'target': None,
 'target_category': None,
 'rationale': "1. Targeted Group: The post doesn't target any specific group of people. It raises a general concern about security risks, particularly in relation to digital technology and online transactions.\n\n2. Derogatory Imagery or Language: The post contains no explicit derogatory imagery or language towards any particular group or individual.\n\n3. Impact on Bias, Stereotypes, Prejudice, Discrimination, or Inciting Harm: The post doesn't contribute to perpetuating bias, stereotypes, or inciting harm against any specific group. Instead, it expresses a security concern that affects everyone who uses digital technology and engages in online transactions.\n\nIn summary, the post is not hateful.",
 'features': 'N/A'}

In [6]:
# Distinct targets across the Latent Hatred records, compared case-insensitively.
# Records without a target are filtered out rather than removed afterwards with
# set.remove(None), which raises KeyError when no record has a null target.
target_list = {x['target'].lower() for x in lh_annots if x['target'] is not None}
print("Number of Targets:", len(target_list))

Number of Targets: 326


In [7]:
# Distinct label values present in the Latent Hatred annotations.
labels = {annot['label'] for annot in lh_annots}
labels

{0, 1, 2}

In [8]:
misogynistic_annots = []
with open(misogynistic_filepath) as f:
    for line in f:
        record = json.loads(line)
        # Every meme row gets the same fixed target / category, plus a copy of
        # its `content` field under `content_for_retrieval`.
        record.update(
            content_for_retrieval=record['content'],
            target="misogyny",
            target_category="sex",
        )
        misogynistic_annots.append(record)

misogynistic_annots[0]

{'id': 'res_ENG01',
 'img': '/mnt/data1/datasets/memes/Misogynistic_MEME/images/img/combined/res_ENG01.jpg',
 'caption': 'a man and a woman in a red convertible car',
 'web_entities': 'Car, Misogyny, Meme, Shelby Mustang, Ford Mustang, Internet meme, Text, Driving, Automatic Transmission, Manual Transmission',
 'text': 'The way every man feels when a woman is driving',
 'content': 'The way every man feels when a woman is driving a man and a woman in a red convertible car',
 'label': 1,
 'rationale': '</s> Targeted Group: Despite the lack of explicit reference to any specific group, the meme conveys a negative perception of women drivers, and as such, implicitly targets women as a group. \n\nDerogatory Imagery/Language: The statement "The way every man feels when a woman is driving" implies that women drivers cause negative feelings in men, creating an unnecessary and derogatory association between gender and driving ability.\n\nImpact on Bias/Stereotypes: The meme reinforces the harmfu

In [9]:
# Distinct label values present in the misogynistic-meme annotations.
labels = {annot['label'] for annot in misogynistic_annots}
labels

{0, 1}

In [10]:
# for k, v in meme_templates.items():
#     print(k, v[-1])

### 1 - Rationale Similarity Matching

Use TF-IDF over the rationales to find records (tweets and memes) whose explanations are similar.

In [11]:
# Latent Hatred records come first, followed by the meme records.
annots = lh_annots + misogynistic_annots

# One stripped rationale and one binary label per record, in `annots` order.
# Any label >= 1 is collapsed to 1.
corpus = [entry['rationale'].strip() for entry in annots]
labels = [1 if entry['label'] >= 1 else 0 for entry in annots]

for title, collection in (("Annots:", annots), ("Corpus:", corpus), ("Labels:", labels)):
    print(title, len(collection))

Annots: 10800
Corpus: 10800
Labels: 10800


In [12]:
from matching.tfidf_wrapper import compute_corpus_matrix, get_top_k_similar, get_top_k_dissimilar
from sklearn.metrics.pairwise import cosine_similarity

In [13]:
# compute_corpus_matrix is the project's TF-IDF wrapper; it returns the corpus
# matrix together with a vectorizer object.
corpus_matrix, vectorizer = compute_corpus_matrix(corpus)
print(f"Corpus Matrix: {corpus_matrix.shape}")

Corpus Matrix: (10800, 15962)


In [14]:
# Pairwise cosine similarity of every rationale against every other rationale;
# row i / column j follow the order of `corpus`.
sim_matrix = cosine_similarity(corpus_matrix, corpus_matrix)
print(f"Similarity Matrices: {sim_matrix.shape}")

Similarity Matrices: (10800, 10800)


### 2 - Stage 1 Training (Tweets Dataset)

In [15]:
# Re-display the first Latent Hatred record as a reference for the fields
# (`label`, `target_category`, `id`) used in the cells below.
lh_annots[0]

{'id': 'lh_803579998141452288',
 'img': None,
 'caption': None,
 'text': 'hacking is a big danger  computers are not full proof and e-payment is complete hoax even e-mails are not safe .',
 'label': 0,
 'implicit_class': None,
 'target': None,
 'target_category': None,
 'rationale': "1. Targeted Group: The post doesn't target any specific group of people. It raises a general concern about security risks, particularly in relation to digital technology and online transactions.\n\n2. Derogatory Imagery or Language: The post contains no explicit derogatory imagery or language towards any particular group or individual.\n\n3. Impact on Bias, Stereotypes, Prejudice, Discrimination, or Inciting Harm: The post doesn't contribute to perpetuating bias, stereotypes, or inciting harm against any specific group. Instead, it expresses a security concern that affects everyone who uses digital technology and engages in online transactions.\n\nIn summary, the post is not hateful.",
 'features': 'N/A'}

In [None]:
# Build positive / negative demonstration indices for each Latent Hatred record from rationale similarity
import tqdm
import numpy as np

def get_strong_positives(record, sim_vector, annots, k):
    """Pick up to ``k`` of the most similar candidates that match ``record``.

    Candidates are visited from highest to lowest similarity. A candidate
    matches when it shares ``record``'s ``target_category``; if the record's
    ``target_category`` is falsy, it matches on equal ``label`` instead.

    Args:
        record: Annotation dict with ``target_category`` and ``label`` keys.
        sim_vector: 1-D NumPy array where ``sim_vector[i]`` is the similarity
            between ``record`` and ``annots[i]``.
        annots: Candidate annotation dicts, aligned with ``sim_vector``.
        k: Maximum number of positives to return; ``k <= 0`` yields none.

    Returns:
        dict with ``pos_indices`` (positions into ``annots``, most similar
        first) and ``pos_confidence``.
    """
    # NOTE(review): similarity scores are not recorded for positives, so
    # `pos_confidence` is always empty — confirm whether consumers need them.
    positives = {
        "pos_indices": [],
        "pos_confidence": []
    }
    if k <= 0:
        return positives

    use_category = bool(record['target_category'])
    for ind in sim_vector.argsort()[::-1]:  # High confidence first
        ind = int(ind)
        annot = annots[ind]

        if use_category:
            is_match = record['target_category'] == annot['target_category']
        else:
            is_match = record['label'] == annot['label']
        if not is_match:
            continue

        positives['pos_indices'].append(ind)
        if len(positives['pos_indices']) >= k:
            break

    return positives

def get_strong_negatives(record, sim_vector, annots, k):
    """Pick up to ``k`` of the least similar candidates that differ from ``record``.

    Candidates are visited from lowest to highest similarity. A candidate is a
    negative when its ``target_category`` differs from ``record``'s; if the
    record's ``target_category`` is falsy, it is a negative when its ``label``
    differs instead.

    Args:
        record: Annotation dict with ``target_category`` and ``label`` keys.
        sim_vector: 1-D NumPy array where ``sim_vector[i]`` is the similarity
            between ``record`` and ``annots[i]``.
        annots: Candidate annotation dicts, aligned with ``sim_vector``.
        k: Maximum number of negatives to return; ``k <= 0`` yields none.

    Returns:
        dict with ``neg_indices`` (positions into ``annots``, least similar
        first) and ``neg_confidence`` (the matching similarity scores as
        plain Python floats).
    """
    negatives = {
        "neg_indices": [],
        "neg_confidence": []
    }
    if k <= 0:
        return negatives

    use_category = bool(record['target_category'])
    for ind in sim_vector.argsort():  # Low confidence first
        ind = int(ind)
        annot = annots[ind]

        if use_category:
            is_negative = record['target_category'] != annot['target_category']
        else:
            is_negative = record['label'] != annot['label']
        if not is_negative:
            continue

        negatives['neg_indices'].append(ind)
        # Plain float keeps the result JSON-serialisable whatever the array dtype.
        negatives['neg_confidence'].append(float(sim_vector[ind]))
        if len(negatives['neg_indices']) >= k:
            break

    return negatives
    

# Number of positive and of negative demonstrations kept per record.
# NOTE(review): both helpers require `k`; 5 is a placeholder value — confirm
# the intended number of demonstrations.
NUM_DEMONSTRATIONS = 5

# The combined corpus lists the Latent Hatred records first, so the leading
# len(lh_annots) columns of sim_matrix are the tweet-to-tweet similarities.
# Using the real length keeps sim_vector aligned with sim_annots.
num_lh = len(lh_annots)

training_records = {}
for idx, record in tqdm.tqdm(enumerate(lh_annots), total=num_lh):
    # Drop the record's own entry so it cannot be picked as its own demonstration.
    sim_vector = np.delete(sim_matrix[idx, :num_lh], idx)
    sim_annots = lh_annots[:idx] + lh_annots[idx + 1:]

    pos = get_strong_positives(record, sim_vector, sim_annots, NUM_DEMONSTRATIONS)
    negs = get_strong_negatives(record, sim_vector, sim_annots, NUM_DEMONSTRATIONS)
    # NOTE(review): the stored indices are positions in sim_annots (record
    # itself removed), so positions >= idx are shifted by one relative to
    # lh_annots — confirm that consumers expect this.
    demonstrations = {**pos, **negs}

    key = record['id']
    training_records[key] = demonstrations

print(len(training_records))

8203it [00:49, 170.13it/s]

In [None]:
# Preview a few entries only; rendering the whole dict would flood the output.
list(training_records.items())[:3]

In [None]:
# Persist the per-record demonstrations as JSON.
output_path = "../datasets/stage1_training.json"
# Create the target directory if needed so the write cannot fail after the
# long-running loop that built `training_records`.
os.makedirs(os.path.dirname(output_path), exist_ok=True)
# Plain write mode: the file is never read back through this handle.
with open(output_path, "w") as f:
    json.dump(training_records, f)