In [1]:
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [2]:
cd /content/gdrive/MyDrive/SemEval2023/SemEval2022-Task10/re_run

/content/gdrive/.shortcut-targets-by-id/1lC-ZKLaCDQyfLcof2Ak7FDa6IvTt318A/SemEval2023/SemEval2022-Task10/re_run


In [3]:
import pandas as pd

In [4]:
from sklearn.model_selection import train_test_split

target_column = "label_category"
data = pd.read_csv("../Data/starting_ki/train_all_tasks.csv")

# target_column = "label_vector"
# data = pd.read_csv("../GeneratedTexts/task_c_generated_text.csv")
# data[target_column] = data["label"]

data = data[data[target_column]!="none"]
data.reset_index(inplace=True)
data.drop(columns=['index'], inplace=True)

In [5]:
data.groupby(target_column).count()

Unnamed: 0_level_0,rewire_id,text,label_sexist,label_vector
label_category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
"1. threats, plans to harm and incitement",310,310,310,310
2. derogation,1590,1590,1590,1590
3. animosity,1165,1165,1165,1165
4. prejudiced discussions,333,333,333,333


In [6]:
if target_column == "label_category":
  label_values = [
      '1. threats, plans to harm and incitement',
      '2. derogation',
      '3. animosity',
      '4. prejudiced discussions',
  ]

elif target_column == "label_vector":
  label_values = [
      '1.1 threats of harm',
      '1.2 incitement and encouragement of harm',
      '2.1 descriptive attacks',
      '2.2 aggressive and emotive attacks',
      '2.3 dehumanising attacks & overt sexual objectification',
      '3.1 casual use of gendered slurs, profanities, and insults',
      '3.2 immutable gender differences and gender stereotypes',
      '3.3 backhanded gendered compliments',
      '3.4 condescending explanations or unwelcome advice',
      '4.1 supporting mistreatment of individual women',
      '4.2 supporting systemic discrimination against women as a group',
  ]

## Calculate Shapley

In [7]:
!pip install -q transformers shap

In [39]:
from transformers import BertTokenizer
swtokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)

In [9]:
ls Models

[0m[01;34mfine-tuned-bert[0m/  [01;34mfine-tuned-bertweet[0m/  [01;34mfine-tuned-twhinbert[0m/


In [10]:
import torch
from transformers import BertForSequenceClassification, AdamW, BertConfig

out_dir = f'Models/fine-tuned-bert'
model = BertForSequenceClassification.from_pretrained(out_dir)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)


In [11]:
import transformers
import torch
import numpy as np
import scipy as sp

In [12]:
texts = data['text'].values

In [13]:
import shap
# define a prediction function
def f(texts):
  text_ids = [tokenizer.encode(text, max_length=100, padding='max_length', truncation=True) for text in texts]

  att_masks = []
  for ids in text_ids:
      masks = [int(id > 0) for id in ids]
      att_masks.append(masks)

  text_ids = torch.tensor(text_ids).to(device)
  att_masks = torch.tensor(att_masks).to(device)

  outputs = model(text_ids, attention_mask=att_masks)
  outputs = outputs[0].detach().cpu().numpy()
  scores = (np.exp(outputs).T / np.exp(outputs).sum(-1)).T
  
  val = sp.special.logit(scores[:,1]) # use one vs rest logit units
  return val

In [14]:
for label in label_values:
  print(label)

1. threats, plans to harm and incitement
2. derogation
3. animosity
4. prejudiced discussions


In [15]:
# import pickle

# def save_shap_values(filepath, obj):
#   with open(filepath, 'wb') as fin:
#     pickle.dump(obj, fin)
    

# explainer = shap.Explainer(f, tokenizer)

# d = data
# d = {"text": d["text"].values}
# shap_values = explainer(d, fixed_context=1, batch_size=256)
# save_shap_values(f"./Results/shapley_values.pickle", shap_values)

In [31]:
import pickle

def load_shap_values(filepath):
  with open(filepath, 'rb') as fin:
    obj = pickle.load(fin)
  return obj

shap_values = load_shap_values(f"./Results/shapley_values.pickle")

In [17]:
def get_lexicons(train, shap_values):
  lexicons = {}
  for idx, label in enumerate(label_values):
    d = train[train[target_column]==label]
    d = data.reset_index().merge(d[["rewire_id"]], on="rewire_id").set_index('index')
    s = shap_values.abs[d.index.to_numpy()]

    feature_names = s.mean(0).feature_names
    shapley_values = s.mean(0).values

    sorted_values = sorted(zip(shapley_values, feature_names), key=lambda pair: -pair[0])
    lexicons[label] = {x:v for v, x in sorted_values}
  return lexicons
  
# train = pd.read_csv(f"Data/0_train.csv")
# lexicons = get_lexicons(train, shap_values)

In [25]:
# for k in lexicons:
#   for sw in lexicons[k]:
#     if sw.startswith("#") or sw.startswith("_"):
#       print(sw)
#     if sw.endswith("#") or sw.endswith("_"):
#       print(sw)

# Handle subwords

In [26]:
import nltk
nltk.download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [27]:
from nltk.tokenize import TweetTokenizer
nltktokenizer = TweetTokenizer()

def word_tokenize(sent):
  # return nltk.word_tokenize(sent)
  return nltktokenizer.tokenize(sent)

In [59]:
from itertools import groupby
def clean_and_tokenize(sent):
  sent = sent.lower().replace("#", "").replace("''", '"').replace(".", " ").replace("-", " ").replace("&x200b;", "")

  # remove more than 3 consecutive repeated characters
  groups = groupby(sent)
  sent = "".join([label*min(3, sum(1 for _ in group)) for label, group in groups])
  

  sent = sent.encode("ascii", "ignore")
  sent = sent.decode()
  
  words = word_tokenize(sent)
  words = normalise_quote(words)
    
  subwords = swtokenizer.tokenize(sent)
  subwords = normalise_quote(subwords)
  return words, subwords

def merge_score(lexicons, idxs, subwords):
  new_lexicons = {}
  for idx in idxs:
    w = ""
    s = 0
    for i in idx:
      sw = subwords[i].replace("#", "")
      w += sw
      if sw not in lexicons[label]:
      #   print(">>", sw)
        continue

      s += lexicons[label][sw]
    if w in new_lexicons:
      new_lexicons[w] = max(new_lexicons[w], s)
    else:
      new_lexicons[w] = s
  return new_lexicons
  

In [64]:
from collections import defaultdict

def map_subwords(words, subwords):
  sidx = 0
  widx = 0
  w = ""
  mapping = []

  while widx < len(words):
    w = w + words[widx]
    if sidx >= len(subwords):
      print(widx, w, sidx)
      print(words)
      print(subwords)

    idx = [sidx]
    s = subwords[sidx].replace("#", "")

    while len(w) < len(s):
      widx += 1
      w = w + words[widx].strip()

    # if words[0] == "thotlife":
    #     print(w, s)

    while s!=w and len(s) < len(w) and sidx+1 < len(subwords):
      sidx += 1
      s += subwords[sidx].replace("#", "").strip()
      idx.append(sidx)

      # if words[0] == "thotlife":
      #   print(f"[{w}] [{s}]", len(w), len(s), w==s)

    if len(s) > len(w):
      widx += 1
      sidx -= len(idx) - 1
      continue
    
  
    if len(w) > 20 and w not in words:
      print("ERROR", w)

      # assert(False)    
    sidx += 1
    # print(w, s, idx)
    widx += 1
    w = ""

    mapping.append(idx)

  return mapping

def normalise_quote(tokens):
  for widx, w in enumerate(tokens):
    if w=="``":
      tokens[widx] = '"'
    elif w=="''":
      tokens[widx] = '"'
    elif w=="... ...":
      tokens[widx] = '......'
      
  return tokens


from itertools import groupby

def merge_subwords(lexicons, train):
  vocabs = {}
  new_lexicons_scores = {}

  for idx, label in enumerate(label_values):
    print("Working on", label)
    new_lexicons = defaultdict(int)

    d = train[train[target_column]==label]
    texts = d["text"].values

    for sent in texts:
      words, subwords = clean_and_tokenize(sent)
      idxs = map_subwords(words, subwords)
      
      for idx in idxs:
        w = ""
        s = 0
        for i in idx:
          sw = subwords[i].replace("#", "")
          w += sw
          if sw not in lexicons[label]:
          #   print(">>", sw)
            continue

          s += lexicons[label][sw]
          
        new_lexicons[w] = max(new_lexicons[w], s)
  
    sorted_list = sorted(new_lexicons.items(), key=lambda item: -item[1])
    new_lexicons_scores[label] = { k:v for k, v in sorted_list if v > 0}
  return new_lexicons_scores


# newlexicons = merge_subwords(lexicons, train)
# for c in newlexicons:
#   newlexicons[c] = {k: v for k, v in sorted(newlexicons[c].items(), key=lambda item: -item[1])}

#   print(c, list(newlexicons[c].keys())[0:10])

In [65]:
import numpy as np
def filter_lexicons(lexicons, q=0.8):
  threshold = {}
  for l in lexicons:
    values = [v for k, v in lexicons[l].items()]
    threshold[l] = np.quantile(values, q)


  new_lexicons = {}
  for l in lexicons:
    new_lexicons[l] = {}
    for w in lexicons[l]:
      v = lexicons[l][w]

      if v > threshold[l]:
        new_lexicons[l][w] = v
  return new_lexicons

# filtered_lexicons = filter_lexicons(newlexicons, q=0.8)

## Evaluation

In [66]:
from sklearn.metrics import f1_score

def predict(word, lexicons):
  cnt = {}
  for label in label_values:
    cnt[label] = 0

  for w in word:
    for label in label_values:
      if w in lexicons[label]:
        cnt[label] += lexicons[label][w]

  
  if sum(cnt.values())==0:
    return None
  
  return max(cnt.items(), key=lambda k: k[1])[0]

def run_predict(test_words, test_labels, lexicons, return_predict=False):
  y_pred = []
  y_test = []
  non = 0
  for word, label in zip(test_words, test_labels):
    pred = predict(word, lexicons)
    if pred is None:
      non += 1
      continue

    y_pred.append(pred)
    y_test.append(label)

  f1 = f1_score(y_test, y_pred, average='macro')
  skip = non/len(test_words)

  if return_predict:
    return f1, skip, y_test, y_pred
    
  return f1, skip

# test = pd.read_csv(f"Data/0_test.csv")
# test_words = [nltk.word_tokenize(sent) for sent in test["text"].values]
# test_labels = test[target_column].values

# run_predict(test_words, test_labels, filtered_lexicons)

## Hypterparameter Tuning

In [67]:
all_f1, all_skip = {}, {}
for q in np.arange(0.5, 1, 0.05):
  all_f1[q] = []
  all_skip[q] = []

In [68]:
for i in range(5):
  train = pd.read_csv(f"Data/{i}_train.csv")

  val = pd.read_csv(f"Data/{i}_val.csv")
  val_words = [word_tokenize(sent) for sent in val["text"].values]
  val_labels = val[target_column].values

  lexicons = get_lexicons(train, shap_values)
  newlexicons = merge_subwords(lexicons, train)
  

  for q in np.arange(0.5, 1, 0.05):
    filtered_lexicons = filter_lexicons(newlexicons, q=q)
    f1, skip = run_predict(val_words, val_labels, filtered_lexicons)

    all_f1[q].append(f1)
    all_skip[q].append(skip)

  print("DONE",i)
  

Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray.
Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray.


Working on 1. threats, plans to harm and incitement
Working on 2. derogation
Working on 3. animosity
Working on 4. prejudiced discussions
DONE 0


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray.
Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray.


Working on 1. threats, plans to harm and incitement
Working on 2. derogation
Working on 3. animosity
Working on 4. prejudiced discussions
DONE 1


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray.
Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray.


Working on 1. threats, plans to harm and incitement
Working on 2. derogation
Working on 3. animosity
Working on 4. prejudiced discussions
DONE 2


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray.
Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray.


Working on 1. threats, plans to harm and incitement
Working on 2. derogation
Working on 3. animosity
Working on 4. prejudiced discussions
DONE 3


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray.
Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray.


Working on 1. threats, plans to harm and incitement
Working on 2. derogation
Working on 3. animosity
Working on 4. prejudiced discussions
DONE 4


In [69]:
import numpy as np

for q in np.arange(0.5, 1, 0.05):
  print(f"{q:.3f} >> F1:{np.mean(all_f1[q]):.3f}±{np.std(all_f1[q]):.3f}, SKIP: {np.mean(all_skip[q]):.3f}±{np.std(all_skip[q]):.3f}")

0.500 >> F1:0.294±0.019, SKIP: 0.001±0.001
0.550 >> F1:0.284±0.028, SKIP: 0.002±0.002
0.600 >> F1:0.271±0.021, SKIP: 0.002±0.002
0.650 >> F1:0.262±0.012, SKIP: 0.004±0.002
0.700 >> F1:0.283±0.015, SKIP: 0.004±0.001
0.750 >> F1:0.270±0.029, SKIP: 0.017±0.005
0.800 >> F1:0.282±0.022, SKIP: 0.035±0.003
0.850 >> F1:0.275±0.031, SKIP: 0.055±0.006
0.900 >> F1:0.280±0.044, SKIP: 0.104±0.013
0.950 >> F1:0.286±0.035, SKIP: 0.230±0.025


In [70]:
for k in lexicons:
  print(k, list(lexicons[k])[0:10])

1. threats, plans to harm and incitement ['whore', 'bitch', 'ska', 'pussy', 'feminism', 'ssi', 'feminist', 'prostitutes', 'ars', '3']
2. derogation ['isa', 'bitch', 'dyke', 'odle', 'hooker', 'lays', 'pussy', 'ssi', 'whore', 'handles']
3. animosity ['bitch', 'pussy', 'auto', 'whore', 'dyke', 'pizza', 'sensible', 'ssi', 'sies', 'cu']
4. prejudiced discussions ['whore', 'pussy', 'regretted', 'idiots', 'linda', 'fe', 'ac', 'mothers', 'sub', 'body']


## Evaluate Lexicons

In [71]:
from sklearn.metrics import precision_recall_fscore_support
import json

all_f1 = []
all_skip = []
all_p = []
all_r = []

for i in range(5):
  train = pd.read_csv(f"Data/{i}_train.csv")
  test = pd.read_csv(f"Data/{i}_test.csv")

  texts = test["text"].values
  test_words = [word_tokenize(sent) for sent in test["text"].values]
  test_labels = test[target_column].values

  lexicons = get_lexicons(train, shap_values)
  newlexicons = merge_subwords(lexicons, train)
  filtered_lexicons = filter_lexicons(newlexicons, q=0.90)  

  for c in filtered_lexicons:
    filtered_lexicons[c] = {k: v for k, v in sorted(filtered_lexicons[c].items(), key=lambda item: -item[1])}
  
  with open(f'Results/TaskB/lexicon_shapley_train_{i}.json', 'w') as outfile:
      json.dump(filtered_lexicons, outfile)

  f1, skip, y_test, y_pred = run_predict(test_words, test_labels, filtered_lexicons, return_predict=True)
  p, r, f1, _ = precision_recall_fscore_support(y_test, y_pred, average='macro')

  all_p.append(p)
  all_r.append(r)
  all_f1.append(f1)
  all_skip.append(skip)

print(f"F1:{np.mean(all_f1):.3f}±{np.std(all_f1):.3f}, SKIP: {np.mean(all_skip):.3f}±{np.std(all_skip):.3f}")
print(f"P:{np.mean(all_p):.3f}±{np.std(all_p):.3f}, R: {np.mean(all_r):.3f}±{np.std(all_r):.3f}")

Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray.
Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray.


Working on 1. threats, plans to harm and incitement
Working on 2. derogation
Working on 3. animosity
Working on 4. prejudiced discussions


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray.
Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray.


Working on 1. threats, plans to harm and incitement
Working on 2. derogation
Working on 3. animosity
Working on 4. prejudiced discussions


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray.
Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray.


Working on 1. threats, plans to harm and incitement
Working on 2. derogation
Working on 3. animosity
Working on 4. prejudiced discussions


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray.
Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray.


Working on 1. threats, plans to harm and incitement
Working on 2. derogation
Working on 3. animosity
Working on 4. prejudiced discussions


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray.
Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray.


Working on 1. threats, plans to harm and incitement
Working on 2. derogation
Working on 3. animosity
Working on 4. prejudiced discussions
F1:0.271±0.019, SKIP: 0.115±0.016
P:0.287±0.023, R: 0.286±0.027


In [72]:
train = pd.read_csv("../Data/starting_ki/train_all_tasks.csv")
train = train[train[target_column]!="none"]

lexicons = get_lexicons(train, shap_values)
newlexicons = merge_subwords(lexicons, train)
filtered_lexicons = filter_lexicons(newlexicons, q=0.90)  

Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray.
Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray.


Working on 1. threats, plans to harm and incitement
Working on 2. derogation
Working on 3. animosity
Working on 4. prejudiced discussions


In [73]:
for c in filtered_lexicons:
  filtered_lexicons[c] = {k: v for k, v in sorted(filtered_lexicons[c].items(), key=lambda item: -item[1])}

  print(c, list(filtered_lexicons[c].keys())[0:10])

1. threats, plans to harm and incitement ['whores', 'skank', 'whore', 'pussies', 'bitches', 'bitch', 'pussy', 'feminism', 'ww3', 'uniteamerica']
2. derogation ['hillaryclintonisabitch', 'bhahahahahaahahahahaha', 'noodlewhore', 'noodlefoids', "bitch's", 'bitches', 'bitchy', 'pussies', 'fembots', 'bitch']
3. animosity ['muhahahahahahahahahahahahahahahaha', 'pussyfooting', 'bitchtard', 'bitchboi', 'bitches', 'autothots', 'bitching', 'bitchy', 'npcunt', 'bitch']
4. prejudiced discussions ['bitch', 'whorellywood', 'pussypass', 'feminazis', 'feminazi', 'pussy', 'whore', 'femoid', 'pussies', 'trumpaccusers']


In [74]:
import json
with open('Results/TaskB/lexicon_shapley.json', 'w') as outfile:
    json.dump(filtered_lexicons, outfile)