# Load all the predictions

Catalog:
- Majority vote model trained on Kumar (majority-vote labels):
    - Predictions on Kumar (test split, majority-vote labels).
    - Predictions on HateCheck.
- SepHeads model trained on Kumar (annotator-level labels, subsampled):
    - Predictions on Kumar (subsampled test split, annotator-level labels).
    - Predictions on HateCheck (one prediction for each annotator head for each text).

- Majority vote model trained on MHS (majority-vote labels):
    - Predictions on MHS (test split, majority-vote labels).
    - Predictions on HateCheck.
- SepHeads model trained on MHS (annotator-level labels):
    - Predictions on MHS (test split, annotator-level labels).
    - Predictions on HateCheck (one prediction for each annotator head for each text).

In [3]:
import pandas as pd
import numpy as np
import pickle
import datasets

## Load all datasets

In [4]:
# Dataset locations. All paths are absolute paths on the analysis machine.

# For the majority vote model. Labels to be aggregated by majority vote.
# These are the full Kumar train/test splits with annotator-specific labels.
KUMAR_FULL_TRAIN_DATA_PATH = '/data1/moscato/personalised-hate-boundaries-data/data/kumar_perspective_clean/kumar_processed_with_ID_and_full_perspective_clean_train.csv'
KUMAR_FULL_TEST_DATA_PATH = '/data1/moscato/personalised-hate-boundaries-data/data/kumar_perspective_clean/kumar_processed_with_ID_and_full_perspective_clean_test.csv'
# Subsampled split (the one used for training SepHeads), with majority-vote labels.
KUMAR_TRAIN_DATA_PATH = '/data1/moscato/personalised-hate-boundaries-data/data/kumar_training_data_sepheads_subsampled_aggregated_labels.csv'
KUMAR_TEST_DATA_PATH = '/data1/moscato/personalised-hate-boundaries-data/data/kumar_test_data_sepheads_subsampled_aggregated_labels.csv'

# For the SepHeads model: subsampled Kumar train/test splits stored next to
# the model run.
KUMAR_TRAINING_DATA_SUBSAMPLED_PATH = '/data1/moscato/personalised-hate-boundaries-data/models/sepheads_model_training_test_subsampling_2/training_data_subsampled.csv'
KUMAR_TEST_DATA_SUBSAMPLED_PATH = '/data1/moscato/personalised-hate-boundaries-data/models/sepheads_model_training_test_subsampling_2/test_data_subsampled.csv'

# Per-annotator table for Kumar (provides the 'extreme_annotator' flag).
KUMAR_ANNOTATORS_DATA_PATH = '/data1/moscato/personalised-hate-boundaries-data/data/kumar_perspective_clean/annotators_data.csv'

# MHS (Measuring Hate Speech) test split.
MHS_TEST_DATA_PATH = '/data1/moscato/personalised-hate-boundaries-data/data/measuring_hate_speech_data_clean/mhs_clean_test.csv'

# Per-annotator table for MHS (provides the 'extreme_annotator' flag).
MHS_ANNOTATORS_DATA_PATH = '/data1/moscato/personalised-hate-boundaries-data/data/measuring_hate_speech_data_clean/annotators_data_mhs.csv'

In [5]:
# Complete Kumar training and test splits: not restricted to any subset of
# texts and with labels not yet aggregated by majority vote.
kumar_training_data_full = pd.read_csv(KUMAR_FULL_TRAIN_DATA_PATH)
kumar_test_data_full = pd.read_csv(KUMAR_FULL_TEST_DATA_PATH)

# Kumar training and test splits used to train and evaluate the majority vote
# model. They are restricted to the texts used for SepHeads and carry
# majority-vote labels, i.e. they are what one obtains by:
#   1. Taking the full training/test data with annotator-specific labels (above).
#   2. Aggregating labels by majority vote.
#   3. Keeping only the texts that appear in the training/test data used for
#      training/evaluating SepHeads.
# The 'label' column is renamed to 'toxic_score' to match the other frames.
kumar_training_data_majority_vote = pd.read_csv(KUMAR_TRAIN_DATA_PATH)
kumar_training_data_majority_vote = kumar_training_data_majority_vote.rename(
    columns={'label': 'toxic_score'}
)
kumar_test_data_majority_vote = pd.read_csv(KUMAR_TEST_DATA_PATH)
kumar_test_data_majority_vote = kumar_test_data_majority_vote.rename(
    columns={'label': 'toxic_score'}
)

# if 'sepheads_subsampled' not in KUMAR_TEST_DATA_PATH:
#     kumar_test_data_majority_vote = kumar_test_data_majority_vote.groupby('text_id').agg(
#         text=pd.NamedAgg('comment', 'first'),
#         toxic_score=pd.NamedAgg(
#             'toxic_score',
#             lambda group: group.value_counts(ascending=False).index[0]
#         )
#     ).reset_index()
# else:
#     # Case in which the majority vote model was trained on the same texts as
#     # SepHeads.
#     if 'label' in kumar_test_data_majority_vote.columns:
#         kumar_test_data_majority_vote = kumar_test_data_majority_vote.rename(
#             columns={'label': 'toxic_score'}
#         )

# Subsampled Kumar splits used to train (first) and evaluate (second) SepHeads.
kumar_training_data_subsampled = pd.read_csv(KUMAR_TRAINING_DATA_SUBSAMPLED_PATH)
kumar_test_data_subsampled = pd.read_csv(KUMAR_TEST_DATA_SUBSAMPLED_PATH)

# Per-annotator table for Kumar.
kumar_annotators_data = pd.read_csv(KUMAR_ANNOTATORS_DATA_PATH)

# Attach each annotator's 'extreme_annotator' flag to the subsampled test
# split. A left join keeps every test row even if its annotator is missing
# from the annotators table.
kumar_test_data_subsampled = kumar_test_data_subsampled.merge(
    kumar_annotators_data[['annotator_id', 'extreme_annotator']],
    how='left',
    on='annotator_id',
)

# Fetch the HateCheck dataset (using the shared cache directory) and keep its
# 'test' split as a pandas DataFrame.
hatecheck_data = datasets.load_dataset(
    path="Paul/hatecheck",
    cache_dir='/data1/shared_datasets/',
)
hatecheck_data = hatecheck_data['test'].to_pandas()

# Encode the gold labels as integers: hateful -> 1, non-hateful -> 0.
hatecheck_data['label_gold'] = hatecheck_data['label_gold'].map(
    {'hateful': 1, 'non-hateful': 0}
)

# MHS test split. Its own 'extreme_annotator' column is discarded and
# re-attached from the annotators table below.
mhs_test_data = pd.read_csv(MHS_TEST_DATA_PATH)
mhs_test_data = mhs_test_data.drop(columns=['extreme_annotator'])

# Per-annotator table for MHS.
mhs_annotators_data = pd.read_csv(MHS_ANNOTATORS_DATA_PATH)

# Attach each annotator's 'extreme_annotator' flag (left join: every test row
# is kept).
mhs_test_data = mhs_test_data.merge(
    mhs_annotators_data[['annotator_id', 'extreme_annotator']],
    how='left',
    on='annotator_id',
)

# One row per text: first occurrence of the text and the most frequent
# 'toxic_score' among its annotations (value_counts sorts by descending
# frequency by default, so the first index entry is the majority label).
mhs_test_data_majority_vote = (
    mhs_test_data
    .groupby('text_id')
    .agg(
        text=pd.NamedAgg(column='text', aggfunc='first'),
        toxic_score=pd.NamedAgg(
            column='toxic_score',
            aggfunc=lambda votes: votes.value_counts().index[0],
        ),
    )
    .reset_index()
)

In [14]:
# Distinct text IDs in the subsampled training split plus distinct text IDs in
# the subsampled test split (a text present in both would be counted twice).
kumar_training_data_subsampled['text_id'].unique().shape[0] + kumar_test_data_subsampled['text_id'].unique().shape[0]

93153

In [15]:
# Load the complete Kumar CSV for ad-hoc inspection in the following cells.
t = pd.read_csv('/data1/moscato/personalised-hate-boundaries-data/data/kumar_complete.csv')

  t = pd.read_csv('/data1/moscato/personalised-hate-boundaries-data/data/kumar_complete.csv')


In [21]:
# Number of distinct worker IDs in the complete Kumar data (as a shape tuple).
t['worker_id'].unique().shape

(17280,)

In [20]:
# Mean number of non-null comment_id entries per worker.
t.groupby('worker_id')['comment_id'].count().mean()

np.float64(31.140046296296298)

In [23]:
# Summary statistics of the number of non-null worker_id entries per comment.
t.groupby('comment_id')['worker_id'].count().describe()

count    107620.0
mean          5.0
std           0.0
min           5.0
25%           5.0
50%           5.0
75%           5.0
max           5.0
Name: worker_id, dtype: float64

In [29]:
# Number of distinct texts of the full training data that also appear in the
# subsampled training split (as a shape tuple).
kumar_training_data_full[
    kumar_training_data_full['text_id'].isin(kumar_training_data_subsampled['text_id'])
]['text_id'].unique().shape

(74774,)

In [30]:
# Number of distinct texts of the full test data that also appear in the
# subsampled test split (as a shape tuple).
kumar_test_data_full[
    kumar_test_data_full['text_id'].isin(kumar_test_data_subsampled['text_id'])
]['text_id'].unique().shape

(18379,)

In [68]:
# Fraction of texts that fall in the test split. The numbers are hard-coded
# from the two text counts displayed by the preceding cells
# (18379 test texts, 74774 training texts).
18379 / (18379 + 74774)

0.19729906712612583

In [66]:
# Mean 'toxic_score' over the rows of the full training data whose text
# appears in the subsampled training split.
kumar_training_data_full[
    kumar_training_data_full['text_id'].isin(kumar_training_data_subsampled['text_id'])
]['toxic_score'].mean()

np.float64(0.47368448907765387)

In [67]:
# Mean 'toxic_score' over the rows of the full test data whose text appears in
# the subsampled test split.
kumar_test_data_full[
    kumar_test_data_full['text_id'].isin(kumar_test_data_subsampled['text_id'])
]['toxic_score'].mean()

np.float64(0.4696582768844547)

In [45]:
# Mean number of non-null annotator_id entries per text, computed on the full
# training data restricted to texts in the subsampled training split.
kumar_training_data_full[
    kumar_training_data_full['text_id'].isin(kumar_training_data_subsampled['text_id'])
].groupby('text_id')['annotator_id'].count().mean()

np.float64(5.053828870997941)

In [48]:
# Mean number of non-null text_id entries per annotator, computed on the full
# training data restricted to texts in the subsampled training split.
kumar_training_data_full[
    kumar_training_data_full['text_id'].isin(kumar_training_data_subsampled['text_id'])
].groupby('annotator_id')['text_id'].count().mean()

np.float64(24.28162950587933)

In [49]:
# Number of distinct annotators (as a shape tuple) in the full training data
# restricted to texts in the subsampled training split.
kumar_training_data_full[
    kumar_training_data_full['text_id'].isin(kumar_training_data_subsampled['text_id'])
]['annotator_id'].unique().shape

(15563,)

In [47]:
# Mean number of non-null annotator_id entries per text in the subsampled
# training split itself.
kumar_training_data_subsampled.groupby('text_id')['annotator_id'].count().mean()

np.float64(2.039853424987295)

In [54]:
# Display the Kumar annotators table.
kumar_annotators_data

Unnamed: 0,annotator_id,n_annotated_texts,mean_deviation,percent_lower,extreme_annotator
0,0,40,-0.080000,7.500000,False
1,1,60,-0.061667,5.000000,False
2,2,100,-0.041631,9.000000,False
3,3,60,-0.023333,6.666667,False
4,4,20,-0.010000,0.000000,False
...,...,...,...,...,...
17105,17105,20,-0.130000,25.000000,False
17106,17106,20,0.155000,5.000000,False
17107,17107,20,-0.145000,25.000000,False
17108,17108,20,0.205000,5.000000,False


In [56]:
# Attach each annotator's 'extreme_annotator' flag to the subsampled training
# split (left join: every training row is kept).
kumar_training_data_subsampled = kumar_training_data_subsampled.merge(
    kumar_annotators_data[['annotator_id', 'extreme_annotator']],
    how='left',
    on='annotator_id',
)

In [59]:
# Number of distinct annotators (as a shape tuple) among the subsampled
# training rows whose 'extreme_annotator' flag is False.
kumar_training_data_subsampled[~kumar_training_data_subsampled['extreme_annotator']]['annotator_id'].unique().shape

(2728,)

In [61]:
# Smallest number of non-null text_id entries held by any single annotator
# whose 'extreme_annotator' flag is False.
kumar_training_data_subsampled[~kumar_training_data_subsampled['extreme_annotator']].groupby('annotator_id')['text_id'].count().min()

np.int64(35)

In [64]:
# Smallest number of non-null text_id entries held by any single annotator
# whose 'extreme_annotator' flag is True.
kumar_training_data_subsampled[kumar_training_data_subsampled['extreme_annotator']].groupby('annotator_id')['text_id'].count().min()

np.int64(12)

In [70]:
# (rows, columns) of the subsampled test split after the annotators merge.
kumar_test_data_subsampled.shape

(35447, 6)

## Load the boundary model's predictions

In [4]:
# Boundary model predictions, one CSV per evaluation dataset.

# Predictions on Kumar.
BOUNDARY_MODEL_PREDS_KUMAR_PATH = '/data1/moscato/personalised-hate-boundaries-data/data/boundary_model_predictions/paul_kumar_fullpredictions.csv'

# Predictions on HateCheck.
BOUNDARY_MODEL_PREDS_HATECHECK_PATH = '/data1/moscato/personalised-hate-boundaries-data/data/boundary_model_predictions/results_paul_hatecheck.csv'

# Predictions on MHS.
BOUNDARY_MODEL_PREDS_MHS_PATH = '/data1/moscato/personalised-hate-boundaries-data/data/boundary_model_predictions/paul_default_mhs_fullpredictions.csv'

In [5]:
# Boundary model predictions on Kumar: keep the text ID, prediction and
# confidence, and prefix the prediction columns with 'boundary_model_'.
boundary_model_preds_kumar = pd.read_csv(BOUNDARY_MODEL_PREDS_KUMAR_PATH)
boundary_model_preds_kumar = boundary_model_preds_kumar[
    ['text_id', 'prediction', 'confidence_score']
].rename(columns={
    'prediction': 'boundary_model_predicted_toxic_score',
    'confidence_score': 'boundary_model_confidence_score',
})

# Boundary model predictions on HateCheck: same column naming as above.
boundary_model_preds_hatecheck = pd.read_csv(BOUNDARY_MODEL_PREDS_HATECHECK_PATH)
boundary_model_preds_hatecheck = boundary_model_preds_hatecheck.rename(columns={
    'class_label': 'boundary_model_predicted_toxic_score',
    'confidence_score': 'boundary_model_confidence_score',
})
# Convert the textual class labels to 0/1.
# NOTE(review): any label other than '1', 'yes' or 'no' becomes NaN here.
boundary_model_preds_hatecheck['boundary_model_predicted_toxic_score'] = (
    boundary_model_preds_hatecheck['boundary_model_predicted_toxic_score'].map({
        '1': 1,
        'yes': 1,
        'no': 0,
    })
)
# NOTE(review): case IDs are copied over by index alignment, which presumably
# relies on the predictions file being in the same row order as
# hatecheck_data - confirm.
boundary_model_preds_hatecheck['case_id'] = hatecheck_data['case_id']

# Boundary model predictions on MHS: same selection and renaming as for Kumar.
boundary_model_preds_mhs = pd.read_csv(BOUNDARY_MODEL_PREDS_MHS_PATH)
boundary_model_preds_mhs = boundary_model_preds_mhs[
    ['text_id', 'prediction', 'confidence_score']
].rename(columns={
    'prediction': 'boundary_model_predicted_toxic_score',
    'confidence_score': 'boundary_model_confidence_score',
})

boundary_model_preds_hatecheck

Unnamed: 0,boundary_model_predicted_toxic_score,boundary_model_confidence_score,case_id
0,1,95,1
1,1,95,2
2,1,95,3
3,1,100,4
4,1,95,5
...,...,...,...
3723,1,95,3897
3724,1,95,3898
3725,1,95,3899
3726,1,95,3900


## Majority vote model trained on Kumar

In [6]:
# Pickled logits predicted by the Kumar majority vote model on the Kumar test
# data and on HateCheck. The two commented-out paths point to an older model
# and are kept for reference.
# KUMAR_MAJORITY_VOTE_PREDICTED_LOGITS_KUMAR_PATH = '/data1/moscato/personalised-hate-boundaries-data/models/majority_vote_model_new_binarized_labels_1/checkpoint_6630_kumar_predicted_logits.pkl'  # Old model.
# KUMAR_MAJORITY_VOTE_PREDICTED_LOGITS_HATECHECK_PATH = '/data1/moscato/personalised-hate-boundaries-data/models/majority_vote_model_new_binarized_labels_1/checkpoint_6630_hatecheck_predicted_logits.pkl'  # Old model.
KUMAR_MAJORITY_VOTE_PREDICTED_LOGITS_KUMAR_PATH = '/data1/moscato/personalised-hate-boundaries-data/models/majority_vote_model_sepheads_subsampled_data_training_1/checkpoint_5850_kumar_sepheads_subsampled_predicted_logits.pkl'
KUMAR_MAJORITY_VOTE_PREDICTED_LOGITS_HATECHECK_PATH = '/data1/moscato/personalised-hate-boundaries-data/models/majority_vote_model_sepheads_subsampled_data_training_1/checkpoint_5850_hatecheck_predicted_logits.pkl'

### Kumar test data (majority-vote labels)

In [7]:
# Load the pickled logits of the Kumar majority vote model on the Kumar test
# data. pickle.load can execute arbitrary code, so only load trusted files.
with open(KUMAR_MAJORITY_VOTE_PREDICTED_LOGITS_KUMAR_PATH, 'rb') as f:
    kumar_majority_vote_predicted_logits_kumar = pickle.load(f)

In [8]:
# Predicted class = index of the largest logit along the last axis.
# NOTE(review): the assignment is positional, so the logits are assumed to be
# in the same row order as kumar_test_data_majority_vote - confirm.
kumar_test_data_majority_vote['majority_vote_model_predicted_toxic_score'] = kumar_majority_vote_predicted_logits_kumar.argmax(dim=-1).cpu().numpy()

kumar_test_data_majority_vote

Unnamed: 0,text_id,text,toxic_score,majority_vote_model_predicted_toxic_score
0,5,#AtoZQuiz A05 Bar [any mention of baa],0,0
1,11,Robert Manion????HA!More like ROBER manion bcs...,0,0
2,13,"One super over by Rabada, another by Bumrah Th...",0,0
3,15,Eiiish !😀😀😀This is the player not the coach Kw...,0,0
4,16,"I know ya can't say this any longer, but that'...",1,1
...,...,...,...,...
18374,105945,No significant associations were found for cur...,1,0
18375,105954,"Interesting, I've never heard it used that way...",1,1
18376,105977,"Hey there, u/Cal_42! Thanks for submitting to ...",1,1
18377,105986,Same. Jesus Christ he’s been horrible this year,1,0


In [9]:
# kumar_test_data_majority_vote.to_csv(
#     '/data1/moscato/personalised-hate-boundaries-data/data/final_predictions/kumar_majority_vote_predictions_on_kumar.csv',
#     index=None
# )

### HateCheck data

In [10]:
# Load the pickled logits of the Kumar majority vote model on HateCheck.
# pickle.load can execute arbitrary code, so only load trusted files.
with open(KUMAR_MAJORITY_VOTE_PREDICTED_LOGITS_HATECHECK_PATH, 'rb') as f:
    kumar_majority_vote_predicted_logits_hatecheck = pickle.load(f)

In [11]:
# Work on a copy so that hatecheck_data itself stays free of model columns.
kumar_majority_vote_predictions_on_hatecheck = hatecheck_data.copy()

# Predicted class = index of the largest logit along the last axis.
# NOTE(review): positional assignment; assumes the logits follow the row order
# of hatecheck_data - confirm.
kumar_majority_vote_predictions_on_hatecheck['majority_vote_model_predicted_toxic_score'] = kumar_majority_vote_predicted_logits_hatecheck.argmax(dim=-1).cpu().numpy()

kumar_majority_vote_predictions_on_hatecheck

Unnamed: 0,functionality,case_id,test_case,label_gold,target_ident,direction,focus_words,focus_lemma,ref_case_id,ref_templ_id,templ_id,case_templ,majority_vote_model_predicted_toxic_score
0,derog_neg_emote_h,1,I hate women.,1,women,general,hate,hate,,,1,I hate [IDENTITY_P].,1
1,derog_neg_emote_h,2,I hate trans people.,1,trans people,general,hate,hate,,,1,I hate [IDENTITY_P].,1
2,derog_neg_emote_h,3,I hate gay people.,1,gay people,general,hate,hate,,,1,I hate [IDENTITY_P].,1
3,derog_neg_emote_h,4,I hate black people.,1,black people,general,hate,hate,,,1,I hate [IDENTITY_P].,1
4,derog_neg_emote_h,5,I hate disabled people.,1,disabled people,general,hate,hate,,,1,I hate [IDENTITY_P].,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3723,spell_leet_h,3897,Gay people are all complete 4ssholes.,1,gay people,general,4ssholes,asshole,1232.0,256.0,866,[IDENTITY_P] are all complete 4ssholes.,1
3724,spell_leet_h,3898,Black people are all complete 4ssholes.,1,black people,general,4ssholes,asshole,1233.0,256.0,866,[IDENTITY_P] are all complete 4ssholes.,1
3725,spell_leet_h,3899,Disabled people are all complete 4ssholes.,1,disabled people,general,4ssholes,asshole,1234.0,256.0,866,[IDENTITY_P] are all complete 4ssholes.,1
3726,spell_leet_h,3900,Muslims are all complete 4ssholes.,1,Muslims,general,4ssholes,asshole,1235.0,256.0,866,[IDENTITY_P] are all complete 4ssholes.,1


In [12]:
# kumar_majority_vote_predictions_on_hatecheck.to_csv(
#     '/data1/moscato/personalised-hate-boundaries-data/data/final_predictions/kumar_majority_vote_predictions_on_hatecheck.csv',
#     index=None
# )

## SepHeads trained on Kumar

In [13]:
# Outputs of the SepHeads model trained on Kumar.

# Predicted toxic scores on the Kumar (subsampled) test split, stored as .npy.
KUMAR_SEPHEADS_PREDICTED_TOXIC_SCORE_KUMAR_PATH = '/data1/moscato/personalised-hate-boundaries-data/models/sepheads_model_training_test_subsampling_2/checkpoint-42903/predicted_toxic_score_test.npy'

# HateCheck predictions: a catalog CSV plus the pickled logits that go with it.
KUMAR_SEPHEADS_PREDICTIONS_CATALOG_HATECHECK_PATH = '/data1/moscato/personalised-hate-boundaries-data/models/sepheads_model_training_test_subsampling_2/checkpoint_42903_hatecheck_predictions_catalog.csv'
KUMAR_SEPHEADS_PREDICTED_LOGITS_HATECHECK_PATH = '/data1/moscato/personalised-hate-boundaries-data/models/sepheads_model_training_test_subsampling_2/checkpoint_42903_hatecheck_predicted_logits.pkl'

### Kumar test data subsampled (annotator-level labels)

In [14]:
# Load the SepHeads predicted toxic scores for the Kumar test split.
kumar_sepheads_predicted_toxic_score_kumar = np.load(KUMAR_SEPHEADS_PREDICTED_TOXIC_SCORE_KUMAR_PATH)

In [15]:
# Attach the SepHeads predictions to the subsampled Kumar test split.
# NOTE(review): the assignment is positional, so the predictions are assumed
# to be in the same row order as kumar_test_data_subsampled - confirm.
kumar_test_data_subsampled['sepheads_predicted_toxic_score'] = (
    kumar_sepheads_predicted_toxic_score_kumar
)

# Add the boundary model's prediction and confidence for each text (left
# join: every annotator-level row is kept).
kumar_test_data_subsampled = kumar_test_data_subsampled.merge(
    boundary_model_preds_kumar[
        ['text_id', 'boundary_model_predicted_toxic_score', 'boundary_model_confidence_score']
    ],
    how='left',
    on='text_id',
)

kumar_test_data_subsampled

Unnamed: 0,comment,text_id,worker_id,toxic_score,annotator_id,extreme_annotator,sepheads_predicted_toxic_score,boundary_model_predicted_toxic_score,boundary_model_confidence_score
0,#AtoZQuiz A05 Bar [any mention of baa],5,dbc501198ada6725d8e8cc6f0101824f04d4b4b8935059...,0,1,False,0,0,90
1,#AtoZQuiz A05 Bar [any mention of baa],5,29a3513367445e0fd3c53d61da1fcbebbf4efc6e0de0b9...,0,2,False,0,0,90
2,#AtoZQuiz A05 Bar [any mention of baa],5,26523080557217fc3b42c882aecab5863966ccfbe31c3f...,0,3,False,0,0,90
3,Robert Manion????HA!More like ROBER manion bcs...,11,dbc501198ada6725d8e8cc6f0101824f04d4b4b8935059...,0,1,False,0,0,90
4,Robert Manion????HA!More like ROBER manion bcs...,11,29a3513367445e0fd3c53d61da1fcbebbf4efc6e0de0b9...,0,2,False,0,0,90
...,...,...,...,...,...,...,...,...,...
35442,Same. Jesus Christ he’s been horrible this year,105986,8bcb34d67e6969f6e2c3f4d96db021ea3bedb73831522e...,1,3024,False,1,0,90
35443,Same. Jesus Christ he’s been horrible this year,105986,50d355ebffb4a40ef84da9137b206ac3a54b00bcc94b6c...,1,10446,False,1,0,90
35444,Last time I checked they don't measure radiati...,105992,93bb39808c33e806cf7fc28190caeca8662561dca6ca2a...,0,640,False,0,0,95
35445,Last time I checked they don't measure radiati...,105992,8bcb34d67e6969f6e2c3f4d96db021ea3bedb73831522e...,0,3024,False,0,0,95


In [16]:
# kumar_test_data_subsampled.to_csv(
#     '/data1/moscato/personalised-hate-boundaries-data/data/final_predictions/kumar_sepheads_predictions_on_kumar.csv',
#     index=None
# )

### HateCheck data (one prediction per annotator head per text)

In [17]:
# Load the pickled HateCheck logits of the Kumar SepHeads model.
# pickle.load can execute arbitrary code, so only load trusted files.
with open(KUMAR_SEPHEADS_PREDICTED_LOGITS_HATECHECK_PATH, 'rb') as f:
    kumar_sepheads_predicted_logits_hatecheck = pickle.load(f)

# Catalog that accompanies the logits; the merges in the next cell rely on
# its 'case_id' and 'annotator_id' columns.
kumar_sepheads_predictions_catalog_hatecheck = pd.read_csv(KUMAR_SEPHEADS_PREDICTIONS_CATALOG_HATECHECK_PATH)

In [18]:
# Start from a copy of the catalog so the loaded catalog stays untouched.
kumar_sepheads_predictions_on_hatecheck = kumar_sepheads_predictions_catalog_hatecheck.copy()

# Predicted class = index of the largest logit along the last axis.
# NOTE(review): positional assignment; assumes the logits follow the row order
# of the catalog - confirm.
kumar_sepheads_predictions_on_hatecheck['sepheads_predicted_toxic_score'] = (
    kumar_sepheads_predicted_logits_hatecheck.argmax(dim=-1).cpu().numpy()
)

# Enrich every (case, annotator head) row with, in this order:
#   1. the HateCheck gold label of the case,
#   2. the 'extreme_annotator' flag of the Kumar annotator,
#   3. the boundary model's prediction and confidence for the case.
# All joins are left joins, so no catalog row is dropped.
kumar_sepheads_predictions_on_hatecheck = (
    kumar_sepheads_predictions_on_hatecheck
    .merge(
        hatecheck_data[['case_id', 'label_gold']],
        how='left',
        on='case_id',
    )
    .merge(
        kumar_annotators_data[['annotator_id', 'extreme_annotator']],
        how='left',
        on='annotator_id',
    )
    .merge(
        boundary_model_preds_hatecheck,
        how='left',
        on='case_id',
    )
)

kumar_sepheads_predictions_on_hatecheck

Unnamed: 0,case_id,annotator_id,sepheads_predicted_toxic_score,label_gold,extreme_annotator,boundary_model_predicted_toxic_score,boundary_model_confidence_score
0,1,603,0,1,True,1,95
1,1,633,0,1,True,1,95
2,1,1856,0,1,True,1,95
3,1,2008,0,1,True,1,95
4,1,2980,1,1,True,1,95
...,...,...,...,...,...,...,...
10747819,3901,15955,1,1,False,1,95
10747820,3901,15968,1,1,False,1,95
10747821,3901,16243,1,1,False,1,95
10747822,3901,16688,1,1,False,1,95


In [19]:
# kumar_sepheads_predictions_on_hatecheck.to_csv(
#     '/data1/moscato/personalised-hate-boundaries-data/data/final_predictions/kumar_sepheads_predictions_on_hatecheck.csv',
#     index=False
# )

## Majority vote model trained on MHS

In [19]:
# Pickled logits predicted by the MHS majority vote model on the MHS test data.
MHS_MAJORITY_VOTE_PREDICTED_LOGITS_MHS_PATH = '/data1/moscato/personalised-hate-boundaries-data/models/mhs/majority_vote_model_mhs_run_1/checkpoint_200_mhs_test_predicted_logits.pkl'

# Pickled logits predicted by the MHS majority vote model on HateCheck.
MHS_MAJORITY_VOTE_PREDICTED_LOGITS_HATECHECK_PATH = '/data1/moscato/personalised-hate-boundaries-data/models/mhs/majority_vote_model_mhs_run_1/checkpoint_200_hatecheck_predicted_logits.pkl'

### MHS test data (majority-vote labels)

In [20]:
# Load the pickled logits of the MHS majority vote model on the MHS test data.
# pickle.load can execute arbitrary code, so only load trusted files.
with open(MHS_MAJORITY_VOTE_PREDICTED_LOGITS_MHS_PATH, 'rb') as f:
    mhs_majority_vote_predicted_logits_mhs = pickle.load(f)

In [21]:
# Predicted class = index of the largest logit along the last axis.
# NOTE(review): positional assignment; assumes the logits follow the row order
# of mhs_test_data_majority_vote (one row per text_id, produced by the
# groupby) - confirm.
mhs_test_data_majority_vote['majority_vote_model_predicted_toxic_score'] = mhs_majority_vote_predicted_logits_mhs.argmax(dim=-1).cpu().numpy()

mhs_test_data_majority_vote

Unnamed: 0,text_id,text,toxic_score,majority_vote_model_predicted_toxic_score
0,173,"I would love to use a double dildo with a bud,...",0,1
1,384,Every night on the news they counted the days ...,1,0
2,494,I knew that -- YOU missed the point of what I ...,0,0
3,510,"Oh? So, the highest value women behave like th...",0,0
4,535,I would remove those sexy panties and eat her ...,1,0
...,...,...,...,...
405,49459,These women hate their country? They should t...,0,1
406,49529,Gotta blame it on the jews,0,0
407,49660,A lot of incels in these comments getting real...,0,1
408,49703,Dope that he had an epiphany about African cul...,0,0


In [22]:
# mhs_test_data_majority_vote.to_csv(
#     '/data1/moscato/personalised-hate-boundaries-data/data/final_predictions/mhs_majority_vote_predictions_on_mhs.csv',
#     index=None
# )

### HateCheck data

In [23]:
# Load the pickled logits of the MHS majority vote model on HateCheck.
# pickle.load can execute arbitrary code, so only load trusted files.
with open(MHS_MAJORITY_VOTE_PREDICTED_LOGITS_HATECHECK_PATH, 'rb') as f:
    mhs_majority_vote_predicted_logits_hatecheck = pickle.load(f)

In [24]:
# Work on a copy so that hatecheck_data itself stays free of model columns.
mhs_majority_vote_predictions_on_hatecheck = hatecheck_data.copy()

# Predicted class = index of the largest logit along the last axis.
# NOTE(review): positional assignment; assumes the logits follow the row order
# of hatecheck_data - confirm.
mhs_majority_vote_predictions_on_hatecheck['majority_vote_model_predicted_toxic_score'] = mhs_majority_vote_predicted_logits_hatecheck.argmax(dim=-1).cpu().numpy()

mhs_majority_vote_predictions_on_hatecheck

Unnamed: 0,functionality,case_id,test_case,label_gold,target_ident,direction,focus_words,focus_lemma,ref_case_id,ref_templ_id,templ_id,case_templ,majority_vote_model_predicted_toxic_score
0,derog_neg_emote_h,1,I hate women.,1,women,general,hate,hate,,,1,I hate [IDENTITY_P].,0
1,derog_neg_emote_h,2,I hate trans people.,1,trans people,general,hate,hate,,,1,I hate [IDENTITY_P].,0
2,derog_neg_emote_h,3,I hate gay people.,1,gay people,general,hate,hate,,,1,I hate [IDENTITY_P].,0
3,derog_neg_emote_h,4,I hate black people.,1,black people,general,hate,hate,,,1,I hate [IDENTITY_P].,0
4,derog_neg_emote_h,5,I hate disabled people.,1,disabled people,general,hate,hate,,,1,I hate [IDENTITY_P].,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3723,spell_leet_h,3897,Gay people are all complete 4ssholes.,1,gay people,general,4ssholes,asshole,1232.0,256.0,866,[IDENTITY_P] are all complete 4ssholes.,1
3724,spell_leet_h,3898,Black people are all complete 4ssholes.,1,black people,general,4ssholes,asshole,1233.0,256.0,866,[IDENTITY_P] are all complete 4ssholes.,1
3725,spell_leet_h,3899,Disabled people are all complete 4ssholes.,1,disabled people,general,4ssholes,asshole,1234.0,256.0,866,[IDENTITY_P] are all complete 4ssholes.,1
3726,spell_leet_h,3900,Muslims are all complete 4ssholes.,1,Muslims,general,4ssholes,asshole,1235.0,256.0,866,[IDENTITY_P] are all complete 4ssholes.,1


In [25]:
# mhs_majority_vote_predictions_on_hatecheck.to_csv(
#     '/data1/moscato/personalised-hate-boundaries-data/data/final_predictions/mhs_majority_vote_predictions_on_hatecheck.csv',
#     index=None
# )

## SepHeads model trained on MHS

In [26]:
# Outputs of the SepHeads model trained on MHS.

# Pickled logits predicted on the MHS test data.
MHS_SEPHEADS_PREDICTED_LOGITS_MHS_PATH = '/data1/moscato/personalised-hate-boundaries-data/models/mhs/sepheads_model_training_mhs_test_2/checkpoint_1611_mhs_predicted_logits.pkl'

# HateCheck predictions: a catalog CSV plus the pickled logits that go with it.
MHS_SEPHEADS_PREDICTIONS_CATALOG_HATECHECK_PATH = '/data1/moscato/personalised-hate-boundaries-data/models/mhs/sepheads_model_training_mhs_test_2/checkpoint_1611_hatecheck_predictions_catalog.csv'
MHS_SEPHEADS_PREDICTED_LOGITS_HATECHECK_PATH = '/data1/moscato/personalised-hate-boundaries-data/models/mhs/sepheads_model_training_mhs_test_2/checkpoint_1611_hatecheck_predicted_logits.pkl'

### MHS test data (annotator-level labels)

In [27]:
# Load the pickled logits of the MHS SepHeads model on the MHS test data.
# pickle.load can execute arbitrary code, so only load trusted files.
with open(MHS_SEPHEADS_PREDICTED_LOGITS_MHS_PATH, 'rb') as f:
    mhs_sepheads_predicted_logits_mhs = pickle.load(f)

In [28]:
# Predicted class = index of the largest logit along the last axis.
# NOTE(review): positional assignment; assumes the logits follow the row order
# of mhs_test_data - confirm.
mhs_test_data['sepheads_predicted_toxic_score'] = (
    mhs_sepheads_predicted_logits_mhs.argmax(axis=-1)
)

# Add the boundary model's prediction and confidence for each text (left
# join: every annotator-level row is kept).
mhs_test_data = mhs_test_data.merge(
    boundary_model_preds_mhs,
    how='left',
    on='text_id',
)

mhs_test_data

Unnamed: 0,text_id,annotator_id,toxic_score,text,extreme_annotator,sepheads_predicted_toxic_score,boundary_model_predicted_toxic_score,boundary_model_confidence_score
0,29107,1671,0,Fuck a bitch named Uterus.,False,1,1,85
1,40501,146,1,no one fucking cares fag,False,1,1,95
2,20599,4719,1,You criminals have perfected this art of media...,False,0,1,85
3,33260,3475,0,7/22/19: Israeli soldiers and border police bl...,True,0,0,85
4,18799,5341,0,Forget about them lets start a holocaust and k...,False,1,1,100
...,...,...,...,...,...,...,...,...
2252,20063,5649,0,"Lady in the back Blinks when ""her"" camera red ...",False,0,1,85
2253,20063,2655,0,"Lady in the back Blinks when ""her"" camera red ...",False,0,1,85
2254,20067,1666,0,"Please help support nonbinary pride, add a #Tw...",False,0,0,95
2255,20063,7017,0,"Lady in the back Blinks when ""her"" camera red ...",False,0,1,85


In [29]:
# mhs_test_data.to_csv(
#     '/data1/moscato/personalised-hate-boundaries-data/data/final_predictions/mhs_sepheads_predictions_on_mhs.csv',
#     index=None
# )

### HateCheck data (one prediction per annotator head per text)

In [30]:
# Load the pickled HateCheck logits of the MHS SepHeads model.
# pickle.load can execute arbitrary code, so only load trusted files.
with open(MHS_SEPHEADS_PREDICTED_LOGITS_HATECHECK_PATH, 'rb') as f:
    mhs_sepheads_predicted_logits_hatecheck = pickle.load(f)

# Catalog that accompanies the logits; the merges in the next cell rely on
# its 'case_id' and 'annotator_id' columns.
mhs_sepheads_predictions_catalog_hatecheck = pd.read_csv(MHS_SEPHEADS_PREDICTIONS_CATALOG_HATECHECK_PATH)

In [31]:
# Start from a copy of the catalog so the loaded catalog stays untouched.
mhs_sepheads_predictions_on_hatecheck = mhs_sepheads_predictions_catalog_hatecheck.copy()

# Predicted class = index of the largest logit along the last axis.
# NOTE(review): positional assignment; assumes the logits follow the row order
# of the catalog - confirm.
mhs_sepheads_predictions_on_hatecheck['sepheads_predicted_toxic_score'] = mhs_sepheads_predicted_logits_hatecheck.argmax(dim=-1).cpu().numpy()

# Attach the HateCheck gold label of each case.
mhs_sepheads_predictions_on_hatecheck = pd.merge(
    left=mhs_sepheads_predictions_on_hatecheck,
    right=hatecheck_data[['case_id', 'label_gold']],
    how='left',
    on='case_id'
)

# Attach the 'extreme_annotator' flag of each annotator head. The heads of
# this model correspond to MHS annotators, so the flag must come from the MHS
# annotators table (joining the Kumar table would match unrelated annotators
# that merely share the same integer ID).
mhs_sepheads_predictions_on_hatecheck = pd.merge(
    left=mhs_sepheads_predictions_on_hatecheck,
    right=mhs_annotators_data[['annotator_id', 'extreme_annotator']],
    how='left',
    on='annotator_id'
)

# Attach the boundary model's prediction and confidence for each case.
mhs_sepheads_predictions_on_hatecheck = pd.merge(
    left=mhs_sepheads_predictions_on_hatecheck,
    right=boundary_model_preds_hatecheck,
    how='left',
    on='case_id'
)

mhs_sepheads_predictions_on_hatecheck

Unnamed: 0,case_id,annotator_id,sepheads_predicted_toxic_score,label_gold,extreme_annotator,boundary_model_predicted_toxic_score,boundary_model_confidence_score
0,1,20,0,1,False,1,95
1,1,23,0,1,False,1,95
2,1,28,0,1,False,1,95
3,1,38,0,1,False,1,95
4,1,39,0,1,False,1,95
...,...,...,...,...,...,...,...
2031755,3901,11018,0,1,False,1,95
2031756,3901,11062,1,1,False,1,95
2031757,3901,11111,1,1,False,1,95
2031758,3901,11122,1,1,False,1,95


In [33]:
# mhs_sepheads_predictions_on_hatecheck.to_csv(
#     '/data1/moscato/personalised-hate-boundaries-data/data/final_predictions/mhs_sepheads_predictions_on_hatecheck.csv',
#     index=False
# )