## Dataset

In [25]:
from datasets import load_dataset

# Load the Social Chemistry 101 data by its Hugging Face Hub id.
# NOTE(review): the id is spelled "chemestry" here — confirm against the Hub before "correcting" it.
dataset = load_dataset("metaeval/social-chemestry-101")

In [26]:
# Show the DatasetDict summary: available splits, column names and row count.
dataset

DatasetDict({
    train: Dataset({
        features: ['area', 'm', 'split', 'rot-agree', 'rot-categorization', 'rot-moral-foundations', 'rot-char-targeting', 'rot-bad', 'rot-judgment', 'action', 'action-agency', 'action-moral-judgment', 'action-agree', 'action-legal', 'action-pressure', 'action-char-involved', 'action-hypothetical', 'situation', 'situation-short-id', 'rot', 'rot-id', 'rot-worker-id', 'breakdown-worker-id', 'n-characters', 'characters'],
        num_rows: 355922
    })
})

In [33]:
# Look at one raw record to see the value stored in each column.
dataset['train'][10]

{'area': 'amitheasshole',
 'm': 1,
 'split': 'train',
 'rot-agree': 3.0,
 'rot-categorization': 'morality-ethics',
 'rot-moral-foundations': 'care-harm|loyalty-betrayal',
 'rot-char-targeting': 'char-1',
 'rot-bad': 0,
 'rot-judgment': "it's okay",
 'action': 'feeling angry when you find out your roommate lied to you',
 'action-agency': 'experience',
 'action-moral-judgment': None,
 'action-agree': 3.0,
 'action-legal': None,
 'action-pressure': None,
 'action-char-involved': None,
 'action-hypothetical': None,
 'situation': "telling my roommate with 2 months notice that I'm going to move out, then after he bought a house to live in, change my mind and getting a new roommate",
 'situation-short-id': 'reddit/amitheasshole/adwxny',
 'rot': "If you find out that your roommate lied to you, it's okay to feel angry.",
 'rot-id': 'rot/reddit/amitheasshole/adwxny/3DH6GAKTY14IXU2D5WHPXGPIFGTZYG/129/4',
 'rot-worker-id': 129,
 'breakdown-worker-id': 17,
 'n-characters': 3,
 'characters': 'narrat

'rot' = 'rot-judgment' + 'action'. Each 'situation' has 1–5 RoTs (see https://github.com/mbforbes/social-chemistry-101).

##### RoT attributes:
- rot
- rot-agree: asks how many people probably agree with the RoT as stated. How universally held is the rule-of-thumb as a belief?
    - 0: almost no one
    - 1: uncommon
    - 2: controversial
    - 3: common
    - 4: universal
- rot-categorization: the category of the rule of thumb. (15 in total)
- rot-moral-foundations: define fundamental axes of morality. (5 in total)
- rot-char-targeting: the character in the situation to whom you would tell this rule-of-thumb, e.g. char-1 = my roommate (characters[1])
- rot-bad: 0 or 1, 98% of elements have the value 0
- rot-judgment: the judgment of the action

##### Action attributes:
- action
- action-agency: agency or experience (designed to let workers distinguish RoTs that involve agentive action from those that indicate an experience)
- action-moral-judgment: This is an intuitive reaction of whether something is good or bad.
    - -2: very bad
    - -1: bad
    - 0: expected/OK
    - 1: good
    - 2: very good
- action-agree: what portion of people probably agree with the judgment given the action
    - 0: rare (< 1%)
    - 1: few (5-25%)
    - 2: controversial (~50%)
    - 3: most (75-90%)
    - 4: all (> 99%)
- action-legal: corresponds to prescriptive norms: what one ought to do (legal, tolerated, illegal, null)
- action-pressure: cultural pressure, measures to what degree someone feels socially influenced to do (or avoid) an action.
    - -2: strongly-against
    - -1: against
    - 0: discretionary
    - 1: for
    - 2: strongly for
- action-char-involved: (TODO: description missing — see the dataset README linked above)
- action-hypothetical: indicates whether the candidate character is explicitly taking the action in the situation: explicit-no, probable-no, hypothetical, probable, explicit

In [27]:
# The hub dataset exposes a single table; the 'split' column marks which rows
# belong to the training portion.
train_dataset = dataset.filter(lambda row: row['split'] == 'train')
train_dataset

DatasetDict({
    train: Dataset({
        features: ['area', 'm', 'split', 'rot-agree', 'rot-categorization', 'rot-moral-foundations', 'rot-char-targeting', 'rot-bad', 'rot-judgment', 'action', 'action-agency', 'action-moral-judgment', 'action-agree', 'action-legal', 'action-pressure', 'action-char-involved', 'action-hypothetical', 'situation', 'situation-short-id', 'rot', 'rot-id', 'rot-worker-id', 'breakdown-worker-id', 'n-characters', 'characters'],
        num_rows: 233501
    })
})

In [28]:
# Same table, this time keeping only the rows whose 'split' column says 'test'.
test_dataset = dataset.filter(lambda row: row['split'] == 'test')
test_dataset

DatasetDict({
    train: Dataset({
        features: ['area', 'm', 'split', 'rot-agree', 'rot-categorization', 'rot-moral-foundations', 'rot-char-targeting', 'rot-bad', 'rot-judgment', 'action', 'action-agency', 'action-moral-judgment', 'action-agree', 'action-legal', 'action-pressure', 'action-char-involved', 'action-hypothetical', 'situation', 'situation-short-id', 'rot', 'rot-id', 'rot-worker-id', 'breakdown-worker-id', 'n-characters', 'characters'],
        num_rows: 29239
    })
})

### Preprocessing the train dataset

In [37]:
"""Distinct situations"""
distinct_situations = set()

def find_distinct_situations(example):
    """Record the 'situation' text of one dataset row in `distinct_situations`.

    Returns None: the function exists only for its side effect on the
    module-level set.
    """
    situation_text = example['situation']
    distinct_situations.add(situation_text)

In [38]:
# `map` is used here only for its side effect of filling `distinct_situations`;
# the callback returns None, so the DatasetDict echoed below has the same columns and rows.
train_dataset.map(find_distinct_situations)

Map:   0%|          | 1109/233501 [00:00<00:42, 5490.07 examples/s]

Map: 100%|██████████| 233501/233501 [00:34<00:00, 6720.30 examples/s]


DatasetDict({
    train: Dataset({
        features: ['area', 'm', 'split', 'rot-agree', 'rot-categorization', 'rot-moral-foundations', 'rot-char-targeting', 'rot-bad', 'rot-judgment', 'action', 'action-agency', 'action-moral-judgment', 'action-agree', 'action-legal', 'action-pressure', 'action-char-involved', 'action-hypothetical', 'situation', 'situation-short-id', 'rot', 'rot-id', 'rot-worker-id', 'breakdown-worker-id', 'n-characters', 'characters'],
        num_rows: 233501
    })
})

In [39]:
# Turn the collected set into a list so it can be indexed.
# NOTE(review): this rebinds the name from a set to a list, so re-running the
# collecting cell afterwards would fail on `.add` — confirm that is acceptable.
distinct_situations = list(distinct_situations)

In [42]:
# Number of distinct situation texts collected from the train rows.
len(distinct_situations)

82685

In [43]:
"""Distinct RoTs"""
distinct_rots = set()

def find_distinct_rots(example):
    """Record the 'rot' text of one dataset row in `distinct_rots`.

    Returns None: the function exists only for its side effect on the
    module-level set.
    """
    rot_text = example['rot']
    distinct_rots.add(rot_text)

In [44]:
# Run over every train row only to fill `distinct_rots`; the callback returns
# None, so the DatasetDict echoed below has the same columns and rows.
train_dataset.map(find_distinct_rots)

Map: 100%|██████████| 233501/233501 [00:34<00:00, 6753.24 examples/s]


DatasetDict({
    train: Dataset({
        features: ['area', 'm', 'split', 'rot-agree', 'rot-categorization', 'rot-moral-foundations', 'rot-char-targeting', 'rot-bad', 'rot-judgment', 'action', 'action-agency', 'action-moral-judgment', 'action-agree', 'action-legal', 'action-pressure', 'action-char-involved', 'action-hypothetical', 'situation', 'situation-short-id', 'rot', 'rot-id', 'rot-worker-id', 'breakdown-worker-id', 'n-characters', 'characters'],
        num_rows: 233501
    })
})

In [45]:
# Sort rather than rely on the set's iteration order: string hashing is
# randomised per interpreter run, so `list(a_set)` can come out in a different
# order after every kernel restart, which in turn changes which negative RoTs
# `random.choice(distinct_rots)` picks later on.
# The key keeps a missing (None) RoT, if any, from breaking the comparison.
distinct_rots = sorted(distinct_rots, key=lambda rot: (rot is None, rot or ""))
len(distinct_rots)

209600

In [47]:
"""RoTs for each situation"""
# One independent, initially empty list per distinct situation.
rots_per_situation = {situation: [] for situation in distinct_situations}

def rots_for_each_situation(example):
    """Append the row's 'rot' to the list kept for the row's 'situation'.

    Returns None; the situation must already be a key of `rots_per_situation`.
    """
    situation_rots = rots_per_situation[example['situation']]
    situation_rots.append(example['rot'])

In [48]:
# Fill `rots_per_situation` from every train row; the callback returns None,
# so the DatasetDict echoed below has the same columns and rows.
train_dataset.map(rots_for_each_situation)

Map: 100%|██████████| 233501/233501 [00:36<00:00, 6452.06 examples/s]


DatasetDict({
    train: Dataset({
        features: ['area', 'm', 'split', 'rot-agree', 'rot-categorization', 'rot-moral-foundations', 'rot-char-targeting', 'rot-bad', 'rot-judgment', 'action', 'action-agency', 'action-moral-judgment', 'action-agree', 'action-legal', 'action-pressure', 'action-char-involved', 'action-hypothetical', 'situation', 'situation-short-id', 'rot', 'rot-id', 'rot-worker-id', 'breakdown-worker-id', 'n-characters', 'characters'],
        num_rows: 233501
    })
})

In [52]:
# Spot check: print one situation, then display every RoT collected for it.
print(distinct_situations[4])
rots_per_situation[distinct_situations[4]]

breaking up with my cheating ex to pursue someone I had caught feelings for whilst with her


["You shouldn't cheat on others.",
 "It's good to go after someone you have feelings for.",
 "It's good to break up with someone that is cheating on you.",
 "It's good to break up with someone before you go after someone else."]

### Preprocessing the train dataset for the binary classification task

In [53]:
from transformers import AutoTokenizer
import numpy as np

# Tokenizer for "bert-base-uncased", the same checkpoint name the classifier is loaded from later.
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

In [56]:
# Sanity check of sentence-pair encoding: both sentences go into one call.
sentence_a = "this is a sentence"
sentence_b = "this is another sentence"

encoding = tokenizer(sentence_a, sentence_b, padding="max_length", truncation=True)
# NOTE: this is not the last expression of the cell, so the decoded text is not displayed.
tokenizer.decode(encoding["input_ids"])
# Displayed result: the fields the tokenizer returned.
encoding.keys()

dict_keys(['input_ids', 'token_type_ids', 'attention_mask'])

In [60]:
import random

def negative_examples(example):
    """Build one negative (label 0) situation/RoT pair for a dataset row.

    A RoT is drawn with `random.choice` from the global `distinct_rots` and
    redrawn until it is not among the RoTs recorded for this row's situation
    in the global `rots_per_situation`.

    Args:
        example: a row providing at least the 'situation' key.

    Returns:
        The tokenizer output for (situation, sampled RoT) with 'labels' set to 0.
    """
    situation = example['situation']
    situation_rots = rots_per_situation[situation]

    # Rejection sampling. A None draw is rejected as well, which is what the
    # original `while negative_rot is None` loop did.
    negative_rot = random.choice(distinct_rots)
    while negative_rot is None or negative_rot in situation_rots:
        negative_rot = random.choice(distinct_rots)

    # No padding at tokenisation time: padding="max_length" stored every pair
    # at the tokenizer's maximum length. The training section already builds a
    # DataCollatorWithPadding, which pads each batch to its longest member.
    encoding = tokenizer(situation, negative_rot, truncation=True)
    encoding['labels'] = 0

    return encoding

In [61]:
# Seed the sampler so the drawn negatives are repeatable for a given ordering
# of `distinct_rots` (the shuffle further down is likewise seeded with 42).
random.seed(42)
negative_dataset = train_dataset.map(negative_examples)
negative_dataset = negative_dataset.select_columns(['input_ids', 'token_type_ids', 'attention_mask', 'labels']) #filter the columns
negative_dataset

Map: 100%|██████████| 233501/233501 [03:35<00:00, 1085.00 examples/s]


DatasetDict({
    train: Dataset({
        features: ['input_ids', 'token_type_ids', 'attention_mask', 'labels'],
        num_rows: 233501
    })
})

In [65]:
def positive_examples(example):
    """Encode a row's own situation/RoT pair as a positive (label 1) example.

    Args:
        example: a row providing the 'situation' and 'rot' keys.

    Returns:
        The tokenizer output for (situation, rot) with 'labels' set to 1.
    """
    # No padding at tokenisation time: padding="max_length" stored every pair
    # at the tokenizer's maximum length. The training section already builds a
    # DataCollatorWithPadding, which pads each batch to its longest member.
    encoding = tokenizer(example['situation'], example['rot'], truncation=True)
    encoding['labels'] = 1

    return encoding

In [66]:
# Tokenise every train row as a positive pair, then keep only the model
# inputs and the label.
positive_dataset = train_dataset.map(positive_examples).select_columns(
    ['input_ids', 'token_type_ids', 'attention_mask', 'labels']
)
positive_dataset

Map: 100%|██████████| 233501/233501 [03:24<00:00, 1144.49 examples/s]


DatasetDict({
    train: Dataset({
        features: ['input_ids', 'token_type_ids', 'attention_mask', 'labels'],
        num_rows: 233501
    })
})

In [69]:
from datasets import concatenate_datasets

# Stack the negative and positive tables into one, then shuffle with a fixed seed.
bert_train_dataset = concatenate_datasets(
    [negative_dataset['train'], positive_dataset['train']]
).shuffle(seed=42)
bert_train_dataset

  table = cls._concat_blocks(blocks, axis=0)


Dataset({
    features: ['input_ids', 'token_type_ids', 'attention_mask', 'labels'],
    num_rows: 467002
})

### Save BERT train dataset

In [75]:
# Persist the tokenised train set; the training section reloads it from this same path.
# NOTE(review): absolute, user-specific path — consider a configurable data directory.
path = "/home/IAIS/gplepi/entero/data_social_norms/binary_classification_bert_train"
bert_train_dataset.save_to_disk(path)

Saving the dataset (3/3 shards): 100%|██████████| 467002/467002 [00:26<00:00, 17298.74 examples/s]


### Preprocessing the test dataset

In [84]:
"""Distinct situations"""
# Start from an empty set again; this rebinds the name used for the train split above.
distinct_situations = set()

def find_distinct_situations(example):
    """Add the row's 'situation' string to `distinct_situations`; returns None."""
    situation_text = example['situation']
    distinct_situations.add(situation_text)

In [85]:
# Run over every test row only to fill `distinct_situations`; the callback
# returns None, so the DatasetDict echoed below has the same columns and rows.
test_dataset.map(find_distinct_situations)

Map: 100%|██████████| 29239/29239 [00:04<00:00, 6785.99 examples/s]


DatasetDict({
    train: Dataset({
        features: ['area', 'm', 'split', 'rot-agree', 'rot-categorization', 'rot-moral-foundations', 'rot-char-targeting', 'rot-bad', 'rot-judgment', 'action', 'action-agency', 'action-moral-judgment', 'action-agree', 'action-legal', 'action-pressure', 'action-char-involved', 'action-hypothetical', 'situation', 'situation-short-id', 'rot', 'rot-id', 'rot-worker-id', 'breakdown-worker-id', 'n-characters', 'characters'],
        num_rows: 29239
    })
})

In [86]:
# Turn the collected set into a list and show how many distinct situations the test rows contain.
# NOTE(review): rebinding set -> list means the collecting cell cannot be re-run after this one.
distinct_situations = list(distinct_situations)
len(distinct_situations)

10361

In [87]:
"""Distinct RoTs"""
# Start from an empty set again; this rebinds the name used for the train split above.
distinct_rots = set()

def find_distinct_rots(example):
    """Add the row's 'rot' string to `distinct_rots`; returns None."""
    rot_text = example['rot']
    distinct_rots.add(rot_text)

In [88]:
# Run over every test row only to fill `distinct_rots`; the callback returns
# None, so the DatasetDict echoed below has the same columns and rows.
test_dataset.map(find_distinct_rots)

Map: 100%|██████████| 29239/29239 [00:04<00:00, 6767.49 examples/s]


DatasetDict({
    train: Dataset({
        features: ['area', 'm', 'split', 'rot-agree', 'rot-categorization', 'rot-moral-foundations', 'rot-char-targeting', 'rot-bad', 'rot-judgment', 'action', 'action-agency', 'action-moral-judgment', 'action-agree', 'action-legal', 'action-pressure', 'action-char-involved', 'action-hypothetical', 'situation', 'situation-short-id', 'rot', 'rot-id', 'rot-worker-id', 'breakdown-worker-id', 'n-characters', 'characters'],
        num_rows: 29239
    })
})

In [89]:
# Sort rather than rely on the set's iteration order: string hashing is
# randomised per interpreter run, so `list(a_set)` can come out in a different
# order after every kernel restart, which in turn changes which negative RoTs
# `random.choice(distinct_rots)` picks for the test set.
# The key keeps a missing (None) RoT, if any, from breaking the comparison.
distinct_rots = sorted(distinct_rots, key=lambda rot: (rot is None, rot or ""))
len(distinct_rots)

28077

In [90]:
"""RoTs for each situation"""
# One independent, initially empty list per distinct test situation.
rots_per_situation = {situation: [] for situation in distinct_situations}

def rots_for_each_situation(example):
    """Append the row's 'rot' to the list kept for the row's 'situation'; returns None."""
    collected = rots_per_situation[example['situation']]
    collected.append(example['rot'])

In [91]:
# Fill `rots_per_situation` from every test row; the callback returns None,
# so the DatasetDict echoed below has the same columns and rows.
test_dataset.map(rots_for_each_situation)

Map: 100%|██████████| 29239/29239 [00:04<00:00, 6210.13 examples/s]


DatasetDict({
    train: Dataset({
        features: ['area', 'm', 'split', 'rot-agree', 'rot-categorization', 'rot-moral-foundations', 'rot-char-targeting', 'rot-bad', 'rot-judgment', 'action', 'action-agency', 'action-moral-judgment', 'action-agree', 'action-legal', 'action-pressure', 'action-char-involved', 'action-hypothetical', 'situation', 'situation-short-id', 'rot', 'rot-id', 'rot-worker-id', 'breakdown-worker-id', 'n-characters', 'characters'],
        num_rows: 29239
    })
})

### Preprocessing the test dataset for binary classification task

In [92]:
from transformers import AutoTokenizer
import numpy as np

# Same tokenizer as in the train preprocessing section, loaded again so this
# section also works when started on its own.
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

In [93]:
import random

def negative_examples(example):
    """Build one negative (label 0) situation/RoT pair for a dataset row.

    A RoT is drawn with `random.choice` from the global `distinct_rots` and
    redrawn until it is not among the RoTs recorded for this row's situation
    in the global `rots_per_situation`.

    Args:
        example: a row providing at least the 'situation' key.

    Returns:
        The tokenizer output for (situation, sampled RoT) with 'labels' set to 0.
    """
    situation = example['situation']
    known_rots = rots_per_situation[situation]

    # Rejection sampling. A None draw is rejected as well, which is what the
    # original `while negative_rot is None` loop did.
    negative_rot = random.choice(distinct_rots)
    while negative_rot is None or negative_rot in known_rots:
        negative_rot = random.choice(distinct_rots)

    # No padding at tokenisation time: padding="max_length" stored every pair
    # at the tokenizer's maximum length. The training section already builds a
    # DataCollatorWithPadding, which pads each batch to its longest member.
    encoding = tokenizer(situation, negative_rot, truncation=True)
    encoding['labels'] = 0

    return encoding

In [94]:
# Seed the sampler so the drawn negatives are repeatable for a given ordering
# of `distinct_rots` (the shuffle further down is likewise seeded with 42).
random.seed(42)
negative_dataset = test_dataset.map(negative_examples)
negative_dataset = negative_dataset.select_columns(['input_ids', 'token_type_ids', 'attention_mask', 'labels']) #filter the columns
negative_dataset

Map: 100%|██████████| 29239/29239 [00:27<00:00, 1055.54 examples/s]


DatasetDict({
    train: Dataset({
        features: ['input_ids', 'token_type_ids', 'attention_mask', 'labels'],
        num_rows: 29239
    })
})

In [95]:
def positive_examples(example):
    """Encode a row's own situation/RoT pair as a positive (label 1) example.

    Args:
        example: a row providing the 'situation' and 'rot' keys.

    Returns:
        The tokenizer output for (situation, rot) with 'labels' set to 1.
    """
    # No padding at tokenisation time: padding="max_length" stored every pair
    # at the tokenizer's maximum length. The training section already builds a
    # DataCollatorWithPadding, which pads each batch to its longest member.
    encoding = tokenizer(example['situation'], example['rot'], truncation=True)
    encoding['labels'] = 1

    return encoding

In [96]:
# Tokenise every test row as a positive pair, then keep only the model
# inputs and the label.
positive_dataset = test_dataset.map(positive_examples).select_columns(
    ['input_ids', 'token_type_ids', 'attention_mask', 'labels']
)
positive_dataset

Map: 100%|██████████| 29239/29239 [00:28<00:00, 1008.51 examples/s]


DatasetDict({
    train: Dataset({
        features: ['input_ids', 'token_type_ids', 'attention_mask', 'labels'],
        num_rows: 29239
    })
})

In [97]:
from datasets import concatenate_datasets

# Stack the negative and positive tables into one, then shuffle with a fixed seed.
bert_test_dataset = concatenate_datasets(
    [negative_dataset['train'], positive_dataset['train']]
).shuffle(seed=42)
bert_test_dataset

  table = cls._concat_blocks(blocks, axis=0)


Dataset({
    features: ['input_ids', 'token_type_ids', 'attention_mask', 'labels'],
    num_rows: 58478
})

### Save BERT test dataset

In [98]:
# Persist the tokenised test set; the training section reloads it from this same path.
# NOTE(review): absolute, user-specific path — consider a configurable data directory.
path = "/home/IAIS/gplepi/entero/data_social_norms/binary_classification_bert_test"
bert_test_dataset.save_to_disk(path)

Saving the dataset (1/1 shards): 100%|██████████| 58478/58478 [00:03<00:00, 17152.69 examples/s]


## Binary classification BERT

### Load BERT train and test dataset

In [109]:
import datasets

# Reload the two datasets written by the "Save BERT train/test dataset" cells
# (the paths are the same strings used there).
path_train = "/home/IAIS/gplepi/entero/data_social_norms/binary_classification_bert_train"
path_test = "/home/IAIS/gplepi/entero/data_social_norms/binary_classification_bert_test"
bert_train_dataset = datasets.load_from_disk(path_train)
bert_test_dataset = datasets.load_from_disk(path_test)

  table = cls._concat_blocks(blocks, axis=0)


### Training

In [162]:
from transformers import AutoTokenizer
import numpy as np

# Tokenizer handed to the data collator and the Trainer below; `np` is used by compute_metrics.
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

In [163]:
# Class index -> readable name; label 1 marks a RoT paired with its own situation.
id2label = {0: "NOT-APPLIED", 1: "APPLIED"}

# Inverse mapping, derived so the two dictionaries cannot drift apart.
label2id = {name: index for index, name in id2label.items()}

In [164]:
from transformers import AutoModelForSequenceClassification

# Pretrained BERT encoder with a two-class classification head; the label
# maps are stored in the model config.
model = AutoModelForSequenceClassification.from_pretrained(
    "bert-base-uncased",
    num_labels=2,
    id2label=id2label,
    label2id=label2id,
)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [165]:
import torch

# Report GPU availability, then pick the device accordingly. The original
# always requested 'cuda:0', which raises on a machine without a GPU.
print(torch.cuda.is_available())
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

True


In [166]:
from transformers import TrainingArguments, Trainer

training_args = TrainingArguments(
    output_dir="output_social_norms",
    push_to_hub=False,
    # Optimisation settings.
    num_train_epochs=3,
    learning_rate=2e-5,
    auto_find_batch_size=True,
    # Evaluate and checkpoint on the same 1000-step cadence.
    evaluation_strategy="steps",
    eval_steps=1000,
    save_strategy="steps",
    save_steps=1000,
    # Track F1 (higher is better) and reload the best checkpoint when training ends.
    load_best_model_at_end=True,
    metric_for_best_model='f1',
    greater_is_better=True,
)

In [167]:
from transformers import DataCollatorWithPadding
import evaluate

# Collator that pads the examples of a batch using the tokenizer.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
# Metric objects consumed by compute_metrics below.
accuracy = evaluate.load("accuracy")
precision = evaluate.load("precision")
recall = evaluate.load("recall")
f1 = evaluate.load("f1")

def compute_metrics(eval_preds):
    """Turn an evaluation batch of (logits, labels) into the reported scores.

    The predicted class is the argmax over the last axis of the logits. Each
    of the module-level metric objects (`accuracy`, `precision`, `recall`,
    `f1`) is evaluated on the predictions against the reference labels.

    Returns:
        A dict with the keys 'precision', 'recall', 'f1' and 'accuracy'.
    """
    raw_scores, gold_labels = eval_preds
    predicted_labels = np.argmax(raw_scores, axis=-1)

    # Evaluate in the original order (accuracy first); every metric returns a
    # dict keyed by its own name.
    scored = {
        name: metric.compute(predictions=predicted_labels, references=gold_labels)[name]
        for name, metric in (
            ("accuracy", accuracy),
            ("precision", precision),
            ("recall", recall),
            ("f1", f1),
        )
    }

    # Report in the key order callers have seen so far.
    return {key: scored[key] for key in ("precision", "recall", "f1", "accuracy")}

In [168]:
# NOTE(review): eval_dataset is the test split, and training_args keeps the
# checkpoint with the best F1 on it (load_best_model_at_end), so the reported
# test score doubles as the model-selection score. Consider a held-out dev split.
trainer = Trainer(
    model=model,
    tokenizer=tokenizer,
    args=training_args,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
    train_dataset=bert_train_dataset,
    eval_dataset=bert_test_dataset,
)

In [169]:
# Fine-tune the classifier; evaluation and checkpointing follow the step schedule set in training_args.
trainer.train()

You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss,Validation Loss
