[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/LAION-AI/Open-Assistant/blob/main/notebooks/data-augmentation/anthropic/Safety%20Classifier.ipynb)

## Imports

In [3]:
from datasets import load_dataset
import numpy as np
from sklearn.metrics import f1_score
from transformers import AutoTokenizer,AutoModelForSequenceClassification,TrainingArguments, Trainer
import torch

In [4]:
# Checkpoint name handed to both the tokenizer and the classifier loaders.
MODEL = "roberta-base"
# Every tokenized example is truncated/padded to this many tokens.
MAXLEN = 128

In [5]:
# Show whether PyTorch can see a CUDA device (the value is displayed as the cell output).
torch.cuda.is_available()

True

### Load data

In [6]:
# Load the ProSocial Dialog dataset; the result is indexed by split name further down.
dataset = load_dataset("allenai/prosocial-dialog")


Using custom data configuration allenai--prosocial-dialog-ebbad39ca08b6d44
Found cached dataset json (/home/shahul/.cache/huggingface/datasets/allenai___json/allenai--prosocial-dialog-ebbad39ca08b6d44/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51)


  0%|          | 0/3 [00:00<?, ?it/s]

In [7]:
# Safety label string -> integer class id; the position in this tuple is the id.
label_to_id = {
    label_name: class_id
    for class_id, label_name in enumerate(
        (
            "__casual__",
            "__needs_caution__",
            "__needs_intervention__",
            "__probably_needs_caution__",
            "__possibly_needs_caution__",
        )
    )
}

### Data preparation

In [8]:

from torch.utils.data import Dataset
class ProSocialDataset(Dataset):
    """Torch dataset that turns rows of one dataset split into classifier inputs.

    Each item is the tokenizer encoding of
    ``<current context><sep><earlier contexts of the same episode><sep><rots>``
    with an extra integer ``labels`` entry looked up from the row's
    ``safety_label``.

    The class reads the notebook-level names ``dataset``, ``MODEL``, ``MAXLEN``
    and ``label_to_id``, so those cells must have been run first.
    """

    def __init__(self, split):
        """Bind the dataset to a single split.

        Args:
            split: key used to index the notebook-level ``dataset``
                (the notebook uses ``"train"`` and ``"validation"``).
        """
        super().__init__()
        self.tokenizer = AutoTokenizer.from_pretrained(MODEL)
        self.sep_token = self.tokenizer.sep_token
        self.dataset = dataset[split]
        self.label2id = label_to_id
        self.id2label = {v: k for k, v in label_to_id.items()}

    def __len__(self):
        """Return the number of rows in the selected split."""
        return len(self.dataset)

    def __getitem__(self, idx):
        """Build the tokenized example for row ``idx``.

        Args:
            idx: position of the row inside the split.

        Returns:
            The tokenizer output for the assembled text, truncated/padded to
            ``MAXLEN`` tokens, with ``"labels"`` set to the integer class id.

        Raises:
            KeyError: if the row's ``safety_label`` is not in ``label2id``.
        """
        # Fetch the current row once; every field below comes from it.
        row = self.dataset[idx]

        # Walk backwards over the preceding rows and collect their contexts,
        # stopping at the first earlier row whose ``episode_done`` flag is set
        # (that row closes the previous episode) or at the start of the split.
        earlier_turns = []
        cursor = idx
        while cursor > 0:
            previous_row = self.dataset[cursor - 1]
            if previous_row["episode_done"]:
                break
            earlier_turns.append(f'{previous_row["context"]}')
            cursor -= 1
        # Collected newest-first; restore the original (oldest-first) order.
        earlier_turns.reverse()

        # NOTE(review): earlier turns and rots are concatenated without any
        # separator between individual entries. Kept as-is so the input format
        # stays the same as before; confirm whether a delimiter is wanted.
        text = (
            f'{row["context"]}'
            + self.sep_token
            + "".join(earlier_turns)
            + self.sep_token
            + "".join(row["rots"])
        )

        encoding = self.tokenizer(
            text,
            max_length=MAXLEN,
            add_special_tokens=True,
            truncation=True,
            padding="max_length",
        )
        encoding["labels"] = self.label2id[row["safety_label"]]
        return encoding
        
        
        

### Load Model

In [9]:
tokenizer = AutoTokenizer.from_pretrained(MODEL)
# Record the label names in the model config so that saved and pushed
# checkpoints report the safety label strings rather than generic LABEL_<n>
# placeholders.
model = AutoModelForSequenceClassification.from_pretrained(
    MODEL,
    num_labels=len(label_to_id),
    label2id=label_to_id,
    id2label={class_id: name for name, class_id in label_to_id.items()},
)

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.dense.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.weight', 'roberta.pooler.dense.bias', 'lm_head.bias', 'roberta.pooler.dense.weight', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifi

In [10]:
# One dataset wrapper per split: training first, then validation.
train_dataset, eval_dataset = (
    ProSocialDataset(split_name) for split_name in ("train", "validation")
)

### Define the evaluation metric

In [13]:
def compute_metrics(eval_pred):
    """Score predictions with micro-averaged F1.

    Args:
        eval_pred: a ``(logits, labels)`` pair.

    Returns:
        A dict with the single key ``"f1"``.
    """
    raw_scores, gold_labels = eval_pred
    # The predicted class is the index of the largest score along the last axis.
    predicted_labels = np.argmax(raw_scores, axis=-1)
    micro_f1 = f1_score(gold_labels, predicted_labels, average="micro")
    return {"f1": micro_f1}

### Set up the trainer

In [14]:
training_args = TrainingArguments(
    # Where checkpoints are written; an existing directory is overwritten.
    output_dir="test_trainer",
    overwrite_output_dir=True,
    # Optimisation settings.
    num_train_epochs=5,
    learning_rate=3e-5,
    weight_decay=0.01,
    #lr_scheduler_type="cosine",
    per_device_train_batch_size=128,
    per_device_eval_batch_size=128,
    # Evaluate and checkpoint once per epoch, then reload the best checkpoint.
    # NOTE(review): no metric_for_best_model is given, so the library default
    # decides what "best" means -- confirm that is the intended criterion.
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
)
trainer_bert = Trainer(
    args=training_args,
    model=model,
    compute_metrics=compute_metrics,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
)

### Train model

In [16]:
# Run fine-tuning with the arguments configured above.
trainer_bert.train()

### Save and push to hub

In [2]:
# Reload the checkpoint the trainer recorded as best rather than relying only on
# a hard-coded step number, which changes with dataset size, batch size and
# epoch count. The original path is kept as a fallback for the case where no
# best checkpoint was recorded.
best_checkpoint = trainer_bert.state.best_model_checkpoint or "./test_trainer/checkpoint-940"
model = AutoModelForSequenceClassification.from_pretrained(best_checkpoint)

In [15]:
# Upload the reloaded model to the Hub repository.
# NOTE(review): only the model object is pushed here; nothing in this cell
# uploads the tokenizer -- confirm whether it should be pushed as well.
model.push_to_hub("shahules786/prosocial-classifier")

Saving model checkpoint to safety-cls
Configuration saved in safety-cls/config.json
Model weights saved in safety-cls/pytorch_model.bin
