In [1]:

import pandas as pd
from openprompt.data_utils import InputExample
import torch
import os
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import balanced_accuracy_score,f1_score,classification_report
from sklearn.model_selection import train_test_split
from openprompt.config import get_config, save_config_to_yaml
from tqdm.contrib import tenumerate



In [2]:
path=r"C:\Users\py22715\OneDrive - University of Bristol\Documents\Python Scripts"
df=pd.read_csv(os.path.join(path,"golden_set2.csv"))

In [3]:
classes = [ # There are two classes in Sentiment Analysis, one for negative and one for positive
    "racial_slur",
    "self_harm",
    "Homophobia",
    "Incivility",
    "harrassment",
    "No Violations"
]

encoder = LabelEncoder()
df["label"]=encoder.fit_transform(df["violation"])

le_name_mapping = dict(zip(encoder.classes_, encoder.transform(encoder.classes_)))

print(le_name_mapping)



{'Homophobia': 0, 'Incivility': 1, 'No Violations': 2, 'harrassment': 3, 'racial_slur': 4}


In [4]:
from openprompt.plms import load_plm
from openprompt.lm_bff_trainer import LMBFFClassificationRunner,ClassificationRunner
plm, tokenizer, model_config, WrapperClass = load_plm("bert", "bert-base-cased")

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForMaskedLM: ['bert.pooler.dense.weight', 'cls.seq_relationship.weight', 'bert.pooler.dense.bias', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [5]:
from openprompt.prompts import ManualTemplate

promptTemplate = ManualTemplate(
    text = '{"placeholder":"text_a"} was the context for {"placeholder":"text_b"}, Which violation is it? {"mask"}' ,
    tokenizer = tokenizer,
)

bad_words = {
    "racial_slur": ["nigga","nigger","uncle tom","negro","niggerhead","house slave","monkeyboy"],
    "self_harm": ["kill yourself", "commit suicide"],
    "Homophobia":["lesbo","faggot","fag",'cocksucker'],
    "Incivility":["dickhead","twat","cunt","whore","retard","bitch","asshole","dimwit","bullshit","fuck u","fuck you","motherfuck"],
    "harrassment":["penis","pussy","dick","tits","moron","suck dick","sexy bitch"]
}

In [6]:
bad_set=set()
for k,v in bad_words.items():
    for i in bad_words[k]: 
        bad_set.add(i)
all_set=set()
for i in df["context"]:
   for k in i.split(' '):
       all_set.add(k.lower())
    
no_vio = all_set.difference(bad_set)  
no_vio=list(no_vio)
no_vio=no_vio[0:len(no_vio):5000]

    
 
from openprompt.prompts import ManualVerbalizer
promptVerbalizer = ManualVerbalizer(
    classes = classes,
    label_words = {
        "racial_slur": ["nigga","nigger","uncle tom","negro","niggerhead","house slave","monkeyboy"],
        "self_harm": ["kill yourself", "commit suicide"],
        "Homophobia":["lesbo","faggot","fag",'cocksucker'],
        "Incivility":["dickhead","twat","cunt","whore","retard","bitch","asshole","dimwit","bullshit","fuck u","fuck you","motherfuck"],
        "harrassment":["penis","pussy","dick","tits","moron","suck dick","sexy bitch"],
        "No Violations": no_vio
    },
    tokenizer = tokenizer,
)

In [7]:
from openprompt import PromptForClassification
promptModel = PromptForClassification(
    template = promptTemplate,
    plm = plm,
    verbalizer = promptVerbalizer,
    freeze_plm=False,
    plm_eval_mode=False
)


In [8]:
data_train , data_val = train_test_split(df,train_size=0.80,random_state=2018)
#data_train,data_val = train_test_split(data_train_val,train_size=0.25,random_state=2018,stratify=data_train_val['label'])

def InputExampleConverter(df): 
    dataset=[]
    count=0
    violation_sentence = df["sentence"]
    label=df["label"]
    context=df["context"]
    print("Creating Dataset")     
    for i in violation_sentence: 
        dataset.append(InputExample(guid=count,text_a=context[count],text_b=i,label=label[count]))
        count=count+1
    return(dataset)
data_train=InputExampleConverter(data_train.reset_index())
data_val=InputExampleConverter(data_val.reset_index())
#data_test=InputExampleConverter(data_test.reset_index())    

        


from openprompt import PromptDataLoader
data_loader_train = PromptDataLoader(
    dataset = data_train,
    tokenizer = tokenizer,
    template = promptTemplate,
    tokenizer_wrapper_class=WrapperClass,
    shuffle=True
)

data_loader_val = PromptDataLoader(
    dataset = data_val,
    tokenizer = tokenizer,
    template = promptTemplate,
    tokenizer_wrapper_class=WrapperClass,
    shuffle=True
)
'''
data_loader_test = PromptDataLoader(
    dataset = data_test,
    tokenizer = tokenizer,
    template = promptTemplate,
    tokenizer_wrapper_class=WrapperClass,
    shuffle=True
)
'''

Creating Dataset
Creating Dataset


tokenizing: 0it [00:00, ?it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (560 > 512). Running this sequence through the model will result in indexing errors
tokenizing: 173it [00:03, 55.64it/s]
tokenizing: 44it [00:00, 50.17it/s]


'\ndata_loader_test = PromptDataLoader(\n    dataset = data_test,\n    tokenizer = tokenizer,\n    template = promptTemplate,\n    tokenizer_wrapper_class=WrapperClass,\n    shuffle=True\n)\n'

In [9]:
predictions=[]

use_cuda = True
if use_cuda:
    promptModel=  promptModel.cuda()
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
# Now the training is standard
from transformers import  AdamW, get_linear_schedule_with_warmup
loss_func = torch.nn.CrossEntropyLoss()
optimizer = AdamW(promptModel.parameters(), lr=1e-5)



In [10]:

for epoch in range(5):
    tot_loss = 0
    prev_score=0
    val_score=[]
    test_score=[]
    for step, inputs in tenumerate(data_loader_train):
        if use_cuda:
            inputs = inputs.to(device) 
        #print(step)   
        logits = promptModel(inputs)
        logits=logits.to(device)
        labels = inputs['label']
        labels=labels.to(device)
        loss = loss_func(logits,labels.type(torch.LongTensor).to(device))
        loss.backward()
        tot_loss += loss.item()
        optimizer.step()
        optimizer.zero_grad()
        if step %5000 ==1:
            print("Epoch {}, average loss: {}".format(epoch, tot_loss/(step+1)), flush=True)
            
    try: 
            torch.save({

            'epoch': epoch,

            'model_state_dict': promptModel.state_dict(),

            'optimizer_state_dict': optimizer.state_dict(),

            'loss': loss,
                    
            'model':promptModel       

            },os.path.join(path,"violation_type_context_model_chkpt.pt"))
                
    except:
            continue      
    allpreds = []
    alllabels = []
    print("getting val score after epoch "+ str(epoch))
    for step, inputs in tenumerate(data_loader_val):
        if use_cuda:
            inputs = inputs.cuda()
        logits = promptModel(inputs)
        labels = inputs['label']
        alllabels.extend(labels.cpu().tolist())
        allpreds.extend(torch.argmax(logits, dim=-1).cpu().tolist())

    print(f1_score(alllabels,allpreds,average="micro" ))
    print(classification_report(alllabels,allpreds))  
    
    

  0%|          | 0/173 [00:00<?, ?it/s]

Epoch 0, average loss: 1.5301582217216492
getting val score after epoch 0


  0%|          | 0/44 [00:00<?, ?it/s]

0.7045454545454546
              precision    recall  f1-score   support

           1       1.00      0.11      0.20         9
           2       0.70      1.00      0.82        30
           3       0.00      0.00      0.00         5

    accuracy                           0.70        44
   macro avg       0.57      0.37      0.34        44
weighted avg       0.68      0.70      0.60        44



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


  0%|          | 0/173 [00:00<?, ?it/s]

Epoch 1, average loss: 0.5804618149995804
getting val score after epoch 1


  0%|          | 0/44 [00:00<?, ?it/s]

0.6363636363636364
              precision    recall  f1-score   support

           1       0.35      0.78      0.48         9
           2       0.88      0.70      0.78        30
           3       0.00      0.00      0.00         5

    accuracy                           0.64        44
   macro avg       0.41      0.49      0.42        44
weighted avg       0.67      0.64      0.63        44



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


  0%|          | 0/173 [00:00<?, ?it/s]

Epoch 2, average loss: 0.5237976014614105
getting val score after epoch 2


  0%|          | 0/44 [00:00<?, ?it/s]

0.7045454545454546
              precision    recall  f1-score   support

           1       0.00      0.00      0.00         9
           2       0.71      0.97      0.82        30
           3       1.00      0.40      0.57         5

    accuracy                           0.70        44
   macro avg       0.57      0.46      0.46        44
weighted avg       0.60      0.70      0.62        44



  0%|          | 0/173 [00:00<?, ?it/s]

Epoch 3, average loss: 0.045423186384141445
getting val score after epoch 3


  0%|          | 0/44 [00:00<?, ?it/s]

0.7045454545454546
              precision    recall  f1-score   support

           1       0.40      0.44      0.42         9
           2       0.86      0.83      0.85        30
           3       0.40      0.40      0.40         5

    accuracy                           0.70        44
   macro avg       0.55      0.56      0.56        44
weighted avg       0.72      0.70      0.71        44



  0%|          | 0/173 [00:00<?, ?it/s]

Epoch 4, average loss: 0.09154116152785718
getting val score after epoch 4


  0%|          | 0/44 [00:00<?, ?it/s]

0.7045454545454546
              precision    recall  f1-score   support

           1       0.43      0.33      0.38         9
           2       0.89      0.80      0.84        30
           3       0.40      0.80      0.53         5

    accuracy                           0.70        44
   macro avg       0.57      0.64      0.58        44
weighted avg       0.74      0.70      0.71        44



In [11]:
torch.save(promptModel,os.path.join(path,"violation_detector.pt"))