In [25]:
from sentence_transformers import SentenceTransformer
from sentence_transformers.util import cos_sim

from transformers import RobertaTokenizer, RobertaForSequenceClassification
import torch

In [2]:
def semantic_similarity(sentence1, sentence2):
    model = SentenceTransformer('distilbert-base-nli-mean-tokens')
    senteces = [sentence1, sentence2]
    sentence_embeddings = model.encode(senteces)
    return cos_sim(sentence_embeddings[0], sentence_embeddings[1]).item()
    

In [3]:
text1 = "come on, Cal, leave that shit alone."
text2 = "come on, Cal, leave it alone."

print(semantic_similarity(text1, text2))

0.9554343223571777


In [32]:
def style_accuracy(text):

    # 1 - non-toxic
    # 0 - toxic

    model_name = 'SkolkovoInstitute/roberta_toxicity_classifier'
    tokenizer = RobertaTokenizer.from_pretrained(model_name)
    model = RobertaForSequenceClassification.from_pretrained(model_name)

    with torch.no_grad():
        encoded = tokenizer(text, return_tensors='pt', padding=True)
        logits = model(**encoded).logits
        result = torch.softmax(logits, dim=1)[:, 0].item()
    return result

In [33]:
text1 = "If you wanna be there, you're gonna have to go my fucking way."
text2 = "if you want to be there, you'll have to go."

print(f'{style_accuracy(text1):.5f}')
print(f'{style_accuracy(text2):.5f}')

Some weights of the model checkpoint at SkolkovoInstitute/roberta_toxicity_classifier were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


0.00393


Some weights of the model checkpoint at SkolkovoInstitute/roberta_toxicity_classifier were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


0.99996


In [6]:
def fluency(text):

    # 1 - fluent
    # 0 - non-fluent

    model_name = "cointegrated/roberta-large-cola-krishna2020"
    tokenizer = RobertaTokenizer.from_pretrained(model_name)
    model = RobertaForSequenceClassification.from_pretrained(model_name)
    
    with torch.no_grad():
        encoded = tokenizer(text, return_tensors='pt', padding=True)
        logits = model(**encoded).logits
        result = torch.softmax(logits, dim=1)[:, 0].item()
    return result

In [7]:
text1 = "If you wanna be there, you're gonna have to go my fucking way."
text2 = "if you wants to be there, you is have to go."

print(f'{fluency(text1):.5f}')
print(f'{fluency(text2):.5f}')

0.99158
0.46748


In [8]:
def j_metric(predictions, targets):
    j = 0
    for prediction, target in zip(predictions, targets):
        j += semantic_similarity(prediction, target) * style_accuracy(prediction) * fluency(prediction)
    return j / len(predictions)

In [9]:
pred = "if you want to be there, you'll have to go."
target = "if you want to come with me, he'll have to be on my own."

print(j_metric([pred], [target]))
print(semantic_similarity(pred, target))
print(style_accuracy(pred))
print(fluency(pred))

Some weights of the model checkpoint at SkolkovoInstitute/roberta_toxicity_classifier were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


2.821343247901035e-05
0.7695345282554626


Some weights of the model checkpoint at SkolkovoInstitute/roberta_toxicity_classifier were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


3.7031139072496444e-05
0.9900582432746887
