In [59]:
from transformers import RobertaTokenizer, RobertaForSequenceClassification, BartTokenizer, BartForConditionalGeneration
import torch

# RoBERTa and BART for Sentiment Analysis and Text Polishing

In [60]:
def load_roberta_model():
    """Load the RoBERTa tokenizer and sequence classifier from 'roberta_saved_model'.

    The classifier is instantiated with four output labels and put into
    evaluation mode before being returned.

    Returns:
        tuple: ``(tokenizer, model)``.
    """
    checkpoint = 'roberta_saved_model'
    roberta_tokenizer = RobertaTokenizer.from_pretrained(checkpoint)
    classifier = RobertaForSequenceClassification.from_pretrained(checkpoint, num_labels=4)
    # Inference only: disable dropout and other training-time behaviour.
    classifier.eval()
    return roberta_tokenizer, classifier

# Sentiment and scenario prediction function
def predict_sentiment_and_scenario(model, tokenizer, text, threshold=0.5):
    """Predict the sentiment and the conversation scenario of ``text``.

    The model is treated as a multi-label classifier whose four logits are,
    in order: work, friend, family, sentiment. Each logit is passed through a
    sigmoid independently.

    Args:
        model: Callable returning an object with a ``logits`` attribute when
            invoked with the tokenizer output as keyword arguments.
        tokenizer: Callable turning ``text`` into model inputs.
        text: The message to classify. Only the first row of the model output
            is used.
        threshold: Probability at or above which a label counts as active.
            Defaults to 0.5.

    Returns:
        tuple: ``(sentiment, scenario)`` where ``sentiment`` is ``"positive"``
        or ``"negative"`` and ``scenario`` is ``"work"``, ``"friend"``,
        ``"family"`` or ``"other"``.
    """
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    with torch.no_grad():
        logits = model(**inputs).logits

    # Multi-label classification: independent sigmoid per label.
    work, friend, family, sentiment_score = torch.sigmoid(logits)[0].tolist()

    sentiment = "positive" if sentiment_score >= threshold else "negative"

    # Pick the most probable scenario rather than the first one that happens
    # to clear the threshold; ties resolve in work/friend/family order
    # because max() keeps the first maximum it encounters.
    scenario_scores = {"work": work, "friend": friend, "family": family}
    best_scenario = max(scenario_scores, key=scenario_scores.get)
    scenario = best_scenario if scenario_scores[best_scenario] >= threshold else "other"

    return sentiment, scenario

# Instantiate the classifier and its tokenizer once for the cells below
tokenizer, roberta_model = load_roberta_model()

# Sample message to classify
text_input = "I hate you, you are so annoying."
predicted_sentiment, predicted_scenario = predict_sentiment_and_scenario(
    roberta_model,
    tokenizer,
    text_input,
)

print(f"Predicted Sentiment: {predicted_sentiment}, Predicted Scenario: {predicted_scenario}")

Predicted Sentiment: negative, Predicted Scenario: friend


In [61]:
# Load BART model and tokenizer
from transformers import BartTokenizer, BartForConditionalGeneration

def load_bart_model():
    """Load the 'facebook/bart-large-cnn' tokenizer and conditional-generation model.

    Returns:
        tuple: ``(tokenizer, model)``.
    """
    checkpoint = 'facebook/bart-large-cnn'
    bart_tok = BartTokenizer.from_pretrained(checkpoint)
    bart = BartForConditionalGeneration.from_pretrained(checkpoint)
    return bart_tok, bart

# According to sentiment and scenario, refine negative text
def refine_negative_text_bart(model, tokenizer, scenario, original_text):
    """Rewrite ``original_text`` with a prompt chosen according to ``scenario``.

    Args:
        model: Object exposing ``generate(input_ids, **kwargs)``.
        tokenizer: Callable producing ``input_ids`` / ``attention_mask`` and
            exposing ``decode``.
        scenario: ``"work"``, ``"friend"`` or ``"family"``. Any other value
            (for example ``"other"``) falls back to a neutral, polite rewrite
            instead of failing on an unassigned prompt.
        original_text: The message to be rewritten.

    Returns:
        str: The decoded first generated sequence.

    NOTE(review): this is a plain prompt-prefix approach; whether the loaded
    checkpoint actually follows the instruction depends on how it was trained
    and should be checked against real outputs.
    """
    tone_by_scenario = {
        "work": "polite and professional",
        "friend": "sound more friendly and casual",
        "family": "warm and understanding",
    }
    # Unknown scenarios previously left the prompt undefined and crashed.
    tone = tone_by_scenario.get(scenario, "polite and respectful")
    prompt = f"Please rewrite the following message to make it {tone}: '{original_text}'"

    # Truncate so over-long messages cannot exceed the model's input limit.
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True)
    outputs = model.generate(
        inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_length=100,  # Upper bound on the generated sequence length
        num_return_sequences=1,
        do_sample=True,  # Required for temperature / top_k to have any effect
        temperature=0.7,  # Controls generation diversity
        top_k=50,  # Sample only from the 50 most probable tokens at each step
    )
    refined_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return refined_text

# Load BART model and tokenizer
# Code from: https://www.kaggle.com/code/shaunshibu/ai-gen-text-ident-multiple-model-training-shaun
bart_tokenizer, bart_model = load_bart_model()

# Only a negative prediction triggers a rewrite; anything else is reported as-is
if predicted_sentiment != "negative":
    print("The input text is already positive, no refinement needed.")
else:
    refined_text_bart = refine_negative_text_bart(
        bart_model,
        bart_tokenizer,
        predicted_scenario,
        text_input,
    )
    print(f"Refined Text with BART:\n{refined_text_bart}")

Refined Text with BART:
Please rewrite the following message to make it sound more friendly and casual: 'I hate you, you are so annoying' 'I love you, but you're so annoying.' 'I'm so angry at you, I can't stand you' 'You're annoying, but I'm not mad at you. You're annoying'


In [6]:
from rouge_score import rouge_scorer
from bert_score import score as bert_score

Code from: https://haticeozbolat17.medium.com/bertscore-and-rouge-two-metrics-for-evaluating-text-summarization-systems-6337b1d98917

In [52]:
# Candidate (original) text and the reference text it is scored against
original_text = "I currently have too much work and I can’t complete the new task tasks."  # original input text
reference_text = "I can't complete the task tasks because I'm currently too busy with other work."  # reference text

# Calculate ROUGE scores. RougeScorer.score expects (target, prediction), so the
# reference goes first; F1 is symmetric, precision/recall are not.
scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
rouge_scores = scorer.score(reference_text, original_text)

print(f"ROUGE-1 F1: {rouge_scores['rouge1'].fmeasure:.4f}")
print(f"ROUGE-2 F1: {rouge_scores['rouge2'].fmeasure:.4f}")
print(f"ROUGE-L F1: {rouge_scores['rougeL'].fmeasure:.4f}")

# Calculate BERTScore. The function is imported as `bert_score`; the bare name
# `score` is not defined in a fresh kernel.
P, R, F1 = bert_score([original_text], [reference_text], lang="en", verbose=True)

print(f"BERTScore Precision: {P.mean().item():.4f}")
print(f"BERTScore Recall: {R.mean().item():.4f}")
print(f"BERTScore F1: {F1.mean().item():.4f}")

ROUGE-1 F1: 0.7097
ROUGE-2 F1: 0.3448
ROUGE-L F1: 0.4516


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


  0%|          | 0/1 [00:00<?, ?it/s]

computing greedy matching.


  0%|          | 0/1 [00:00<?, ?it/s]

done in 2.49 seconds, 0.40 sentences/sec
BERTScore Precision: 0.9245
BERTScore Recall: 0.9233
BERTScore F1: 0.9239


## Code Reference List

Hatice Özbolat (2023). BERTScore and ROUGE: Two Metrics for Evaluating Text Summarization Systems. [online] Medium. Available at: https://haticeozbolat17.medium.com/bertscore-and-rouge-two-metrics-for-evaluating-text-summarization-systems-6337b1d98917 [Accessed 19 Sep. 2024].

shaunshibu (2023). AI Gen Text Ident, Multiple Model Training - Shaun. [online] Kaggle.com. Available at: https://www.kaggle.com/code/shaunshibu/ai-gen-text-ident-multiple-model-training-shaun [Accessed 19 Sep. 2024].