In [None]:
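# Uncomment to install dependencies into the kernel's environment
# (use %pip rather than !pip so the running kernel's interpreter is targeted):
# %pip install datasets
# %pip install rouge_score
# %pip install transformers torch nltk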

In [6]:
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
from nltk.translate.bleu_score import corpus_bleu, SmoothingFunction
from rouge_score import rouge_scorer

class SelfRefine:
    """Iterative feedback -> refine loop over a list of text lines.

    The object holds the loaded lines, the most recent refined version, and
    the most recent feedback string. A typical cycle is:
    ``generator`` once, then ``generate_feedback`` followed by ``refinement``
    as many times as desired.

    Args:
        tokenizer: Callable tokenizer used to encode the text batch; it must
            also provide ``batch_decode``.
        model: Model exposing ``generate(**inputs, max_length=...)``.
        f_model: Callable that receives a list of strings and returns a
            sequence whose items are mappings containing a ``'label'`` key.
    """

    def __init__(self, tokenizer, model, f_model):
        # Lines loaded by `generator`; None until a file has been read.
        self.original_text = None
        # Output of the latest `refinement` call; None before the first one.
        self.refined_text = None
        # Copy of the text that was fed into the latest `refinement` call.
        self.previous_refined_text = None
        self.model = model
        self.tokenizer = tokenizer
        # Feedback string produced by the latest `generate_feedback` call.
        self.feedback = None
        self.feedback_model = f_model

    # Generate input text
    def generator(self, file_name):
        """Load ``file_name`` (UTF-8) into ``self.original_text``.

        Each line is stripped of surrounding whitespace and stored as one
        list element; blank lines are kept as empty strings.
        """
        with open(file_name, 'r', encoding='utf-8') as file:
            self.original_text = [line.strip() for line in file]
        print("Original text loaded.")

    # Feedback
    def generate_feedback(self):
        """Produce a feedback string for the current text and store it.

        The refined text is used when available, otherwise the original text.
        If the current text equals the input of the previous refinement, a
        fixed "no improvement" message is returned; otherwise the feedback
        reports the label of the first result returned by the feedback model.

        Returns:
            str: The feedback, also stored in ``self.feedback``.

        Raises:
            ValueError: If neither refined nor original text is available.
        """
        if not self.refined_text and not self.original_text:
            raise ValueError("No text available to generate feedback.")

        # Use refined text if available, otherwise use original_text
        text_to_feedback = self.refined_text if self.refined_text else self.original_text

        if self.previous_refined_text and self.previous_refined_text == text_to_feedback:
            # The last refinement returned its input unchanged, so the
            # classifier result would not be used; skip the model call.
            feedback = "No significant improvement detected. Try rephrasing or restructuring the content."
        else:
            # Use the feedback model injected through the constructor rather
            # than relying on a module-level global of the same name.
            issues = self.feedback_model(text_to_feedback)
            # NOTE(review): only the first result's label is reported, even
            # though the model is called with every line — confirm intended.
            feedback = f"Detected Issue: {issues[0]['label']}. Consider revising."

        self.feedback = feedback

        print("Generated feedback.")
        return feedback

    # Refinement
    def refinement(self):
        """Run one refinement pass and store the result in ``self.refined_text``.

        The input is the latest refined text (or the original text on the
        first pass). When feedback is available it is appended to every line
        as ``" [Feedback: ...]"`` before tokenization.

        Raises:
            ValueError: If no original text has been loaded.
        """
        if not self.original_text:
            raise ValueError("No original text found. Run generator first.")

        # Choose text to refine; keep a copy so the next feedback step can
        # compare the new output against this input.
        if self.refined_text:
            self.previous_refined_text = self.refined_text[:]
        else:
            self.previous_refined_text = self.original_text[:]

        # Add feedback
        if self.feedback:
            text_to_refine = [f"{text} [Feedback: {self.feedback}]" for text in self.previous_refined_text]
        else:
            text_to_refine = self.previous_refined_text

        # Tokenize the text.
        # NOTE(review): with truncation at 512 tokens the feedback suffix is
        # presumably the first thing cut from long lines — confirm.
        inputs = self.tokenizer(text_to_refine, padding=True, truncation=True, return_tensors="pt", max_length=512)

        # Generate refined text without tracking gradients
        with torch.no_grad():
            outputs = self.model.generate(**inputs, max_length=512)

        # Decode the refined text
        self.refined_text = self.tokenizer.batch_decode(outputs, skip_special_tokens=True)
        print("Refined text generated.")

    # Evaluation metric 1
    def calculate_bleu(self, target, generated):
        """Corpus-level BLEU of ``generated`` against ``target``.

        Both arguments are sequences of strings, tokenized on whitespace.
        Each target string is the single reference for the generated string
        at the same position.

        Returns:
            float: The smoothed (method1) corpus BLEU score, or ``0.0`` when
            either input is empty.
        """
        if not target or not generated:
            return 0.0
        # corpus_bleu expects, per hypothesis, a *list* of references, so the
        # single tokenized reference must be wrapped in its own list.
        references = [[t.split()] for t in target]
        hypotheses = [g.split() for g in generated]
        return corpus_bleu(references, hypotheses, smoothing_function=SmoothingFunction().method1)

    # Evaluation metric 2
    def calculate_rouge(self, target, generated):
        """Mean ROUGE-L F-measure over paired ``target``/``generated`` strings.

        Pairs are formed positionally with ``zip``, so surplus items in the
        longer sequence are ignored.

        Returns:
            float: Average ROUGE-L F-measure, or ``0.0`` when there are no
            pairs to score.
        """
        pairs = list(zip(target, generated))
        if not pairs:
            # Avoid dividing by zero when nothing can be scored.
            return 0.0
        scorer = rouge_scorer.RougeScorer(['rougeL'], use_stemmer=True)
        scores = [scorer.score(t, g) for t, g in pairs]
        return sum(s['rougeL'].fmeasure for s in scores) / len(scores)

# Main loop
def main(file_name, tokenizer, model, cycles, f_model):
    """Load a text file, run feedback/refinement cycles, and print a ROUGE score.

    Args:
        file_name: Path of the UTF-8 text file to load.
        tokenizer: Tokenizer handed to ``SelfRefine``.
        model: Generation model handed to ``SelfRefine``.
        cycles: Number of feedback + refinement rounds to run.
        f_model: Feedback model handed to ``SelfRefine``.

    Progress and results are printed. When ``cycles`` is zero or negative no
    refined text exists, so the evaluation step is skipped.
    """
    test = SelfRefine(tokenizer=tokenizer, model=model, f_model=f_model)

    # Load original text
    test.generator(file_name)

    # Run feedback and refinement cycles
    for c in range(cycles):
        print(f"Cycle {c + 1}: ")
        feedback = test.generate_feedback()
        print(f"Feedback: {feedback}")
        test.refinement()

    # Final results
    print(f"Original text: {test.original_text[:1]}...")
    print(f"Final feedback: {test.feedback}")

    if test.refined_text is None:
        # No cycle ran, so there is nothing to slice or score; slicing None
        # would raise TypeError.
        print("No refinement cycles were run; skipping evaluation.")
        return

    print(f"Final refined text: {test.refined_text[:1]}...")

    # Evaluate the refined text against the original text
    rouge_l = test.calculate_rouge(test.original_text, test.refined_text)
    # bleu_score = test.calculate_bleu(test.original_text, test.refined_text)
    print(f"Rouge Score: {rouge_l}")

# Checkpoint identifiers, kept in one place so they are easy to swap
GENERATOR_CHECKPOINT = "facebook/bart-large-cnn"
FEEDBACK_CHECKPOINT = "textattack/roberta-base-CoLA"

# Load the seq2seq tokenizer/model and the text-classification pipeline
tokenizer = AutoTokenizer.from_pretrained(GENERATOR_CHECKPOINT)
model = AutoModelForSeq2SeqLM.from_pretrained(GENERATOR_CHECKPOINT)
feedback_model = pipeline("text-classification", model=FEEDBACK_CHECKPOINT)

# Run five feedback/refinement cycles over the input file
main(
    file_name='moby_dick.txt',
    tokenizer=tokenizer,
    model=model,
    cycles=5,
    f_model=feedback_model,
)


Some weights of the model checkpoint at textattack/roberta-base-CoLA were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Device set to use cpu


Original text loaded.
Cycle 1: 
Generated feedback.
Feedback: Detected Issue: LABEL_0. Consider revising.
Refined text generated.
Cycle 2: 
Generated feedback.
Feedback: Detected Issue: LABEL_0. Consider revising.
Refined text generated.
Cycle 3: 
Generated feedback.
Feedback: Detected Issue: LABEL_0. Consider revising.
Refined text generated.
Cycle 4: 
Generated feedback.
Feedback: Detected Issue: LABEL_0. Consider revising.
Refined text generated.
Cycle 5: 
Generated feedback.
Feedback: Detected Issue: LABEL_0. Consider revising.
Refined text generated.
Original text: ['Call me Ishmael. Some years ago—never mind how long precisely—having little or no money in my purse, and nothing particular to interest me on shore, I thought I would sail about a little and see the watery part of the world. It is a way I have of driving off the spleen and regulating the circulation. Whenever I find myself growing grim about the mouth; whenever it is a damp, drizzly November in my soul; whenever I fin