In [1]:
!pip install rouge-score
!pip install tensorflow
!pip install gym
!pip install torch
!pip install transformers==4.20.0
!pip install keras_nlp==0.3.0
!pip install datasets
!pip install huggingface-hub
!pip install nltk

Collecting rouge-score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: rouge-score
  Building wheel for rouge-score (setup.py) ... [?25l[?25hdone
  Created wheel for rouge-score: filename=rouge_score-0.1.2-py3-none-any.whl size=24932 sha256=02d6775a5c19d46851da3def864953be1b7c69c7d41cc835b7e4d65907640934
  Stored in directory: /root/.cache/pip/wheels/5f/dd/89/461065a73be61a532ff8599a28e9beef17985c9e9c31e541b4
Successfully built rouge-score
Installing collected packages: rouge-score
Successfully installed rouge-score-0.1.2
Collecting transformers==4.20.0
  Downloading transformers-4.20.0-py3-none-any.whl (4.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.4/4.4 MB[0m [31m33.1 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.1.0 (from transformers==4.20.0)
  Downloading huggingface_hub-0.17.3-py3-none-any.whl (295 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━

In [3]:
import gym
import random
import numpy as np
import torch
from torch.utils.data import DataLoader, TensorDataset
import tensorflow as tf
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
from rouge_score import rouge_scorer
import re
import os
import logging

# Only log error messages
tf.get_logger().setLevel(logging.ERROR)

# Upper bound on steps in one episode; checked both by the environment's
# step() and by the training loop's while-condition.
MAX_STEPS_PER_EPISODE = 15

# Maximum length of the input to the model
# NOTE(review): not referenced anywhere in the visible code - confirm whether
# it was meant to be passed to the tokenizer / pipeline.
MAX_INPUT_LENGTH = 1024

# Minimum and maximum length of the output by the model
# (passed as min_length / max_length to the summarization pipeline)
MIN_TARGET_LENGTH = 5
MAX_TARGET_LENGTH = 256

# Batch size for training our model (DataLoader batch size during fine-tuning)
BATCH_SIZE = 4

# Learning rate for training our model (AdamW lr during fine-tuning).
# NOTE(review): the same constant is also used inside the Q-table update.
LEARNING_RATE = 5e-4

# Maximum number of episodes to train the RL agent
MAX_EPISODES = 3  # You can increase this as needed

# Checkpoint for the model (you can change this to other available models)
MODEL_CHECKPOINT = "t5-small"



def calculate_reward(generated_summary, reference_summary, rating=None):
    """Score a generated summary against a reference using ROUGE-L F-measure.

    Args:
        generated_summary: The summary text produced by the model.
        reference_summary: The text the generated summary is compared against.
        rating: Optional numeric rating; when given, the ROUGE-L F-measure is
            multiplied by ``rating / 10.0``.

    Returns:
        The ROUGE-L F-measure, scaled by the rating when one is supplied.
    """
    rouge = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
    rouge_l_f1 = rouge.score(reference_summary, generated_summary)['rougeL'].fmeasure

    # Without a rating the raw ROUGE-L score is the reward.
    if rating is None:
        return rouge_l_f1

    return rouge_l_f1 * (rating / 10.0)



# Define the RL environment for text summarization
class TextSummarizationEnv(gym.Env):
    """Two-state, two-action gym environment wrapped around a summarizer.

    States are the integers 0 (ended) and 1 (summarizing); actions are
    0 (End) and 1 (Summarize). The environment also owns a small tabular
    Q-function, ``q_table``, indexed as ``q_table[state, action]``.

    Args:
        input_text: The text to be summarized during every step.
        model: The summarizer callable; it is stored both as ``self.model``
            and as ``self.text_summarizer`` and is invoked by the training
            loop as ``text_summarizer(input_text)``.
        summarization_pipeline: Callable used by ``summarize_input_text``;
            called with the text plus ``max_length`` / ``min_length`` keyword
            arguments and expected to return a list whose first item has a
            ``'summary_text'`` key.
    """

    def __init__(self, input_text, model, summarization_pipeline):
        super().__init__()
        self.input_text = input_text
        self.summarization_pipeline = summarization_pipeline
        self.action_space = gym.spaces.Discrete(2)  # Two actions: Summarize (1) or End (0)
        # step()/reset() emit both 0 and 1, so the observation space needs two
        # values (Discrete(1) would only contain 0).
        self.observation_space = gym.spaces.Discrete(2)
        self.episode_length = 0
        self.model = model
        # Use the callable that was passed in instead of reaching for a
        # module-level global that may not exist when the class is reused.
        self.text_summarizer = model
        self.num_actions = int(self.action_space.n)
        # One row per state so that state 1 is a valid index from the start
        # and the table never has to be re-created during learning.
        self.q_table = np.zeros((int(self.observation_space.n), self.num_actions))

    def step(self, action):
        """Apply ``action`` and return ``(state, summary, done, info)``.

        The second tuple slot carries the generated summary text (an empty
        string for the End action), not a numeric reward; callers compute the
        reward themselves.

        Raises:
            ValueError: If ``action`` is neither 0 nor 1.
        """
        if action == 1:
            # Summarize the current input text
            summary = self.summarize_input_text()
            self.episode_length += 1

            # The episode finishes once the step budget is used up.
            done = self.episode_length >= MAX_STEPS_PER_EPISODE
            return 1, summary, done, {}

        if action == 0:
            # End the conversation
            return 0, "", True, {}

        # Previously fell through and returned None, which only failed later
        # when the caller tried to unpack the result.
        raise ValueError(f"Invalid action {action!r}; expected 0 (End) or 1 (Summarize)")

    def reset(self):
        """Start a new episode and return the initial state (1)."""
        self.episode_length = 0
        return 1  # Initial state as an integer (start summarization)

    def summarize_input_text(self):
        """Summarize ``self.input_text`` with the summarization pipeline.

        The maximum length is the word count of the input capped at
        ``MAX_TARGET_LENGTH``, but never below ``MIN_TARGET_LENGTH`` so that
        very short inputs do not request ``max_length < min_length``.
        """
        word_count = len(self.input_text.split())
        adjusted_max_length = max(MIN_TARGET_LENGTH, min(word_count, MAX_TARGET_LENGTH))

        summary_result = self.summarization_pipeline(
            self.input_text,
            max_length=adjusted_max_length,
            min_length=MIN_TARGET_LENGTH,
        )
        return summary_result[0]['summary_text']

    def update_q_table(self, state, action, reward, next_state, step_size=1.0, discount=LEARNING_RATE):
        """Update ``q_table[state, action]`` from one observed transition.

        Computes ``target = reward + discount * max(q_table[next_state])`` and
        moves the current entry towards it by ``step_size``.

        Args:
            state: Row index of the state the action was taken in.
            action: Column index of the action taken.
            reward: Numeric reward; an empty string means "no reward" and
                leaves the table untouched.
            next_state: Row index of the resulting state.
            step_size: Fraction of the temporal-difference error to apply.
                The default of 1.0 reproduces the original update.
            discount: Weight of the best next-state value. The default is
                ``LEARNING_RATE`` because that is the constant the original
                update multiplied the next-state value by.
                NOTE(review): that constant is also the fine-tuning optimizer
                learning rate - confirm it is really the intended discount.
        """
        if reward == '':
            # Nothing to learn from when no reward was produced.
            return

        reward = float(reward)  # Convert reward to float
        # The table is no longer re-zeroed here; doing so discarded every
        # previously learned value on each call.
        current_q_value = self.q_table[state, action]
        best_next_value = np.max(self.q_table[next_state])
        td_target = reward + discount * best_next_value
        self.q_table[state, action] += step_size * (td_target - current_q_value)

# Summarization pipeline
# The tokenizer and model are loaded once and shared: the pipeline uses them
# for inference and the fine-tuning step updates the same model object.
tokenizer = AutoTokenizer.from_pretrained(MODEL_CHECKPOINT)
summarization_model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_CHECKPOINT)
summarization_pipeline = pipeline(
    'summarization',
    model=summarization_model,
    tokenizer=tokenizer,
    # AutoModelForSeq2SeqLM yields a PyTorch model (it is later trained with
    # torch.optim), so declare the PyTorch framework rather than "tf".
    framework="pt"
)

def text_summarizer(input_text, previous_summary=None):
    """Summarize ``input_text`` with the module-level summarization pipeline.

    Args:
        input_text: The text to summarize.
        previous_summary: Optional earlier summary; when provided it is
            prepended to ``input_text`` (separated by one space) before
            summarizing.

    Returns:
        The ``'summary_text'`` of the first pipeline result.
    """
    prompt = input_text if previous_summary is None else f"{previous_summary} {input_text}"
    pipeline_output = summarization_pipeline(
        prompt,
        max_length=MAX_TARGET_LENGTH,
        min_length=MIN_TARGET_LENGTH,
    )
    first_result = pipeline_output[0]
    return first_result['summary_text']

# Fine-tuning on feedback
def fine_tune_model_on_feedback(model, feedback_data, tokenizer):
    """Run one pass of supervised fine-tuning over collected feedback.

    Each feedback item's ``'input'`` is the source text and its
    ``'corrected_summary'`` is the training target. The model is updated in
    place with AdamW at ``LEARNING_RATE`` using mini-batches of ``BATCH_SIZE``.

    Args:
        model: A PyTorch seq2seq model that accepts ``input_ids``,
            ``attention_mask`` and ``labels`` and returns an object with a
            ``loss`` attribute.
        feedback_data: List of dicts with ``'input'`` and
            ``'corrected_summary'`` string entries. An empty list is a no-op.
        tokenizer: Tokenizer matching ``model``.

    Returns:
        None. On exit the model is put back into the train/eval mode and onto
        the device it had when the function was called.
    """
    if not feedback_data:
        return

    # Remember the caller's configuration. The same model object also backs
    # the inference pipeline, so leaving it in train mode (dropout active) or
    # on another device would affect every later summary.
    was_training = model.training
    first_param = next(model.parameters(), None)
    original_device = first_param.device if first_param is not None else None

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device).train()

    try:
        encoded_inputs = tokenizer(
            [item['input'] for item in feedback_data],
            padding=True, truncation=True, return_tensors="pt",
        )
        encoded_targets = tokenizer(
            [item['corrected_summary'] for item in feedback_data],
            padding=True, truncation=True, return_tensors="pt",
        )

        input_ids = encoded_inputs.input_ids.to(device)
        # Inputs are padded to a common length; the mask keeps the model from
        # attending to the padding positions.
        attention_mask = encoded_inputs.attention_mask.to(device)

        labels = encoded_targets.input_ids.clone().to(device)
        if tokenizer.pad_token_id is not None:
            # -100 is the label index the loss ignores, so target padding does
            # not contribute to the gradient.
            labels[labels == tokenizer.pad_token_id] = -100

        dataset = TensorDataset(input_ids, attention_mask, labels)
        dataloader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)

        optimizer = torch.optim.AdamW(model.parameters(), lr=LEARNING_RATE)

        for input_batch, mask_batch, target_batch in dataloader:
            optimizer.zero_grad()
            outputs = model(input_ids=input_batch, attention_mask=mask_batch, labels=target_batch)
            loss = outputs.loss
            loss.backward()
            optimizer.step()
    finally:
        # Restore the mode and device even if training raised.
        model.train(was_training)
        if original_device is not None:
            model.to(original_device)



def initialize_rl_environment(input_text):
    """Build a ``TextSummarizationEnv`` for ``input_text``.

    The environment is wired to the module-level ``text_summarizer`` function
    (passed as the ``model`` argument) and ``summarization_pipeline``.
    """
    environment = TextSummarizationEnv(
        input_text,
        text_summarizer,
        summarization_pipeline,
    )
    return environment

def _prompt_for_rating(low=1, high=10):
    """Keep asking until the user enters a whole number in ``[low, high]``."""
    while True:
        raw = input(f"Rate the summary ({low}-{high}): ").strip()
        try:
            rating = int(raw)
        except ValueError:
            print(f"Please enter a whole number between {low} and {high}.")
            continue
        if low <= rating <= high:
            return rating
        print(f"Please enter a whole number between {low} and {high}.")


def train_rl_agent(rl_env, target_reward, max_episodes, initial_epsilon=0.1, epsilon_decay_rate=0.9):
    """Interactive training loop: summarize, collect feedback, learn.

    On every step the input text is summarized and shown to the user, who may
    give feedback, a 1-10 rating and a corrected summary. The resulting reward
    updates the environment's Q-table, and after each episode the summarization
    model is fine-tuned on that episode's corrected summaries.

    Args:
        rl_env: Environment providing ``reset``, ``step``, ``update_q_table``,
            ``text_summarizer``, ``input_text`` and ``action_space``.
        target_reward: Training stops once an episode's total reward reaches
            this value.
        max_episodes: Maximum number of episodes to run.
        initial_epsilon: Starting probability of picking a random action.
        epsilon_decay_rate: Factor epsilon is multiplied by after every step.

    Returns:
        None.
    """
    feedback_data = []
    epsilon = initial_epsilon

    for episode in range(max_episodes):
        state = rl_env.reset()
        total_reward = 0.0
        done = False
        steps = 0

        while not done and steps < MAX_STEPS_PER_EPISODE:
            if random.random() < epsilon:
                action = random.randint(0, rl_env.action_space.n - 1)
            else:
                action = 1  # Greedy action (always summarize)

            summary = rl_env.text_summarizer(rl_env.input_text)
            # NOTE(review): this caps the summary by character count, while
            # MAX_TARGET_LENGTH is handed to the pipeline as max_length -
            # confirm that a character cap is intended here.
            if len(summary) > MAX_TARGET_LENGTH:
                summary = summary[:MAX_TARGET_LENGTH]

            print("Input Text:")
            print(rl_env.input_text)
            print("\n")
            print("Summary:")
            print(summary)

            feedback = input("Enter feedback for the summary (or press Enter to skip): ").strip()
            if feedback:
                # Validated prompt: a typo no longer aborts the whole run and
                # the rating is guaranteed to be inside the advertised range.
                rating = _prompt_for_rating()
                corrected_summary = input("Please provide a corrected summary (or press Enter to use the generated one): ").strip()
                reward = calculate_reward(summary, corrected_summary or summary, rating)
                feedback_data.append({
                    'input': rl_env.input_text,
                    'generated_summary': summary,
                    'corrected_summary': corrected_summary or summary
                })
            else:
                # No human feedback: score the summary against the input text.
                reward = calculate_reward(summary, rl_env.input_text)

            next_state, _, done, _ = rl_env.step(action)
            rl_env.update_q_table(state, action, reward, next_state)
            total_reward += reward
            state = next_state
            # The former `previous_rating = rating` bookkeeping was removed:
            # it was never read and raised UnboundLocalError whenever the
            # first step was skipped without feedback.
            epsilon *= epsilon_decay_rate

            steps += 1

        print(f"Episode {episode + 1} - Total Reward: {total_reward}")

        # Fine-tune model on feedback after each episode
        if feedback_data:
            fine_tune_model_on_feedback(summarization_model, feedback_data, tokenizer)
            feedback_data = []  # Clear feedback data

        if total_reward >= target_reward:
            print(f"Target reward of {target_reward} achieved. Stopping training.")
            break


# Script entry point: build the environment for one sample article, run the
# interactive training loop, then persist the Q-table, model and tokenizer.
if __name__ == "__main__":
    # Sample article used as the text to summarize in every step.
    input_text = '''The full cost of damage in Newton Stewart, one of the areas worst affected, is still being assessed. Repair work is ongoing in Hawick and many roads in Peeblesshire remain badly affected by standing water. Trains on the west coast mainline face disruption due to damage at the Lamington Viaduct. Many businesses and householders were affected by flooding in Newton Stewart after the River Cree overflowed into the town. First Minister Nicola Sturgeon visited the area to inspect the damage. The waters breached a retaining wall, flooding many commercial properties on Victoria Street - the main shopping thoroughfare. Jeanette Tate, who owns the Cinnamon Cafe which was badly affected, said she could not fault the multi-agency response once the flood hit. However, she said more preventative work could have been carried out to ensure the retaining wall did not fail. "It is difficult but I do think there is so much publicity for Dumfries and the Nith - and I totally appreciate that - but it is almost like we're neglected or forgotten," she said. "That may not be true but it is perhaps my perspective over the last few days. "Why were you not ready to help us a bit more when the warning and the alarm alerts had gone out?" Meanwhile, a flood alert remains in place across the Borders because of the constant rain. Peebles was badly hit by problems, sparking calls to introduce more defences in the area. Scottish Borders Council has put a list on its website of the roads worst affected.'''
    rl_env = initialize_rl_environment(input_text)
    # Training stops early once one episode's total reward reaches this value.
    target_reward = 9.50
    max_episodes = MAX_EPISODES
    train_rl_agent(rl_env, target_reward, max_episodes)
    # Persist the learned Q-table to the current working directory.
    np.save('q_table.npy', rl_env.q_table)

    # Save the fine-tuned model and its tokenizer side by side.
    # NOTE(review): the path looks like a mounted Google Drive folder in
    # Colab - confirm the drive is mounted before this runs.
    SAVE_PATH = "/content/drive/MyDrive/summarization_files"
    summarization_model.save_pretrained(SAVE_PATH)
    tokenizer.save_pretrained(SAVE_PATH)

Input Text:


Summary:
the full cost of damage in Newton Stewart is still being assessed . many roads in peeblesshire remain badly affected by standing water . the water breached a retaining wall, flooding many commercial properties .
Enter feedback for the summary (or press Enter to skip): can be better
Rate the summary (1-10): 4
Please provide a corrected summary (or press Enter to use the generated one): Newton Stewart, a town in Scotland, suffered significant damage from flooding caused by the River Cree. Repair work is ongoing in Hawick, and many roads in Peeblesshire remain severely affected. Trains on the west coast mainline face disruption due to the Lamington Viaduct damage.
Input Text:


Summary:
the full cost of damage in Newton Stewart is still being assessed . many roads in peeblesshire remain badly affected by standing water . the water breached a retaining wall, flooding many commercial properties .
Enter feedback for the summary (or press Enter to skip): can be better
R