In [1]:
#%pip install transformers gym deap numpy pandas scipy
#%pip install nltk
%pip install torch


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.2[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython3.11 -m pip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [2]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import nltk
import torch
import numpy as np
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

model_name = 'gpt2'
model = GPT2LMHeadModel.from_pretrained(model_name)
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token
model.generation_config.pad_token_id = tokenizer.pad_token_id

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
import gym
import numpy as np
from gym import spaces

class TextGenerationEnv(gym.Env):
    def __init__(self, model, tokenizer, max_length=50):
        super(TextGenerationEnv, self).__init__()
        self.model = model
        self.tokenizer = tokenizer
        self.max_length = 50
        self.action_space = spaces.Discrete(len(tokenizer))
        self.observation_space = spaces.Box(0, len(tokenizer) - 1, (max_length,), dtype=np.int32)
        self.last_gen_text = ""
        self.threshold = 0.95
        self.prompt=""
        self.top_k = 50
        self.counter = 0

    def reset(self, prompt=""):
        self.prompt = prompt
        self.generated_text = [prompt]
        self.current_length = len(self.generated_text)
        self.counter = 0
        return self._get_observation()

    def step(self, action):
        input_ids = self.tokenizer.encode(self.prompt, return_tensors='pt')
        outputs = self.model.generate(input_ids, max_length=50, do_sample=True) 

        self.generated_text = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
        
        reward = self._calculate_reward()
        self.counter += 1
        done = (reward == 1) or (self.counter > self.top_k)
        #print(self.generated_text)
        if done:
            self.counter = 0
        return self._get_observation(), reward, done, {}

    def _get_observation(self):
        return np.array(self.tokenizer.encode(self.generated_text, max_length=self.max_length))

    def _calculate_reward(self):
        generated_text_str = ' '.join(self.generated_text)
        if "bank" in self.generated_text or "bank" in self.generated_text:
            reward = 1
        else:
            reward = 0
        self.last_gen_text = generated_text_str
       
        return reward

    def get_last_text(self):
        return self.last_gen_text

    def print_last_gen_test(self):
        print( self.last_gen_text)




In [4]:
import logging
import time
pruned_actions = []

class RLAgent:
    def __init__(self, env):
        self.env = env
        state_size = np.prod(env.observation_space.shape)  # Total size based on the observation space shape
        action_size = env.action_space.n
        self.q_table = np.zeros((state_size, action_size))
        self.learning_rate = 0.1
        self.discount_factor = 0.99
        self.exploration_rate = 1.0
        self.exploration_decay = 0.995
        self.generated_sequences = []

    def train(self, episodes=20, prompt=""):
        logging.basicConfig(level=logging.INFO)
        for episode in range(episodes):
            state = self.env.reset(prompt=prompt)
            done = False
            action_sequence = []
            total_reward = 0
            while not done:
                state_index = self.convert_state_to_index(state)
                if np.random.rand() < self.exploration_rate:
                    action = self.env.action_space.sample()
                else:
                    action = np.argmax(self.q_table[state_index])

                action_sequence.append(action)
                next_state, reward, done, _ = self.env.step(action)
                
                next_state_index = self.convert_state_to_index(next_state)

                self.q_table[state_index, action] = (1 - self.learning_rate) * self.q_table[state_index, action] + \
                                                    self.learning_rate * (reward + self.discount_factor * np.max(self.q_table[next_state_index]))
                state = next_state
                total_reward += reward

            nextstates = state.tolist()
            pruned_actions.extend(nextstates)
            self.generated_sequences.append(action_sequence)
            logging.info(f"Episode {episode+1}/{episodes} completed with total reward: {total_reward}")
            self.exploration_rate *= self.exploration_decay

        logging.info("Training completed successfully!")

    def convert_state_to_index(self, state):
        if isinstance(state, list) or isinstance(state, np.ndarray):
            flat_state = np.ravel(state)
            index = np.dot(flat_state, np.arange(len(flat_state)))
            return int(index) % self.q_table.shape[0]
        return int(state) % self.q_table.shape[0]
    def get_generated_sequences(self): 
        return self.generated_sequences
    def print_generated_text(self):
        print(self.env.get_last_text())
        

env = TextGenerationEnv(model, tokenizer)
agent = RLAgent(env)
#prompt="As shakespeare says in "
prompt = "One of the financial scandal is the "
agent.train(prompt=prompt)

print(f"Pruned Actions: {pruned_actions}")


Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
INFO:root:Episode 1/20 completed with total reward: 1
INFO:root:Episode 2/20 completed with total reward: 1
INFO:root:Episode 3/20 completed with total reward: 1
INFO:root:Episode 4/20 completed with total reward: 1
INFO:root:Episode 5/20 completed with total reward: 1
INFO:root:Episode 6/20 completed with total reward: 1
INFO:root:Episode 7/20 completed with total reward: 1
INFO:root:Episode 8/

Pruned Actions: [3198, 286, 262, 3176, 10731, 318, 262, 220, 2171, 286, 262, 3298, 11754, 1080, 326, 423, 4073, 5179, 286, 4138, 286, 3946, 284, 307, 2626, 290, 45922, 517, 284, 2666, 11, 422, 262, 3298, 3176, 5939, 284, 262, 1294, 7395, 290, 28486, 5939, 13, 383, 2274, 10731, 326, 36872, 262, 3198, 286, 262, 3176, 10731, 318, 262, 220, 2575, 259, 326, 2957, 284, 262, 29921, 805, 17235, 13, 628, 198, 464, 1637, 1908, 284, 262, 1230, 706, 29921, 805, 14707, 290, 284, 663, 5096, 1814, 373, 22138, 1068, 832, 262, 523, 12, 7174, 29928, 13, 198, 198, 1, 1532, 257, 3198, 286, 262, 3176, 10731, 318, 262, 220, 1849, 49777, 7118, 286, 262, 3884, 14166, 832, 26610, 33672, 438, 4758, 318, 8998, 379, 1642, 262, 3773, 1663, 5443, 290, 517, 2952, 355, 6341, 423, 517, 1637, 287, 511, 16511, 13, 220, 1849, 464, 1103, 2071, 783, 318, 1771, 356, 423, 3198, 286, 262, 3176, 10731, 318, 262, 220, 2171, 286, 262, 3878, 39824, 13, 1114, 257, 890, 640, 6341, 290, 7713, 547, 7960, 546, 262, 1109, 326, 1992, 24

In [None]:
import torch 
from transformers import GPT2LMHeadModel, GPT2Tokenizer

pruned_action_space = pruned_actions

def top_k_top_p_filtering( logits, top_k=0, top_p=1.0, filter_value=-float('Inf')):
    """ Filter a distribution of logits using top-k and/or nucleus (top-p) filtering """
    assert logits.dim() == 2  # logits should be [batch_size, vocab_size]
    top_k = min(top_k, logits.size(-1))  # Safety check
    if top_k > 0:
        # Remove all tokens with a probability less than the last token in the top-k tokens
        indices_to_remove = logits < torch.topk(logits, top_k)[0][..., -1, None]

        logits[indices_to_remove] = filter_value

    if top_p < 1.0:
        sorted_logits, sorted_indices = torch.sort(logits, descending=True)
        cumulative_probs = torch.cumsum(torch.nn.functional.softmax(sorted_logits, dim=-1), dim=-1)

        # Remove tokens with cumulative probability above the threshold
        sorted_indices_to_remove = cumulative_probs > top_p
        # Shift the indices to the right to keep the first token above the threshold
        sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone()
        sorted_indices_to_remove[..., 0] = 0

        indices_to_remove = sorted_indices[sorted_indices_to_remove]

        logits[0,indices_to_remove] = filter_value
    return logits
    
def generate_custom_text_pruned( inputs,input_ids, max_length=50, top_k=50, top_p=0.95):

    # Initialize generated tokens list
    done = False
    while not done:
        generated_text = ""
        generated = input_ids
        for _ in range(max_length):
            outputs = model(input_ids=generated)
        
            next_token_logits = outputs.logits[:, -1, :]
            
            # Limit vocab by lowering the probailities for all other tokens except pruned
            mask = torch.full(next_token_logits.shape, float('-inf'))
            
            for token_id in pruned_action_space:
                mask[:, token_id] = next_token_logits[:, token_id]
            next_token_logits = mask
           
            # Sampling
            filtered_logits = top_k_top_p_filtering(next_token_logits, top_k=top_k, top_p=top_p)
    
            next_token = torch.multinomial(torch.nn.functional.softmax(filtered_logits, dim=-1), num_samples=1)
            generated = torch.cat((generated, next_token), dim=1)
            # Stop generating if the end-of-sequence token is generated
            if next_token in tokenizer.encode(tokenizer.eos_token):
                break
    
        generated_text = tokenizer.decode(generated.squeeze(), skip_special_tokens=True)
        print(generated_text)
        if "bank" in generated_text or "Bank" in generated_text:
            done = True
            
    return generated_text


        

inputs = tokenizer(prompt, return_tensors='pt')
input_ids = inputs['input_ids']


tot_time = 0   
start_time = time.time()
for ind in range(50):
    print(f"Iteration: {ind+1}/50") 
    generated_text = generate_custom_text_pruned(inputs, input_ids)
end_time = time.time()
tot_time += (end_time - start_time)
avg_time = tot_time/50
print(f"Pruned: Total Time: {tot_time}; Average Time {avg_time}")

print(generated_text)



Iteration: 1/10
One of the financial scandal is the ills of the new global banking system and the fact that those who know the financial scandals and the financial scandals have been so big that you can buy it from the New York banks when you can't get it in a bank.

And the great
Iteration: 2/10
One of the financial scandal is the ills of the financial world. The problem with financial capitalism is that it is not the working class who has to be the financial workers, but the working class. In the banking economy, it is not the banks who create money. In this crisis,
Iteration: 3/10
One of the financial scandal is the  massive massive in-accountancy system in which a bank's accounts and accounts-
accounts- are bought or "dis-

dis-

salered accounts. The  massive
Iteration: 4/10
One of the financial scandal is the ills of the "bank.

Fol, we can't let the other world's banks get what they've been forced to give us. We're in the business of trying to get people to actually give us more

In [None]:
import torch 
from transformers import GPT2LMHeadModel, GPT2Tokenizer

def top_k_top_p_filtering( logits, top_k=0, top_p=1.0, filter_value=-float('Inf')):
    """ Filter a distribution of logits using top-k and/or nucleus (top-p) filtering """
    assert logits.dim() == 2  # logits should be [batch_size, vocab_size]
    top_k = min(top_k, logits.size(-1))  # Safety check
    if top_k > 0:
        # Remove all tokens with a probability less than the last token in the top-k tokens
        indices_to_remove = logits < torch.topk(logits, top_k)[0][..., -1, None]

        logits[indices_to_remove] = filter_value

    if top_p < 1.0:
        sorted_logits, sorted_indices = torch.sort(logits, descending=True)
        cumulative_probs = torch.cumsum(torch.nn.functional.softmax(sorted_logits, dim=-1), dim=-1)

        # Remove tokens with cumulative probability above the threshold
        sorted_indices_to_remove = cumulative_probs > top_p
        # Shift the indices to the right to keep the first token above the threshold
        sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone()
        sorted_indices_to_remove[..., 0] = 0

        indices_to_remove = sorted_indices[sorted_indices_to_remove]

        logits[0,indices_to_remove] = filter_value
    return logits


def generate_custom_text( inputs,input_ids, max_length=50, top_k=50, top_p=0.95):

    # Initialize generated tokens list
    done = False
    while not done:
        generated_text = ""
        generated = input_ids
        for _ in range(max_length):
            outputs = model(input_ids=generated)
    
    
            next_token_logits = outputs.logits[:, -1, :]
            
            # Mask tokens not in custom action space by setting their logits to a very low value
            #mask = torch.full(next_token_logits.shape, float('-inf'))
            
            #for token_id in pruned_action_space:
            #    mask[:, token_id] = next_token_logits[:, token_id]
            #next_token_logits = mask
           
            # Apply sampling techniques
            filtered_logits = top_k_top_p_filtering(next_token_logits, top_k=top_k, top_p=top_p)
    
            next_token = torch.multinomial(torch.nn.functional.softmax(next_token_logits, dim=-1), num_samples=1)
            generated = torch.cat((generated, next_token), dim=1)
            # Stop generating if the end-of-sequence token is generated
            if next_token in tokenizer.encode(tokenizer.eos_token):
                break

        generated_text = tokenizer.decode(generated.squeeze(), skip_special_tokens=True)
        print(generated_text)
        if "bank" in generated_text or "Bank" in generated_text:
            done = True

    return generated_text



inputs = tokenizer(prompt, return_tensors='pt')
input_ids = inputs['input_ids']

tot_time = 0   
start_time = time.time()
for ind in range(50):
    print(f"Iteration: {ind+1}/50") 
    generated_text = generate_custom_text(inputs, input_ids)
end_time = time.time()
tot_time += (end_time - start_time)
avg_time = tot_time/50
print(f"Not Pruned: Total Time: {tot_time}; Average Time {avg_time}")

print(generated_text)



Iteration: 1/10
One of the financial scandal is the ersatz money transfers that led to the massive rescue of Japan from the Nazis during World War II. Some of this money flows directly to the US government, such as JP Morgan Chase. JP Morgan Chase has been the recipient of over 1 billion dollars of
One of the financial scandal is the  unsuccessful and failed attempt by Paul Ryan to keep the House Republicans from passing a tax cut for middle-class Americans last month.  According to Bloomberg , the tax bill's increase in the deficit should have helped keep the House
One of the financial scandal is the ills of the financial system that has plagued the United States for the past several years. What can we do to help? We can do both. If we can create a global and growing financial system, including a robust national and regional system, we can
One of the financial scandal is the ills of the 'investor's state.' We could go to the source of the scandal, or we could get the story by an indep