In [3]:
#%pip install torch numpy transformers
%pip install gym


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.2[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython3.11 -m pip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [1]:
import random
import torch
import numpy as np
from transformers import GPT2LMHeadModel, GPT2Tokenizer

# Tokenizer first: its end-of-sequence token is reused as the padding token.
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
tokenizer.pad_token = tokenizer.eos_token

# Pre-trained language model; generation pads with the tokenizer's pad id.
model = GPT2LMHeadModel.from_pretrained('gpt2')
model.generation_config.pad_token_id = tokenizer.pad_token_id


  from .autonotebook import tqdm as notebook_tqdm


In [2]:

# Initialize population
def initialize_population(pop_size, prompt):
    """Build the starting population.

    Args:
        pop_size: Number of individuals to create.
        prompt: Value every individual starts out as.

    Returns:
        A list holding ``prompt`` repeated ``pop_size`` times.
    """
    population = []
    for _ in range(pop_size):
        population.append(prompt)
    return population

# Evaluate fitness
# def evaluate(candidate, model, tokenizer):
#     input_ids = tokenizer.encode(candidate, return_tensors='pt')
#     with torch.no_grad():
#         outputs = model.generate(input_ids, max_length=50)
#         generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
#     # Example fitness based on text length (you can use more complex criteria)
#     fitness = len(generated_text)
#     return fitness, generated_text
def evaluate(individual, model, tokenizer, prompt):
    """Score one individual by the keywords found in the model's continuation.

    The candidate text is ``prompt`` followed by the joined ``individual``.
    It is tokenized, passed to ``model.generate`` with sampling enabled, and
    the first generated sequence is decoded back to text.

    Args:
        individual: String (or iterable of strings) appended to ``prompt``.
        model: Object exposing ``generate(input_ids, **kwargs)``.
        tokenizer: Callable tokenizer that also exposes ``decode`` and
            ``eos_token_id``.
        prompt: Fixed prefix placed in front of the individual.

    Returns:
        A ``(fitness, generated_text)`` tuple. Fitness is 50, 45 or 40 for the
        first keyword matched (in that priority order) and 20 otherwise.
    """
    # Ordered by priority: the first keyword present decides the score.
    keyword_scores = (
        ("bank", 50),
        ("Federal Reserve", 45),
        ("money laundering", 40),
    )
    fallback_score = 20

    encoded = tokenizer(
        prompt + ''.join(individual),
        return_tensors='pt',
        padding=True,
        truncation=True,
    )

    with torch.no_grad():
        sampled = model.generate(
            encoded['input_ids'],
            max_length=50,
            attention_mask=encoded['attention_mask'],
            pad_token_id=tokenizer.eos_token_id,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
        )
    generated_text = tokenizer.decode(sampled[0], skip_special_tokens=True)

    fitness = next(
        (score for keyword, score in keyword_scores if keyword in generated_text),
        fallback_score,
    )
    return (fitness, generated_text)


# Selection
def select_parents(population, fitness_scores, num_parents):
    """Pick the fittest individuals, best first.

    Args:
        population: Sequence of individuals.
        fitness_scores: Scores aligned index-for-index with ``population``.
        num_parents: How many individuals to keep.

    Returns:
        A list of up to ``num_parents`` individuals ordered by descending
        fitness; individuals with equal fitness keep their original order.
    """
    ranking = list(range(len(fitness_scores)))
    ranking.sort(key=fitness_scores.__getitem__, reverse=True)

    chosen = []
    for idx in ranking[:num_parents]:
        chosen.append(population[idx])
    return chosen

# Crossover
def crossover(parent1, parent2):
    """Single-point crossover cut at the midpoint of ``parent1``.

    Returns:
        The first half of ``parent1`` (by its own length) followed by
        everything in ``parent2`` from that same index onward.
    """
    cut = len(parent1) // 2
    head, tail = parent1[:cut], parent2[cut:]
    return head + tail

# Mutation
def mutate(sequence, mutation_rate=0.1):
    """Randomly replace characters of ``sequence``.

    Each position is replaced, with probability ``mutation_rate``, by a
    character drawn uniformly from the lowercase letters plus space.

    Args:
        sequence: String (or iterable of one-character strings) to mutate.
        mutation_rate: Per-position replacement probability.

    Returns:
        The mutated sequence joined into a single string.
    """
    letters = list("abcdefghijklmnopqrstuvwxyz ")
    # One random() draw per position, followed by a choice() draw only when
    # that position mutates.
    return ''.join([
        random.choice(letters) if random.random() < mutation_rate else symbol
        for symbol in sequence
    ])

# Genetic Algorithm
def genetic_algorithm(prompt, model, tokenizer, pop_size=10, generations=5, mutation_rate=0.1):
    """Evolve prompt variants and return the best text the model generated.

    Each generation every individual is scored with ``evaluate``, the top
    individuals are kept as parents, and a new population of ``pop_size``
    children is bred with ``crossover`` followed by ``mutate``.

    Args:
        prompt: Text used both to seed every individual and as the fixed
            prefix handed to ``evaluate``.
        model: Language model forwarded to ``evaluate``.
        tokenizer: Tokenizer forwarded to ``evaluate``.
        pop_size: Number of individuals per generation.
        generations: Number of evaluate/breed rounds.
        mutation_rate: Per-character mutation probability passed to ``mutate``.

    Returns:
        The generated text with the highest fitness seen in any generation,
        or ``""`` when nothing scored above 0 (e.g. ``generations == 0``).

    NOTE(review): individuals start as ``prompt`` and ``evaluate`` prepends
    ``prompt`` again, so the text sent to the model contains the prompt twice
    in the first generation. Confirm whether that is intended.
    """
    population = initialize_population(pop_size, prompt)
    best_fitness = 0
    best_text = ""

    # random.sample needs at least two parents to draw a pair from;
    # pop_size // 2 alone falls below that for pop_size < 4.
    num_parents = max(2, pop_size // 2)

    for _ in range(generations):
        scored = [evaluate(candidate, model, tokenizer, prompt) for candidate in population]

        # Track the best generation output seen so far.
        for fitness, generated_text in scored:
            if fitness > best_fitness:
                best_fitness, best_text = fitness, generated_text

        fitness_scores = [fitness for fitness, _ in scored]
        parents = select_parents(population, fitness_scores, num_parents)

        # Breed the next generation.
        next_population = []
        while len(next_population) < pop_size:
            if len(parents) >= 2:
                parent1, parent2 = random.sample(parents, 2)
            else:
                # Single-individual population: breed the parent with itself.
                parent1 = parent2 = parents[0]
            child = mutate(crossover(parent1, parent2), mutation_rate)
            next_population.append(child)

        population = next_population

    return best_text

prompt = "One of the financial scandals is the "
# Token ids collected from the best text of every genetic-algorithm run
# (duplicates included).
pruned_actions = []
for _ in range(20):
    final_text = genetic_algorithm(prompt, model, tokenizer)
    print(f"Final Generated Text: {final_text}")
    # truncation=True makes the max_length cap explicit; without it the
    # tokenizer warns and falls back to a default truncation strategy.
    hof_actions = np.array(tokenizer.encode(final_text, max_length=50, truncation=True))
    pruned_actions.extend(list(hof_actions))

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


Final Generated Text: One of the financial scandals is the One of the financial scandals is the  financial crisis of 2007-2009.   It was a great financial crisis that exposed many of our biggest banks, but it was also the beginning of a very bad financial crisis
Final Generated Text: One of the financial scandals is the One of the financial scandals is the  $1.6 trillion trillion of bailout money that went to the banks in the '90s and 2000s. 
So, there are many things
Final Generated Text: One of the financial scandals is the One of the financial scandals is the  financial scandals in Greece.
The Greek government was not only in control of the financial system, but also the banking system. There was no financial crisis.
The Greek
Final Generated Text: One of the financial scandals is the One of the financial scandals is the  financial meltdown of 2008-09, when the nation's biggest bank, Citigroup, collapsed. The ensuing financial crisis has now taken a toll on the nation's financial sy

In [4]:
import gym
import numpy as np
def calculate_perplexity(text, model, tokenizer):
    """Return ``exp(-loss)`` of ``model`` on ``text``.

    The text is tokenized, fed to the model with the input ids doubling as
    labels, and the resulting loss is scaled to a summed log-likelihood and
    back to a per-token value before exponentiation.

    Args:
        text: Text to score.
        model: Callable returning an object with a ``loss`` tensor when given
            ``labels``.
        tokenizer: Callable returning an object with ``input_ids`` and
            ``attention_mask`` attributes.

    Returns:
        A Python float.

    NOTE(review): the exponent is the *negated* loss, so the value is the
    reciprocal of the conventional perplexity ``exp(loss)``.
    ``calculate_coherence`` and the ``reward > 0.9`` threshold in
    ``generate_custom_text`` are tuned to this scale; changing the sign here
    requires revisiting both.
    """
    encoded = tokenizer(text, return_tensors='pt')
    token_ids = encoded.input_ids
    num_tokens = token_ids.size(1)

    with torch.no_grad():
        result = model(token_ids, attention_mask=encoded.attention_mask, labels=token_ids)
        summed_log_likelihood = -result.loss * num_tokens

    return torch.exp(summed_log_likelihood / num_tokens).item()

def calculate_coherence(text, model, tokenizer):
    """Map the ``calculate_perplexity`` score of ``text`` to a coherence value.

    Returns:
        ``1 / (1 + p)`` where ``p`` is the value returned by
        ``calculate_perplexity``; for non-negative ``p`` this lies in (0, 1].
        No other metric is combined in at present.
    """
    score = calculate_perplexity(text, model, tokenizer)
    return 1 / (1 + score)
def _calculate_reward(generated_text, model, tokenizer):
    generated_text_str = ' '.join(generated_text)
    if "bank" in generated_text or "Bank" in generated_text or "Federal Reserve" in generated_text or "money laundering" in generated_text:
        coherence = calculate_coherence(generated_text, model, tokenizer)
        reward = coherence
    else:
        reward = 0

    return reward



In [None]:
import torch 
import time
from transformers import GPT2LMHeadModel, GPT2Tokenizer
# Prompt shared by the unpruned and pruned sampling experiments below.
# NOTE(review): this reads "scandal" whereas the genetic-algorithm cell uses
# "scandals"; confirm whether the two prompts are meant to differ.
prompt = "One of the financial scandal is the "

def top_k_top_p_filtering(logits, top_k=0, top_p=1.0, filter_value=-float('Inf')):
    """Filter a distribution of logits using top-k and/or nucleus (top-p) filtering.

    Args:
        logits: Tensor of shape [batch_size, vocab_size]. It is modified
            in place; callers in this notebook rely on that aliasing.
        top_k: Keep only the ``top_k`` highest logits per row (0 disables).
        top_p: Keep the smallest set of tokens whose cumulative probability
            exceeds ``top_p`` (1.0 disables).
        filter_value: Value written over every removed logit.

    Returns:
        The same ``logits`` tensor, after filtering.
    """
    assert logits.dim() == 2  # logits should be [batch_size, vocab_size]
    top_k = min(top_k, logits.size(-1))  # Safety check
    if top_k > 0:
        # Remove all tokens with a logit below the k-th largest one in the row.
        kth_largest = torch.topk(logits, top_k)[0][..., -1, None]
        logits[logits < kth_largest] = filter_value

    if top_p < 1.0:
        sorted_logits, sorted_indices = torch.sort(logits, descending=True)
        cumulative_probs = torch.cumsum(torch.nn.functional.softmax(sorted_logits, dim=-1), dim=-1)

        # Remove tokens with cumulative probability above the threshold
        sorted_indices_to_remove = cumulative_probs > top_p
        # Shift the indices to the right to keep the first token above the threshold
        sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone()
        sorted_indices_to_remove[..., 0] = False

        # Scatter the removal flags back to vocabulary order row by row, so
        # every batch row is filtered with its own indices instead of all
        # removals landing on row 0.
        indices_to_remove = sorted_indices_to_remove.scatter(1, sorted_indices, sorted_indices_to_remove)
        logits[indices_to_remove] = filter_value
    return logits


def generate_custom_text(inputs, input_ids, max_length=50, top_k=50, top_p=0.95):
    """Sample continuations of ``input_ids`` until one earns a reward above 0.9.

    Uses the module-level ``model`` and ``tokenizer``. Every attempt starts
    again from ``input_ids``, appends up to ``max_length`` sampled tokens
    (stopping early on the end-of-sequence token), prints the decoded text and
    scores it with ``_calculate_reward``.

    Args:
        inputs: Unused; kept so existing callers keep working.
        input_ids: Prompt token ids; a single sequence is expected because
            the sampled token is read as a scalar.
        max_length: Maximum number of tokens sampled per attempt.
        top_k: Top-k cutoff forwarded to ``top_k_top_p_filtering``.
        top_p: Nucleus cutoff forwarded to ``top_k_top_p_filtering``.

    Returns:
        The decoded text of the first attempt whose reward exceeds 0.9.
        There is no attempt limit, so the call does not return if no sample
        ever reaches that reward.
    """
    # Encode the end-of-sequence token once instead of on every step.
    eos_token_ids = set(tokenizer.encode(tokenizer.eos_token))

    while True:
        generated = input_ids
        # Pure inference: skip autograd bookkeeping while sampling.
        with torch.no_grad():
            for _ in range(max_length):
                outputs = model(input_ids=generated)
                next_token_logits = outputs.logits[:, -1, :]

                # Apply sampling techniques
                filtered_logits = top_k_top_p_filtering(next_token_logits, top_k=top_k, top_p=top_p)

                # Sample from the filtered distribution explicitly rather than
                # relying on the filter having mutated next_token_logits.
                next_token = torch.multinomial(
                    torch.nn.functional.softmax(filtered_logits, dim=-1), num_samples=1
                )
                generated = torch.cat((generated, next_token), dim=1)
                # Stop generating if the end-of-sequence token is generated
                if next_token.item() in eos_token_ids:
                    break

        generated_text = tokenizer.decode(generated.squeeze(), skip_special_tokens=True)
        print(generated_text)
        reward = _calculate_reward(generated_text, model, tokenizer)
        if reward > 0.9:
            return generated_text



inputs = tokenizer(prompt, return_tensors='pt')
input_ids = inputs['input_ids']

# Number of timed generations; used for the loop, the progress line and the average.
N_RUNS = 50

tot_time = 0
# perf_counter is a monotonic clock meant for measuring elapsed time.
start_time = time.perf_counter()
for ind in range(N_RUNS):
    # 1-based counter, matching the pruned benchmark cell.
    print(f"Iteration: {ind+1}/{N_RUNS}")
    generated_text = generate_custom_text(inputs, input_ids)
end_time = time.perf_counter()
tot_time += (end_time - start_time)
avg_time = tot_time/N_RUNS
print(f"Not Pruned: Total Time: {tot_time}; Average Time {avg_time}")

print(generated_text)



Iteration: 0/10
One of the financial scandal is the ersatz debt market. We're told that with so much debt, there is always room to maneuver. In fact, many people say that that is precisely what's happened. When you get close to the limit of just a few thousand dollars of debt
One of the financial scandal is the urchin of $90 billion in cash and securities from the collapse of Lehman Brothers. Those investments have now accounted for $50 billion of U.S. financial and commercial losses.

And they are being held in cash by a company that
One of the financial scandal is the ills the Bank of England has been dealing with. In the event, it would have to hold its ground against Britain's interest rate and its desire for greater liquidity. But its most significant policy problem is the rate it is to use to cover its borrowing
Iteration: 1/10
One of the financial scandal is the ills of the Internet and its "free zone." People are being harassed and harassed all over the world, and sometimes the

In [None]:
import torch 
from transformers import GPT2LMHeadModel, GPT2Tokenizer

# Unique token ids allowed during pruned sampling. pruned_actions repeats ids
# across genetic-algorithm runs; collapsing them avoids redundant work each
# time the mask is built.
pruned_action_space = sorted({int(token_id) for token_id in pruned_actions})

def top_k_top_p_filtering(logits, top_k=0, top_p=1.0, filter_value=-float('Inf')):
    """Filter a distribution of logits using top-k and/or nucleus (top-p) filtering.

    Args:
        logits: Tensor of shape [batch_size, vocab_size]. It is modified
            in place and returned.
        top_k: Keep only the ``top_k`` highest logits per row (0 disables).
        top_p: Keep the smallest set of tokens whose cumulative probability
            exceeds ``top_p`` (1.0 disables).
        filter_value: Value written over every removed logit.

    Returns:
        The same ``logits`` tensor, after filtering.
    """
    assert logits.dim() == 2  # logits should be [batch_size, vocab_size]
    top_k = min(top_k, logits.size(-1))  # Safety check
    if top_k > 0:
        # Remove all tokens with a logit below the k-th largest one in the row.
        kth_largest = torch.topk(logits, top_k)[0][..., -1, None]
        logits[logits < kth_largest] = filter_value

    if top_p < 1.0:
        sorted_logits, sorted_indices = torch.sort(logits, descending=True)
        cumulative_probs = torch.cumsum(torch.nn.functional.softmax(sorted_logits, dim=-1), dim=-1)

        # Remove tokens with cumulative probability above the threshold
        sorted_indices_to_remove = cumulative_probs > top_p
        # Shift the indices to the right to keep the first token above the threshold
        sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone()
        sorted_indices_to_remove[..., 0] = False

        # Scatter the removal flags back to vocabulary order row by row, so
        # every batch row is filtered with its own indices instead of all
        # removals landing on row 0.
        indices_to_remove = sorted_indices_to_remove.scatter(1, sorted_indices, sorted_indices_to_remove)
        logits[indices_to_remove] = filter_value
    return logits
    
def generate_custom_text_pruned(inputs, input_ids, max_length=50, top_k=50, top_p=0.95):
    """Sample continuations restricted to ``pruned_action_space`` until one mentions a bank.

    Uses the module-level ``model``, ``tokenizer`` and ``pruned_action_space``.
    Every attempt starts again from ``input_ids`` and appends up to
    ``max_length`` sampled tokens (stopping early on the end-of-sequence
    token). Before sampling, every logit whose token id is not in
    ``pruned_action_space`` is set to ``-inf``.

    Args:
        inputs: Unused; kept so existing callers keep working.
        input_ids: Prompt token ids; a single sequence is expected because
            the sampled token is read as a scalar.
        max_length: Maximum number of tokens sampled per attempt.
        top_k: Top-k cutoff forwarded to ``top_k_top_p_filtering``.
        top_p: Nucleus cutoff forwarded to ``top_k_top_p_filtering``.

    Returns:
        The decoded text of the first attempt containing "bank" or "Bank".
        There is no attempt limit, so the call does not return if no sample
        ever contains either word.

    Raises:
        ValueError: If ``pruned_action_space`` is empty, since no token could
            then be sampled.
    """
    # Build the allowed-token index once; the action space repeats ids and a
    # Python loop over it at every step is wasted work.
    allowed_ids = sorted({int(token_id) for token_id in pruned_action_space})
    if not allowed_ids:
        raise ValueError("pruned_action_space is empty; no token can be sampled")
    allowed_index = torch.tensor(allowed_ids, dtype=torch.long, device=input_ids.device)

    # Encode the end-of-sequence token once instead of on every step.
    eos_token_ids = set(tokenizer.encode(tokenizer.eos_token))

    while True:
        generated = input_ids
        # Pure inference: skip autograd bookkeeping while sampling.
        with torch.no_grad():
            for _ in range(max_length):
                outputs = model(input_ids=generated)
                next_token_logits = outputs.logits[:, -1, :]

                # Mask tokens outside the custom action space with -inf;
                # full_like keeps the mask on the logits' dtype and device.
                mask = torch.full_like(next_token_logits, float('-inf'))
                mask[:, allowed_index] = next_token_logits[:, allowed_index]

                # Apply sampling techniques
                filtered_logits = top_k_top_p_filtering(mask, top_k=top_k, top_p=top_p)

                next_token = torch.multinomial(
                    torch.nn.functional.softmax(filtered_logits, dim=-1), num_samples=1
                )
                generated = torch.cat((generated, next_token), dim=1)
                # Stop generating if the end-of-sequence token is generated
                if next_token.item() in eos_token_ids:
                    break

        generated_text = tokenizer.decode(generated.squeeze(), skip_special_tokens=True)
        print(generated_text)
        if "bank" in generated_text or "Bank" in generated_text:
            return generated_text


        

import time  # used for timing below; this cell's own import lines do not include it

inputs = tokenizer(prompt, return_tensors='pt')
input_ids = inputs['input_ids']

# Number of timed generations; used for the loop, the progress line and the average.
N_RUNS = 50

tot_time = 0
# perf_counter is a monotonic clock meant for measuring elapsed time.
start_time = time.perf_counter()
for ind in range(N_RUNS):
    print(f"Iteration: {ind+1}/{N_RUNS}")
    generated_text = generate_custom_text_pruned(inputs, input_ids)
end_time = time.perf_counter()
tot_time += (end_time - start_time)
avg_time = tot_time/N_RUNS
print(f"Pruned: Total Time: {tot_time}; Average Time {avg_time}")

print(generated_text)



Iteration: 1/10
One of the financial scandal is the  investment of large banks and financial institutions that were not able to pay to have a central bank. This has the problem of not being able to pay out the central bank in the first thing, when there are banks doing things which have not been
Iteration: 2/10
One of the financial scandal is the  failure to pay the $6 billion it's been agreed to in the previous financial crisis, which is also part of the 'One Bank, One Bank, One Bank for the New York market of New York, the New York Fed, the
Iteration: 3/10
One of the financial scandal is the  investment of the bank that has been the biggest in the world and the company's biggest in the world.
The government, which was also involved in the scandal, is now being bailed out by Bank of America. 
The 
Iteration: 4/10
One of the financial scandal is the  "trillion-tillion-tillion" of the Federal Reserve bailout money  that it now t. has been jinginged to the public. 
The t. has to be baile