In [1]:
import torch
import torch.nn as nn
from models.transformer_pytorch import TransformerPyTorch
from models.transformer import Transformer
from hyperparameters import hyperparameters

vocab_size = 32000
batch_size = 2
seq_len = 5

# Seed before anything random happens (weight init, the dummy batch, dropout)
# so the printed losses are reproducible after a kernel restart.
torch.manual_seed(0)

# Both implementations are built from the same hyperparameters so their
# losses are comparable. Note that the encoder FFN width is used as d_ff and
# the same layer count is used for encoder and decoder.
transformer_cfg = hyperparameters.transformer
shared_kwargs = dict(
    d_model=transformer_cfg.hidden_size,
    num_heads=transformer_cfg.num_heads,
    d_ff=transformer_cfg.encoder_ffn_embed_dim,
    num_encoder_layers=transformer_cfg.num_hidden_layers,
    num_decoder_layers=transformer_cfg.num_hidden_layers,
    dropout=transformer_cfg.dropout,
    max_len=transformer_cfg.max_len,
)

pytorch_model: nn.Module = TransformerPyTorch(vocab_size=vocab_size, **shared_kwargs)
own_model = Transformer(
    src_vocab_size=vocab_size,
    tgt_vocab_size=vocab_size,
    **shared_kwargs,
)

# Targets equal to 0 are excluded from the loss (presumably the padding id —
# confirm against the vocabulary). reduction="mean" makes the loss a scalar.
criterion = nn.CrossEntropyLoss(ignore_index=0, reduction="mean")

# Dummy data: token ids are drawn from [1, vocab_size), so index 0 never
# appears and no position is ignored by the loss.
src = torch.randint(1, vocab_size, (batch_size, seq_len))
tgt = torch.randint(1, vocab_size, (batch_size, seq_len))

# Teacher forcing: the decoder sees tokens 0..T-2 and predicts tokens 1..T-1.
decoder_in = tgt[:, :-1]
labels = tgt[:, 1:]

# NOTE(review): neither model is switched to eval(), so any nn.Dropout inside
# them is active during these forward passes — confirm that is intended.
for message, candidate in (
    ("Dummy test loss =", pytorch_model),
    ("Dummy test loss on own model =", own_model),
):
    # Assumes the model returns [B, T-1, vocab_size]; CrossEntropyLoss wants
    # the class dimension second, hence the transpose to [B, vocab_size, T-1].
    logits = candidate(src, decoder_in).transpose(1, 2)
    loss = criterion(logits, labels)  # scalar (mean over all positions)
    print(message, loss.item())

Dummy test loss = 10.64527416229248
Dummy test loss on own model = 10.447006225585938




In [4]:
# Find the average sentence length in the dataset, measured in
# whitespace-separated tokens per line (one sentence per line is assumed).
merged_path = "local/data/training/bpe_train.de"
total_len = 0
num_lines = 0

with open(merged_path, "r", encoding="utf-8") as f:
    for line in f:
        total_len += len(line.split())
        num_lines += 1

# Guard against an empty file, which would otherwise raise ZeroDivisionError.
avg_len = total_len / num_lines if num_lines else 0.0
print("Average sentence length in the dataset =", avg_len)

Average sentence length in the dataset = 30.32287386028867


In [5]:
import pickle
from vocab import Vocabulary


# NOTE(review): unpickling executes arbitrary code from the file, so only load
# vocabulary pickles that were produced by this project.
# Presumably the pickle holds a `Vocabulary` instance, which is why that class
# is imported even though it is not referenced directly — confirm.
# The context manager closes the file handle, which the bare open() never did.
with open("local/vocab_shared.pkl", "rb") as vocab_file:
    vocab = pickle.load(vocab_file)  # type: ignore
print("Vocab size =", len(vocab))

Vocab size = 32181


In [1]:
import torch

# 3x3 integer matrix holding 1..9 in row-major order.
x = torch.arange(1, 10).reshape(3, 3)

# Show the same storage viewed flat and viewed as 3x3.
for target_shape in ((-1,), (3, 3)):
    print(x.view(*target_shape))

# Slicing with [:1] keeps the leading dimension, giving a 1x3 tensor.
x[:1]

tensor([1, 2, 3, 4, 5, 6, 7, 8, 9])
tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])


tensor([[1, 2, 3]])

In [15]:
from models.transformer_model import TransformerModel
from hyperparameters import hyperparameters
from uq.generate_with_uq import _enable_test_time_dropout
from torch import nn

vocab_size = 32000

# Set the implementation flag before the model is built.
# NOTE(review): presumably TransformerModel reads this setting — confirm.
hyperparameters.transformer.transformer_implementation = "pytorch"

# Collect the constructor arguments in one place. The encoder FFN width is
# used as d_ff and the same layer count is used for encoder and decoder.
transformer_cfg = hyperparameters.transformer
model_kwargs = {
    "vocab_size": vocab_size,
    "d_model": transformer_cfg.hidden_size,
    "num_heads": transformer_cfg.num_heads,
    "d_ff": transformer_cfg.encoder_ffn_embed_dim,
    "num_encoder_layers": transformer_cfg.num_hidden_layers,
    "num_decoder_layers": transformer_cfg.num_hidden_layers,
    "dropout": transformer_cfg.dropout,
    "max_len": transformer_cfg.max_len,
}
model = TransformerModel(**model_kwargs)

# Put the model in eval mode before anything is selectively switched back.
model.eval()

def dropout_repr(self):
    """Describe a dropout module as ``ClassName(p=<p>, training=<flag>)``."""
    text = f"{self.__class__.__name__}(p={self.p}, training={self.training})"
    return text


# Patch the class-level repr so every nn.Dropout shows its training flag.
nn.Dropout.__repr__ = dropout_repr


def enable_fast_test_time_dropout(model: TransformerModel) -> None:
    """
    Switch the dropout modules of the last decoder layer to training mode.

    Only ``nn.Dropout`` instances found under
    ``model.transformer.decoder.layers[-1]`` are touched; every other module
    keeps whatever mode it already had.
    """
    last_layer = model.transformer.decoder.layers[-1]
    dropouts = [m for m in last_layer.modules() if isinstance(m, nn.Dropout)]
    for dropout in dropouts:
        dropout.train()

# Re-activate dropout in the last decoder layer only; model.eval() above left
# everything else in eval mode.
enable_fast_test_time_dropout(model)


def print_model_with_mode(module, indent=0):
    """Print the module tree, one line per module, tagged with train/eval.

    Each nesting level is indented by four more spaces than its parent.
    """
    prefix = " " * indent
    if module.training:
        mode = "train"
    else:
        mode = "eval"
    print(f"{prefix}{module.__class__.__name__} (mode={mode})")
    # Recurse into the direct children; their names are not needed here.
    for child in module.children():
        print_model_with_mode(child, indent + 4)

# Dump the module tree with each module's train/eval flag to check which
# modules were switched back to training mode.
print_model_with_mode(model)
# Alternative: evaluate `model` as the last expression to see its full repr.


TransformerModel (mode=eval)
    Embedding (mode=eval)
    Dropout (mode=eval)
    PositionalEncoding (mode=eval)
    Transformer (mode=eval)
        TransformerEncoder (mode=eval)
            ModuleList (mode=eval)
                TransformerEncoderLayer (mode=eval)
                    MultiheadAttention (mode=eval)
                        NonDynamicallyQuantizableLinear (mode=eval)
                    Linear (mode=eval)
                    Dropout (mode=eval)
                    Linear (mode=eval)
                    LayerNorm (mode=eval)
                    LayerNorm (mode=eval)
                    Dropout (mode=eval)
                    Dropout (mode=eval)
                TransformerEncoderLayer (mode=eval)
                    MultiheadAttention (mode=eval)
                        NonDynamicallyQuantizableLinear (mode=eval)
                    Linear (mode=eval)
                    Dropout (mode=eval)
                    Linear (mode=eval)
                    LayerNorm (mode=eval)


In [1]:
from uq.acquisition_func import BLEU_mean_output_batch,BLEUVariance,VR_mpnet_base_distance
import torch
# Dummy data: three groups of five candidate outputs each.
greeting_group = ["Hello world", "Goodbye world", "Hi globe", "Hi you are cool", "Hello world"]
animal_group = ["dogs are cool", "cats are cool", "dogs are nice", "cats are cool", "doggie"]
exhibition_group = [
    "The W514 village association is once again hosting this great exhibition.",
    "Once again, the local club W514 is setting out this large exhibition.",
    "The local district association W514 is once again holding this large exhibition.",
    "The local association W514 is once again setting out this large exhibition.",
    "The W514 local association is once again organizing this large exhibition.",
]
sentences = [greeting_group, animal_group, exhibition_group]

bv = BLEUVariance()
vr = VR_mpnet_base_distance()

print("BLEU_mean_output_batch", BLEU_mean_output_batch(sentences))
# NOTE(review): the second argument has length 2 while there are 3 sentence
# groups — confirm what the acquisition functions expect here.
print("BLEUVariance", bv(sentences, torch.zeros(2)))
print("VR_mpnet_base_distance", vr(sentences, torch.zeros(2)))


  from .autonotebook import tqdm as notebook_tqdm


BLEU_mean_output_batch ['Hi globe', 'dogs are cool', 'The local association W514 is once again setting out this large exhibition.']
BLEUVariance tensor([1.9722e-31, 1.9722e-31, 1.9722e-31])
VR_mpnet_base_distance tensor([1.8255, 1.5468, 0.6451])


In [6]:
import tiktoken
from gpt2project.data_processing.load_commongen import generate_input_text
from gpt2project.gpt2model import GPT
from gpt2project.gpt2_generate import generate_autoregressivly_gpt2
import torch

from gpt2project.utils.decode import decode_token_list

# Build the prompt for the three concept words and the GPT-2 tokenizer first;
# neither depends on the model.
prompt = generate_input_text(["car", "fast", "park"])
tokenizer = tiktoken.get_encoding("gpt2")

# Load the pretrained "gpt2" weights and switch the model to eval mode.
model = GPT.from_pretrained("gpt2")
model.eval()


loading weights from pretrained gpt: gpt2


In [7]:
from gpt2project.search_methods_gpt import (
    AutoregressiveInferenceResults,
    _clean_inference_results,
    greedy_search_gpt,
)
import torch.nn as nn
from gpt2project.utils.decode import decode_token_id_batch
from gpt2project.utils.print_generated_text import print_generated_text_with_colors
from hyperparameters import hyperparameters
from uq.generate_with_uq import _enable_test_time_dropout

# NOTE(review): presumably re-enables dropout at inference so that repeated
# greedy decodes differ from each other — confirm in uq.generate_with_uq.
_enable_test_time_dropout(model)

# Encode the prompt and add a leading batch dimension of size 1.
tgt_tokens = torch.tensor(tokenizer.encode(prompt)).unsqueeze(0)

# Decode the same prompt several times, keeping each run's token ids on the
# CPU with the leading dimension squeezed away again.
num_samples = 10
results = [
    generate_autoregressivly_gpt2(
        model, tokenizer, tgt_tokens, greedy_search_gpt, break_on_newline=True
    ).token_ids.squeeze(0).cpu()
    for _ in range(num_samples)
]

generated_text = decode_token_id_batch(results, tokenizer)
print_generated_text_with_colors(generated_text, prompt)

Task: Generate a meaningful sentence using the provided words.

Example 1:
Words: field, look, stand.
Sentence: The player stood in the field looking at the batter.

Example 2:
Words: climb, building, side.
Sentence: I climbed the side of the building.

Now try:
Words: car, fast, park.
Sentence:
[91m I climbed the side of the building.
[0m
[92m I climbed the side of the building.
[0m
[93m I climbed the side of the car.
[0m
[94m I was driving fast.
[0m
[95m The player was fast.
[0m
[96m I climbed the car fast.
[0m
[91m I climbed the side of the car.
[0m
[92m I parked the car.
[0m
[93m I'm going to park.
[0m
[94m I I climbed the car.
[0m


In [5]:
# Show the raw prompt string to inspect the exact few-shot template.
# NOTE(review): the stored output lists different words than the prompt built
# in the cells above, so this cell was last run out of order — re-run it.
prompt

'Task: Generate a meaningful sentence using the provided words.\n\nExample 1:\nWords: field, look, stand.\nSentence: The player stood in the field looking at the batter.\n\nExample 2:\nWords: climb, building, side.\nSentence: I climbed the side of the building.\n\nNow try:\nWords: however, transmitter, arbitrary.\nSentence:'

In [65]:
import numpy as np
import wandb
import matplotlib.pyplot as plt

# Start a fresh evaluation run; resume="never" means an existing run is never
# resumed.
wandb.init(
    project="GPT2Project",
    resume="never",
    group="evaluation",
    dir="test-wandb",
    job_type="evaluation",
)

# Everything after init sits in try/finally so the run is always closed, even
# if plotting or logging raises.
try:
    assert wandb.run is not None
    current_step_of_run = wandb.run.step
    print(f"Current step of run: {current_step_of_run}")

    # Create the demo data: ten scores falling linearly from 1100 to 200.
    x = list(range(1, 11))
    y = np.arange(11, 1, -1) * 100

    # Draw the curve on an explicit figure/axes pair instead of relying on
    # pyplot's implicit current figure.
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.plot(x, y, marker="o", linestyle="-", color="blue")
    ax.set_title("Retention Curve")
    ax.set_xlabel("Number of Samples")
    ax.set_ylabel("Evaluation Score")
    ax.grid(True)

    # Save the figure in the working directory so it can be attached as an
    # image; the file is not removed afterwards.
    plt_filename = "retention_curve.png"
    fig.savefig(plt_filename)
    plt.close(fig)

    # Create a table with benchmark name and image
    table = wandb.Table(columns=["benchmark_name", "plot", "max_score", "min_score"])

    # Add a row to the table with benchmark name and image
    benchmark_name = "GPT2 Evaluation"
    max_score = y.max()
    min_score = y.min()
    table.add_data(benchmark_name, wandb.Image(plt_filename), max_score, min_score)

    # Log the table to W&B
    wandb.log({"benchmark_results": table})

    # Optional: also add to summary to make it appear on the dashboard
    # wandb.run.summary["benchmark_table"] = table

    # Optional: also save the image directly
    # wandb.save(plt_filename)
finally:
    wandb.finish()

Current step of run: 0


In [71]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import ipywidgets as widgets
from IPython.display import display, clear_output

# Generate sample data
np.random.seed(0)
steps = np.arange(0, 100)
model_types = ['Model A', 'Model B', 'Model C']
acquisition_functions = ['BeamScore', 'BALD', 'BLEUVar', 'mpnet_cosine']
evaluation_functions = ['F1Eval', 'BLEU_eval', 'TargetUsageEval', 'ConceptUsageEval']

# Characteristic adjustments per function type. Functions not listed keep the
# raw random walk (offset 0, scale 1).
acquisition_offsets = {'BALD': 2, 'BLEUVar': -1}
evaluation_scales = {'BLEU_eval': 1.2, 'ConceptUsageEval': 0.8}

# One random-walk curve per (model, acquisition, evaluation) combination.
# The loop variable is `model_name`, not `model`, so this cell does not
# overwrite the notebook-global `model` used by the cells above.
data = []
for model_name in model_types:
    for aq_func in acquisition_functions:
        for eval_func in evaluation_functions:
            # The random draw happens first, in loop order, so the seeded
            # sequence is the same as before; the offset is applied before
            # the scale.
            base = np.cumsum(np.random.randn(len(steps)))
            base = base + acquisition_offsets.get(aq_func, 0)
            base = base * evaluation_scales.get(eval_func, 1.0)

            data.append(pd.DataFrame({
                'step': steps,
                'value': base,
                'model': model_name,
                'acquisition_function': aq_func,
                'evaluation_function': eval_func
            }))

# Single concat of all curves; each curve keeps its own 0..99 index.
df = pd.concat(data)

# Create widgets for filtering
def _multi_select(options, default, label):
    """Build a multi-select list over `options` with `default` preselected."""
    return widgets.SelectMultiple(
        options=options,
        value=default,
        description=label,
        disabled=False
    )


# Models default to "show all"; the other two default to their first entry.
model_selector = _multi_select(model_types, model_types, 'Models')
acquisition_selector = _multi_select(
    acquisition_functions, [acquisition_functions[0]], 'Acquisition'
)
evaluation_selector = _multi_select(
    evaluation_functions, [evaluation_functions[0]], 'Evaluation'
)

# Free-text plot title
title_input = widgets.Text(
    value='Benchmark Retention Curve',
    placeholder='Enter plot title',
    description='Title:',
    disabled=False
)

# Export controls: the button plus the target filename
export_button = widgets.Button(
    description='Export to SVG',
    disabled=False,
    button_style='success',
    tooltip='Export current plot to SVG file'
)
output_filename = widgets.Text(
    value='plot_export.svg',
    placeholder='filename.svg',
    description='Filename:',
    disabled=False
)

# Output area that the plot is rendered into
plot_output = widgets.Output()

def create_plot(selected_models, selected_acquisitions, selected_evaluations, plot_title):
    """Plot one curve per selected (model, acquisition, evaluation) combination.

    Rows are taken from the notebook-level ``df``. Combinations with no
    matching rows are skipped.

    Args:
        selected_models: iterable of values to match against ``df['model']``.
        selected_acquisitions: iterable of values to match against
            ``df['acquisition_function']``.
        selected_evaluations: iterable of values to match against
            ``df['evaluation_function']``.
        plot_title: title drawn above the axes.

    Returns:
        The newly created matplotlib figure. The caller is responsible for
        showing, saving and closing it.
    """
    # Draw on an explicit axes object rather than pyplot's current figure, so
    # the returned figure is exactly the one that was drawn on.
    fig, ax = plt.subplots(figsize=(10, 6))

    for model in selected_models:
        for aq_func in selected_acquisitions:
            for eval_func in selected_evaluations:
                filtered_df = df[(df['model'] == model) &
                                 (df['acquisition_function'] == aq_func) &
                                 (df['evaluation_function'] == eval_func)]

                if not filtered_df.empty:
                    label = f"{model} - {aq_func} - {eval_func}"
                    ax.plot(filtered_df['step'], filtered_df['value'], label=label)

    ax.set_xlabel('Number of Samples')
    ax.set_ylabel('Evaluation Score')
    ax.set_title(plot_title)
    ax.grid(True, linestyle='--', alpha=0.7)

    # Only add a legend when something was plotted; with an empty selection
    # matplotlib would otherwise warn that no labelled artists exist.
    handles, _labels = ax.get_legend_handles_labels()
    if handles:
        # Anchor the legend outside the axes, to the right of the plot area.
        ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')

    fig.tight_layout()
    return fig

def update_plot(selected_models, selected_acquisitions, selected_evaluations, plot_title):
    """Redraw the plot inside ``plot_output`` for the given selections."""
    selection = (selected_models, selected_acquisitions, selected_evaluations, plot_title)
    with plot_output:
        # wait=True postpones clearing until the replacement output arrives.
        clear_output(wait=True)
        create_plot(*selection)
        plt.show()

# Export function
def export_svg(b):
    """Button callback: render the current selection and save it as SVG.

    Args:
        b: the argument passed by ``Button.on_click``; not used.

    The filename comes from the ``output_filename`` widget. Surrounding
    whitespace is stripped, a blank value falls back to ``plot_export.svg``,
    and ``.svg`` is appended unless the name already ends with it in any
    letter case.
    """
    filename = output_filename.value.strip() or 'plot_export.svg'
    # Case-insensitive check so "Plot.SVG" is not turned into "Plot.SVG.svg".
    if not filename.lower().endswith('.svg'):
        filename += '.svg'

    fig = create_plot(
        model_selector.value,
        acquisition_selector.value,
        evaluation_selector.value,
        title_input.value
    )
    try:
        fig.savefig(filename, format='svg', bbox_inches='tight')
    finally:
        # Close the figure even when saving fails, so it does not stay open.
        plt.close(fig)
    print(f"Plot exported to {filename}")

# Connect the button to the export function; on_click passes the clicked
# button to the callback, which is export_svg's `b` parameter.
export_button.on_click(export_svg)

# Display the export controls (filename field and button side by side)
export_controls = widgets.HBox([output_filename, export_button])
display(export_controls)

# Use interactive to update the plot when any selection changes. Each keyword
# below binds one update_plot parameter to the widget that supplies its value.
interactive_plot = widgets.interactive(
    update_plot, 
    selected_models=model_selector,
    selected_acquisitions=acquisition_selector,
    selected_evaluations=evaluation_selector,
    plot_title=title_input
)

# Show the selection widgets first, then the output area that update_plot
# draws into.
display(interactive_plot)
display(plot_output)


HBox(children=(Text(value='plot_export.svg', description='Filename:', placeholder='filename.svg'), Button(butt…

interactive(children=(SelectMultiple(description='Models', index=(0, 1, 2), options=('Model A', 'Model B', 'Mo…

Output()

Plot exported to plot_export.svg
Plot exported to plot_export.svg
