# Let's compare the behavior of GPT-2 before and after fine-tuning.

In [2]:
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# load pretrained model
model_pt = GPT2LMHeadModel.from_pretrained('gpt2')
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')

# load fine-tuned model
model_checkpoint = 'outputs/gpt-2-test/gpt-2-test_final'
model_ft = GPT2LMHeadModel.from_pretrained(model_checkpoint)

In [5]:
model_pt.to('cuda')
model_ft.to('cuda')

GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2SdpaAttention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D()
          (c_proj): Conv1D()
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=768, out_features=50257, bias=False)
)

In [10]:
# Compare the generated text from the two models
# Let's see the knowledge on the Covid-19
prompt = 'Covid-19 is'
input_ids = tokenizer.encode(prompt, return_tensors='pt')
input_ids = input_ids.to('cuda')

output_pt = model_pt.generate(input_ids, max_new_tokens=25)
output_ft = model_ft.generate(input_ids, max_new_tokens=25)

output_pt = tokenizer.decode(output_pt[0], skip_special_tokens=True)
output_ft = tokenizer.decode(output_ft[0], skip_special_tokens=True)

print('GPT2: ', output_pt)
print('Fine-tuned GPT2: ', output_ft)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


GPT2:  Covid-19 is a very good example of a good, well-designed, and well-designed, and well-designed, and well-
Fine-tuned GPT2:  Covid-19 is a viral disease that has been associated with severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2).


In [11]:
# Let's generate with beam search
output_pt = model_pt.generate(input_ids, max_new_tokens=25, num_beams=5)
output_ft = model_ft.generate(input_ids, max_new_tokens=25, num_beams=5)

output_pt = tokenizer.decode(output_pt[0], skip_special_tokens=True)
output_ft = tokenizer.decode(output_ft[0], skip_special_tokens=True)

print('GPT2: ', output_pt)
print('Fine-tuned GPT2: ', output_ft)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


GPT2:  Covid-19 is one of the most widely used drugs in the world. It has been used to treat a wide range of conditions including cancer,
Fine-tuned GPT2:  Covid-19 is one of the most common coronavirus-related coronavirus infections in the United States. In this article, we discuss
