In [2]:
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer
from pathlib import Path
import os

In [4]:
# Identifier of the base checkpoint; the loading cell passes it to
# `from_pretrained` for both the tokenizer and the model.
model_name = "distilgpt2"

# The checkpoint is looked up in a `models` directory under the current
# working directory, so the result depends on where the kernel was started.
model_save_path = Path.cwd() / 'models'
weights_path = f'{model_save_path}/last_training.pt'

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Echo the resolved checkpoint path and device, one per line.
print(weights_path, device, sep="\n")

/home/isachansson/DML-project/notebooks/models/last_training.pt
cuda


In [5]:
# Tokenizer of the base checkpoint. No dedicated padding token is configured
# here, so the end-of-sequence token doubles as the padding token.
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token

# Instantiate the base architecture, then overwrite its parameters with the
# fine-tuned state dict stored at `weights_path`.
model = GPT2LMHeadModel.from_pretrained(model_name)
model.load_state_dict(
    torch.load(
        weights_path,
        map_location=device,
        # Restrict unpickling to tensors and plain containers: a state dict
        # needs nothing else, and a tampered checkpoint file can then no
        # longer execute arbitrary code while being loaded.
        weights_only=True,
    )
)
model.to(device)
# Switch to evaluation mode (dropout disabled). Kept as the last expression so
# the notebook still displays the module summary.
model.eval()

GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-5): 6 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2SdpaAttention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D()
          (c_proj): Conv1D()
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=768, out_features=50257, bias=False)
)

In [13]:
prompt = "Genre: Heavy Metal\n\nLyrics:"

# Call the tokenizer itself rather than `.encode` so the attention mask comes
# back alongside the token ids. The pad token id equals the EOS token id in
# this notebook, so `generate` cannot infer the mask and must be given it.
encoded = tokenizer(prompt, return_tensors="pt").to(device)
input_ids = encoded["input_ids"]

# Sampling is stochastic; seeding right before generation makes the samples
# reproducible on a fresh Restart & Run All. Change the value to draw
# different samples.
generation_seed = 42
torch.manual_seed(generation_seed)

with torch.inference_mode():
    outputs = model.generate(
        input_ids,
        attention_mask=encoded["attention_mask"],
        do_sample=True,
        max_length=512,  # counts prompt tokens plus generated tokens
        top_p=0.9,
        top_k=50,
        temperature=0.9,
        repetition_penalty=1.1,
        pad_token_id=tokenizer.eos_token_id,
        eos_token_id=tokenizer.eos_token_id,
        num_return_sequences=2,
    )

# Print every returned sequence under a numbered banner, special tokens removed.
for i, output in enumerate(outputs, 1):
    text = tokenizer.decode(output, skip_special_tokens=True)
    print(f"\n{'='*60}")
    print(f"SAMPLE {i}")
    print(f"{'='*60}")
    print(text)


SAMPLE 1
Genre: Heavy Metal

Lyrics:
The sun is cold, but it's not like the summer in my world
There's no moon to see you through, nothing of the earth left behind
I've been waiting all night long for you
I can't wait to get closer, I'm on your way
Just another day away from what we did before
But if things don' feel right now
Or do I just go home?
It seems time has come to make a difference
Time has come to stop us, and there's something about life inside
In this moment that they'll take care of me
And when love will end again
And at peace will come true!

We're going somewhere - nowhere between us
A lot has changed since we came together
Our story told the story of our love, their times were different
But still it feels so old today
When we are alone with you
You won't have to leave until we change the ways of life
As though we used to be strangers
To keep ourselves apart without losing our place
Love may start turning up in the morning
All around you might wonder why
Is someone her