In [1]:
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer
from pathlib import Path
import os

In [2]:
# Which checkpoint to evaluate: the Hugging Face model id, plus the fine-tuned
# weights stored under ./models relative to the current working directory.
model_name = "distilgpt2"
model_save_path = Path.cwd() / 'models'
# Kept as a plain string (directory + "/" + file name) rather than a Path.
weights_path = str(model_save_path) + '/last_training.pt'

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Echo the resolved settings, one per line.
print(weights_path, device, sep="\n")

/home/isachansson/DML-project/notebooks/models/last_training.pt
cuda


In [3]:
# Tokenizer for the checkpoint; EOS is reused as the pad token so that calls
# which need a pad id have one.
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token

# Build the pretrained architecture, then overwrite its parameters with the
# fine-tuned state dict stored at `weights_path`.
model = GPT2LMHeadModel.from_pretrained(model_name)
# weights_only=True limits unpickling to tensors and plain containers, so a
# tampered checkpoint file cannot run arbitrary code while it is being loaded.
model.load_state_dict(
    torch.load(weights_path, map_location=device, weights_only=True)
)
model.to(device)
# Inference only. Kept as the last expression so the cell still displays the
# module summary.
model.eval()

GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-5): 6 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2SdpaAttention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D()
          (c_proj): Conv1D()
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=768, out_features=50257, bias=False)
)

In [4]:
# Untouched pretrained checkpoint, used as the reference for the fine-tuned
# model. Loaded through `model_name` (not a second hard-coded id) so the
# baseline always matches the checkpoint family the tokenizer and fine-tuned
# model were built from.
print("\nLoading baseline model for comparison...")
baseline_model = GPT2LMHeadModel.from_pretrained(model_name).to(device)
baseline_model.eval()


Loading baseline model for comparison...


GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-5): 6 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2SdpaAttention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D()
          (c_proj): Conv1D()
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=768, out_features=50257, bias=False)
)

In [15]:
def generate_lyrics(
    model,
    tokenizer,
    genre,
    device,
    max_length=512,
    num_samples=1
):
    """Sample lyrics for ``genre`` from ``model`` and return only the continuation.

    The model is prompted with ``"Genre: <genre>\\n\\nLyrics:"`` and sampled
    with top-k / nucleus sampling.

    Args:
        model: Causal language model exposing ``eval()`` and ``generate()``.
        tokenizer: Tokenizer matching ``model``; must provide ``eos_token_id``
            and ``decode()``.
        genre: Genre name inserted into the prompt.
        device: Device the encoded prompt is moved to before generation.
        max_length: Forwarded to ``generate`` as ``max_length``.
        num_samples: Number of sequences to sample for the prompt.

    Returns:
        list[str]: One stripped text per sampled sequence, prompt removed.
    """
    model.eval()

    prompt = f"Genre: {genre}\n\nLyrics:"
    inputs = tokenizer(prompt, return_tensors='pt').to(device)
    input_ids = inputs['input_ids']
    # Number of prompt tokens at the start of every returned sequence.
    prompt_token_count = input_ids.shape[-1]

    with torch.no_grad():
        outputs = model.generate(
            input_ids,
            # Pad and EOS share one id, so pass the mask explicitly instead of
            # leaving generate() to infer it from the pad token.
            attention_mask=inputs.get('attention_mask'),
            do_sample=True,
            max_length=max_length,
            top_p=0.9,
            top_k=50,
            temperature=0.9,
            repetition_penalty=1.1,
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id,
            num_return_sequences=num_samples,
        )

    # Drop the prompt by token count rather than by len(prompt) characters:
    # decoded text is not guaranteed to reproduce the prompt string exactly,
    # and a character slice would then cut the lyrics in the wrong place.
    return [
        tokenizer.decode(
            sequence[prompt_token_count:], skip_special_tokens=True
        ).strip()
        for sequence in outputs
    ]

In [17]:
def compare_baseline_vs_finetuned(
    baseline_model,
    finetuned_model,
    tokenizer,
    device,
    genres=None,
    max_length=512,
    preview_chars=500,
):
    """Print one baseline and one fine-tuned lyric sample per genre.

    For every genre a banner is printed, followed by a "BASELINE" section and
    a "FINE-TUNED" section, each showing the first ``preview_chars``
    characters of a single sample produced by ``generate_lyrics``.

    Args:
        baseline_model: Model shown under the "BASELINE" heading.
        finetuned_model: Model shown under the "FINE-TUNED" heading.
        tokenizer: Tokenizer passed through to ``generate_lyrics``.
        device: Device passed through to ``generate_lyrics``.
        genres: Iterable of genre names to compare. Defaults to
            ``['Pop', 'Heavy Metal', 'Indie']``.
        max_length: ``max_length`` forwarded to ``generate_lyrics``.
        preview_chars: Maximum number of characters printed per sample.

    Returns:
        None. Everything is written to stdout.
    """
    if genres is None:
        genres = ['Pop', 'Heavy Metal', 'Indie']

    heavy_rule = '=' * 80
    light_rule = '-' * 80
    # Both models go through the same code path, so the two sections cannot
    # drift apart in their generation settings.
    contenders = (
        ("BASELINE", baseline_model),
        ("FINE-TUNED", finetuned_model),
    )

    for genre in genres:
        print(f"\n{heavy_rule}")
        print(f"GENRE: {genre}")
        print(heavy_rule)

        for label, candidate in contenders:
            print("\n" + light_rule)
            print(label)
            print(light_rule)

            samples = generate_lyrics(
                candidate,
                tokenizer,
                genre,
                device,
                max_length=max_length,
                num_samples=1
            )
            print(samples[0][:preview_chars])

In [18]:
# Generation samples from the models, so seed torch first: re-running this
# cell on the same setup then draws the same samples instead of fresh ones.
torch.manual_seed(42)
compare_baseline_vs_finetuned(baseline_model, model, tokenizer, device)


GENRE: Pop

--------------------------------------------------------------------------------
BASELINE
--------------------------------------------------------------------------------
"I'm a Christian." - A new album from their label, the Allure Label, has been released on all three disc formats and is set to be out for only £14.99 / 30 GBp (for $15/day or over).The record covers 13 songs with several tracks including two solo releases; some of which are covered in an adorably close third by The Librarian's Bookkeeper. As well as reissue artwork along with art direction, each song features more than four different cover versions featuring various characters – most likely bein

--------------------------------------------------------------------------------
FINE-TUNED
--------------------------------------------------------------------------------
Lyrics:
You are your only friend
I'm in the wrong place, right now
The last time I saw you was when he kissed me on my face
And his head turn