## Baseline for genre model

In [None]:
# Baseline setup: load pretrained DistilGPT-2 and tokenize the genre prompt.
import os
from pathlib import Path

# Hub transport settings. These must be assigned BEFORE transformers /
# huggingface_hub are imported: the hub library reads them once at import
# time, so setting them afterwards has no effect.
os.environ["HF_HUB_DISABLE_XET"] = "1"
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "0"
os.environ["HF_HUB_OFFLINE"] = "0"

import torch  # noqa: E402  (deliberately imported after the env setup)
from transformers import GPT2LMHeadModel, GPT2Tokenizer  # noqa: E402

# Directory handed to from_pretrained() for both the model and the tokenizer.
cache_dir = Path.home() / ".cache" / "huggingface"

# Use the GPU when one is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

model_name = "distilgpt2"
# `resume_download` is intentionally not passed: it is deprecated in
# huggingface_hub, and False was the default when it was still honoured.
model = GPT2LMHeadModel.from_pretrained(
    model_name,
    cache_dir=cache_dir,
    use_safetensors=False,
)
# Progress marker printed once the model weights have been loaded.
print('down')
tokenizer = GPT2Tokenizer.from_pretrained(model_name, cache_dir=cache_dir)
# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

# Move the model to the chosen device and switch it to evaluation mode.
model = model.to(device)
model.eval()

# Prompt used to condition the baseline generations.
input_sequence = "Genre: Pop\n\n"
input_ids = tokenizer.encode(input_sequence, return_tensors='pt').to(device)

print(f"Input IDs shape: {input_ids.shape}")

In [None]:
# Sample baseline continuations of the genre prompt and print them.
from tqdm import tqdm

import time

# perf_counter() is monotonic, so the measured duration cannot be skewed by
# system clock adjustments the way time.time() can.
start_time = time.perf_counter()

with torch.inference_mode():
    sample_outputs = model.generate(
        input_ids,
        # Single unpadded prompt: every position is a real token. Passing the
        # mask explicitly avoids generate() having to guess it when the pad
        # and eos token ids coincide.
        attention_mask=torch.ones_like(input_ids),
        do_sample=True,
        max_length=300,
        temperature=0.9,
        top_k=50,
        top_p=0.9,
        repetition_penalty=1.2,
        num_return_sequences=3,
        pad_token_id=tokenizer.eos_token_id,
        eos_token_id=tokenizer.eos_token_id,
    )

elapsed = time.perf_counter() - start_time
# Report how many sequences were generated (the count was missing before).
print(f"{len(sample_outputs)} samples in {elapsed:.2f} seconds")

# Decode and print each generated sequence under a numbered banner.
for i, output in enumerate(sample_outputs, 1):
    text = tokenizer.decode(output, skip_special_tokens=True)
    print(f"\n{'='*60}")
    print(f"BASELINE SAMPLE {i}")
    print(f"{'='*60}")
    print(text)