In [None]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer, Trainer, TrainingArguments
from datasets import load_dataset
from sklearn.metrics.pairwise import cosine_similarity
import torch
from transformers import pipeline

2024-04-29 07:55:15.575422: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-04-29 07:55:15.575530: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-04-29 07:55:15.729362: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


# Load Dataset

In [None]:
# Seed shared by the shuffle and the train/validation split so that a
# Restart & Run All reproduces exactly the same examples in each split.
SEED = 42
subset_size = 1000

dataset = load_dataset("amishshah/song_lyrics")
dataset = dataset["train"].shuffle(seed=SEED)
# Keep only a small subset of the shuffled data; clamp the size so a dataset
# with fewer than `subset_size` rows does not produce out-of-range indices.
dataset = dataset.select(range(min(subset_size, len(dataset))))
# Hold out 10% of the subset for validation. Without an explicit seed the
# split would be different on every run.
train_test_dataset = dataset.train_test_split(test_size=0.1, seed=SEED)
train_dataset = train_test_dataset["train"]
val_dataset = train_test_dataset["test"]

Generating train split: 0 examples [00:00, ? examples/s]

# Load tokenizer and pre-trained model

In [None]:
from transformers import GPT2Tokenizer, GPT2LMHeadModel, Trainer, TrainingArguments, DataCollatorForLanguageModeling
from datasets import load_dataset

# Fetch the pre-trained GPT-2 language model and its matching tokenizer
model = GPT2LMHeadModel.from_pretrained("gpt2")
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")

# When the tokenizer has no padding token, reuse the end-of-sequence token
# so that fixed-length padding can be requested during tokenization
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Tokenize Dataset

In [None]:
def tokenize_function(examples):
    """Tokenize a batch of lyrics, each prefixed with its genre tag.

    Args:
        examples: Batch mapping that provides parallel "tag" and "lyrics"
            sequences of strings.

    Returns:
        Whatever the module-level ``tokenizer`` returns for the prefixed
        lyrics, requested with truncation and padding to 512 tokens.
    """
    tagged_lyrics = []
    for genre_tag, song_text in zip(examples["tag"], examples["lyrics"]):
        # Each entry has the form "[Genre: <tag>] <lyrics>"
        tagged_lyrics.append("".join(("[Genre: ", genre_tag, "] ", song_text)))
    return tokenizer(tagged_lyrics, truncation=True, padding="max_length", max_length=512)


# Apply tokenize_function to both splits; batched=True passes a batch of
# examples per call, which is what tokenize_function's zip over columns expects.
train_dataset = train_dataset.map(tokenize_function, batched=True)
val_dataset = val_dataset.map(tokenize_function, batched=True)

# Fine-tuning

In [None]:
# Collator for causal language modelling (mlm=False turns masked-LM off)
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

# Hyper-parameters and output/log locations for the fine-tuning run
training_args = TrainingArguments(
    output_dir='./results',
    logging_dir='./logs',
    logging_steps=100,
    num_train_epochs=4,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=8,
    warmup_steps=500,
    weight_decay=0.01,
)

# Wire the model, the tokenized splits and the settings together
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    data_collator=data_collator,
)

In [None]:
# Prefer the GPU when PyTorch can see one, otherwise stay on the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print("Device: ", device)

# Place the model on the selected device, then run fine-tuning
model.to(device)
trainer.train()

In [None]:
# Directory the fine-tuned model and tokenizer are saved to and later reloaded from
model_path = './results'

In [None]:
# Save the model and tokenizer
# Both go into the same directory so that `model_path` can later be passed as
# both the model and the tokenizer location.
model.save_pretrained(model_path)
tokenizer.save_pretrained(model_path)

# Lyric Generation

In [None]:
# Load the model and tokenizer for text generation
from transformers import pipeline

# Build a text-generation pipeline from the saved model and tokenizer
text_generator = pipeline('text-generation', model=model_path, tokenizer=model_path)

# Genre to condition on -- swap in any genre tag that occurs in the dataset
genre = "rap"

# Same "[Genre: <tag>] " prefix that tokenize_function prepends to the lyrics
prompt = "[Genre: {}] ".format(genre)
results = text_generator(prompt, max_length=500)
print(results[0]['generated_text'])

# Load the fine-tuned model

In [None]:
# Reload the saved weights from `model_path`, rebinding the in-memory `model`
model = GPT2LMHeadModel.from_pretrained(model_path)

# Evaluate fine-tuning using perplexity

In [None]:
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer

def calculate_perplexity(model, tokenizer, text):
    """Return the perplexity the model assigns to ``text``.

    Perplexity is ``exp(loss)``, where ``loss`` is the first element of the
    model output when the encoded text is passed as both input and labels.

    Args:
        model: A ``torch.nn.Module`` language model callable as
            ``model(input_ids, labels=input_ids)`` whose first output is the loss.
        tokenizer: Tokenizer exposing ``encode(text, return_tensors='pt')``.
        text: String to score.

    Returns:
        The perplexity as a Python float.

    Raises:
        ValueError: If ``text`` encodes to fewer than two tokens. A causal
            language-model loss scores each token given the preceding ones, so
            a shorter input leaves nothing to score.
    """
    encode = tokenizer.encode(text, return_tensors='pt')
    if encode.shape[-1] < 2:
        raise ValueError("text must encode to at least two tokens to compute perplexity")

    # The input ids must live on the same device as the model weights,
    # otherwise the forward pass fails for a model that was moved to the GPU.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        encode = encode.to(first_param.device)

    # Score in eval mode so dropout cannot make the result stochastic, then
    # restore whatever mode the caller had the model in.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            outputs = model(encode, labels=encode)
            # Index 0 is the loss for both tuple and ModelOutput return types.
            loss = outputs[0]
    finally:
        model.train(was_training)

    return torch.exp(loss).item()

# Load models and tokenizer
# NOTE: `model_pretrained` holds the *fine-tuned* checkpoint loaded from
# `model_path`; `model_base` is the stock GPT-2 it is compared against.
model_pretrained = GPT2LMHeadModel.from_pretrained(model_path)
model_base = GPT2LMHeadModel.from_pretrained('gpt2')
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')

# Example text
text = "Complete this lyric about love and loss:"

# Calculate perplexity
perplexity_pretrained = calculate_perplexity(model_pretrained, tokenizer, text)
perplexity_base = calculate_perplexity(model_base, tokenizer, text)

# Label the checkpoint from `model_path` as fine-tuned: both models are
# "pretrained", so that word does not tell the two lines apart.
print(f'Perplexity of Fine-tuned Model: {perplexity_pretrained}')
print(f'Perplexity of Base GPT-2 Model: {perplexity_base}')


Perplexity of Pretrained Model: 981.9048461914062
Perplexity of Base GPT-2 Model: 989.9269409179688


# Evaluate fine-tuning using ROUGE scores

In [None]:
# Prompt in which one word per lyric line is replaced by "****"
masked_song_prompt = "Hello darkness, my old ****, I've come to **** with you again, Because a vision softly ****,"

# The same three lyric lines with the words filled in, one line per row
unmasked_song_prompt = "\n".join([
    "Hello darkness, my old friend",
    "I've come to talk with you again",
    "Because a vision softly creeping",
])

In [None]:
from rouge_score import rouge_scorer

# Load models and tokenizer
model_base = GPT2LMHeadModel.from_pretrained('gpt2')
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')

# Each set of lyrics must be generated by its own pipeline: routing both calls
# through one shared generator would compare a model against itself.
text_generator_finetuned = pipeline('text-generation', model=model_path, tokenizer=model_path)
generated_lyrics_finetuned = text_generator_finetuned(masked_song_prompt, max_length=500)[0]['generated_text']

text_generator_base = pipeline('text-generation', model=model_base, tokenizer=tokenizer)
generated_lyrics_base = text_generator_base(masked_song_prompt, max_length=500)[0]['generated_text']

# Initialize the ROUGE scorer, you can specify which rouge types to calculate
# Requested here: rouge1, rouge2 and rougeL, with the stemmer enabled.
scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)

# Function to calculate average scores
def calculate_average_rouge(generated, references):
    """Average ROUGE F-measures over paired generated/reference texts.

    Args:
        generated: One generated text (``str``) or an iterable of them.
        references: One reference text (``str``) or an iterable of them,
            paired positionally with ``generated``.

    Returns:
        Dict mapping 'rouge1', 'rouge2' and 'rougeL' to the mean F-measure
        over all pairs, computed with the module-level ``scorer``.

    Raises:
        ValueError: If there are no pairs, or the inputs differ in length.
    """
    # A bare string would be iterated character by character by zip(), so
    # single letters would be scored against each other. Treat it as one text.
    generated = [generated] if isinstance(generated, str) else list(generated)
    references = [references] if isinstance(references, str) else list(references)

    if len(generated) != len(references):
        raise ValueError(
            f"generated and references must have the same length "
            f"(got {len(generated)} and {len(references)})"
        )
    if not generated:
        raise ValueError("at least one generated/reference pair is required")

    scores = {'rouge1': [], 'rouge2': [], 'rougeL': []}

    for gen, ref in zip(generated, references):
        # The scorer is called with the reference first, then the generated text.
        score = scorer.score(ref, gen)
        for key in scores:
            scores[key].append(score[key].fmeasure)

    return {key: sum(values) / len(values) for key, values in scores.items()}

# Calculate average ROUGE scores
# Each call scores one generated text against the one reference; both are
# wrapped in lists so they are paired as whole texts, not character by character.
average_scores_base = calculate_average_rouge([generated_lyrics_base], [unmasked_song_prompt])
print("Average ROUGE scores for base GPT-2:", average_scores_base)

average_scores_finetuned = calculate_average_rouge([generated_lyrics_finetuned], [unmasked_song_prompt])
print("Average ROUGE scores for finetuned model:", average_scores_finetuned)


# Evaluate using ChatGPT

In [None]:
import os

from openai import OpenAI

# Read the API key from the environment instead of embedding it in the
# notebook, where it leaks to anyone who can see the file or its history.
# NOTE(review): any key that was ever committed in this cell should be treated
# as compromised and revoked.
_openai_api_key = os.environ.get("OPENAI_API_KEY")
if not _openai_api_key:
    raise RuntimeError("Set the OPENAI_API_KEY environment variable before running this cell.")

client = OpenAI(
    api_key=_openai_api_key,
)

def compare_lyrics(lyrics1, lyrics2):
    """Ask the chat model which of two sets of lyrics is better and print its reply.

    Args:
        lyrics1: Text presented to the model as "Lyrics A".
        lyrics2: Text presented to the model as "Lyrics B".

    Returns:
        None. The text of the model's reply is printed.
    """
    prompt_text = f"Here are two sets of song lyrics:\n\nLyrics A:\n{lyrics1}\n\nLyrics B:\n{lyrics2}\n\nWhich set of lyrics do you think is better?"

    chat_completion = client.chat.completions.create(
        messages=[
            {
                "role": "user",
                "content": prompt_text,
            }
        ],
        model="gpt-3.5-turbo",
    )

    # Print the reply text itself rather than the repr of the message object,
    # which buries the answer among role/metadata fields and escaped newlines.
    print(chat_completion.choices[0].message.content)
    
    
# Prompt given to both models
prompt = "Complete this lyric about love and loss:"

# Stock GPT-2 weights and tokenizer for the baseline pipeline
model_base = GPT2LMHeadModel.from_pretrained('gpt2')
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')

# Fine-tuned model: generate, then keep the text of the first result
text_generator_finetuned = pipeline('text-generation', model=model_path, tokenizer=model_path)
finetuned_outputs = text_generator_finetuned(prompt, max_length=500, truncation=True)
generated_lyrics_finetuned = finetuned_outputs[0]['generated_text']

# Base model: same prompt and generation settings
text_generator_base = pipeline('text-generation', model=model_base, tokenizer=tokenizer)
base_outputs = text_generator_base(prompt, max_length=500, truncation=True)
generated_lyrics_base = base_outputs[0]['generated_text']

# Compare the two outputs: the first argument is shown as Lyrics A (base
# model), the second as Lyrics B (fine-tuned model)
compare_lyrics(generated_lyrics_base, generated_lyrics_finetuned)