## To create a text completion model, we need a language model. In this case, we will use GPT-2, a state-of-the-art language model developed by OpenAI.

In [4]:
# Installing the Transformers library
!pip install transformers 

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [5]:
# pandas provides the DataFrame used throughout the notebook
import pandas as pd

# Load the lyrics dataset from the CSV file into a DataFrame
data = pd.read_csv(filepath_or_buffer='lyrics.csv')

In [57]:
# Preview the first five rows of the DataFrame.
# NOTE(review): this cell's execution count (57) is later than that of the
# cells below, and the saved output already lists the `sentences` and
# `predictions` columns those cells create -- on a fresh top-to-bottom run the
# preview will presumably not include them.
data.head(n=5)

Unnamed: 0,title,lyrics,sentences,predictions
0,100 words,verse 1\nfive am trains\nand broken chains\ni ...,"[verse 1, five am trains, and broken chains, i...","[verse 1st half\n\nRedskins 4, 49ers 1 1:09 1 ..."
1,A Shot of Alcohol,im a shot of alcohol for you\nthe scary monste...,"[im a shot of alcohol for you, the scary monst...",[im a shot of alcohol for you to drink. You mi...
2,Ab Hoga Kya,hazaaron ishaare karoon bhi to kya hai\npaheli...,"[hazaaron ishaare karoon bhi to kya hai, pahel...",[hazaaron ishaare karoon bhi to kya hai naa ha...
3,All I Need,verse 1\nwould you break away\nif i tied my he...,"[verse 1, would you break away, if i tied my h...","[verse 1:10:27 PM ET Mon, 27 Dec 2014 22:10:04..."
4,Artist,she thinks of herself\nas an artist\nshe walks...,"[she thinks of herself, as an artist, she walk...",[she thinks of herself as someone who has foun...


## Performing some preprocessing

In [7]:
import re

# Remove the special characters (anything that is neither a word character nor
# whitespace) and convert to lowercase in a single pass.
# Missing lyrics (NaN) are replaced by an empty string first, because `re.sub`
# raises a TypeError when handed a non-string value.
data["lyrics"] = data["lyrics"].fillna("").apply(
    lambda x: re.sub(r'[^\w\s]', '', x).lower()
)

# Split lyrics into sentences, one per line.
# Each line is stripped (this also drops a trailing '\r' left by Windows line
# endings) and blank lines are skipped; otherwise they would become
# empty-string sentences, i.e. empty prompts for the completion step.
data["sentences"] = data["lyrics"].apply(
    lambda x: [line.strip() for line in x.split('\n') if line.strip()]
)

## Load the GPT-2 language model and tokenizer

In [8]:
from transformers import GPT2Tokenizer
from transformers import GPT2LMHeadModel

# Tokenizer for the pretrained 'gpt2' checkpoint (converts text to token ids and back)
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')

# Language-model head variant of GPT-2, loaded from the same checkpoint
model = GPT2LMHeadModel.from_pretrained('gpt2')

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/548M [00:00<?, ?B/s]

Downloading (…)neration_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

## Generate the text completion predictions for each sentence in the lyrics

In [None]:
def generate_completion(sentence, max_length=50, do_sample=True):
    """Continue ``sentence`` with the GPT-2 model and return the decoded text.

    Args:
        sentence: Prompt text to be completed.
        max_length: Forwarded to ``model.generate``. Per the Transformers
            documentation this bounds the prompt tokens plus the newly
            generated tokens, not the continuation alone.
        do_sample: Forwarded to ``model.generate``. When True the continuation
            is sampled, so repeated calls on the same prompt can differ.

    Returns:
        The first generated sequence decoded to a string with special tokens
        removed. A blank ``sentence`` is returned unchanged without calling
        the model.
    """
    # An empty prompt encodes to zero tokens, leaving the model nothing to
    # condition on, so there is nothing to complete for blank input.
    if not sentence.strip():
        return sentence

    # Calling the tokenizer (rather than `encode`) also returns the attention
    # mask, which is handed to `generate` explicitly below.
    encoded = tokenizer(sentence, return_tensors='pt')
    output = model.generate(
        encoded["input_ids"],
        attention_mask=encoded["attention_mask"],
        max_length=max_length,
        do_sample=do_sample,
        # Reuse the end-of-sequence token as the padding token so that
        # `generate` is given an explicit pad id.
        pad_token_id=tokenizer.eos_token_id,
    )
    return tokenizer.decode(output[0], skip_special_tokens=True)

# For every song, build a list holding one generated completion per sentence
data["predictions"] = data["sentences"].apply(
    lambda song_sentences: [
        generate_completion(line) for line in song_sentences
    ]
)

## Saving the predictions in a new csv file

In [11]:
# Write the DataFrame to disk without the row index column
data.to_csv(path_or_buf="predictions.csv", index=False)

In [48]:
def get_lyrics(song_title, songs=None):
    """Return the lyrics of the first song whose title equals ``song_title``.

    Args:
        song_title: Exact title to look up in the ``"title"`` column.
        songs: DataFrame with ``"title"`` and ``"lyrics"`` columns to search.
            Defaults to the notebook-level ``data`` DataFrame.

    Returns:
        The value stored in the ``"lyrics"`` column of the first matching row.

    Raises:
        IndexError: If no row has the requested title.
    """
    if songs is None:
        # Fall back to the notebook's global DataFrame, as the original did.
        songs = data

    # Filter the data to get the lyrics for the given song
    matches = songs.loc[songs["title"] == song_title, "lyrics"]
    if matches.empty:
        # Same exception type that `.iloc[0]` raises on an empty selection,
        # but with a message that names the missing title.
        raise IndexError(f"No song titled {song_title!r} was found")

    # Return the lyrics of the first match
    return matches.iloc[0]

In [55]:
# Example usage: fetch the lyrics for the song "Artist" and display them
lyrics = get_lyrics(song_title="Artist")
print(lyrics)

she thinks of herself
as an artist
she walks to the show
like a queen
her demeanour is slick
and she can turn you down with a flick of her fingers
if she feels it
she thinks of herself
as a lady
she walks by the aisle
with grace
her hair flows down her neck
and she might even give you a peck on your lips
if she feels it
she thinks of herself
as an artist
ooooh shes an artist
