### Song Generation:

In [None]:
# imports
import torch
import pandas as pd
import numpy as np
from google.colab import drive
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, set_seed

In [None]:
# fix the seed used by transformers' set_seed helper
set_seed(7)

# mount Google Drive under /content/drive
drive.mount("/content/drive")

# define path to folder - CHANGE TO WHICHEVER DIRECTORY YOU WANT
# (plain string literals: there is nothing to interpolate)
path_folder_model = "/content/drive/MyDrive/Deep_Learning_project/model"
path_folder_tokenizer = "/content/drive/MyDrive/Deep_Learning_project/splits/tokenizer"

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# load model

# tokenizer is read from path_folder_tokenizer
tokenizer = AutoTokenizer.from_pretrained(path_folder_tokenizer)

# causal language model is read from path_folder_model
model = AutoModelForCausalLM.from_pretrained(path_folder_model)

# size the model's token embeddings to the tokenizer's vocabulary length
model.resize_token_embeddings(len(tokenizer))

# run on the GPU when CUDA is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
model = model.to(device)


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [None]:
#@markdown Song genre: (pop / rap / both)
genre = "both" #@param {type: "string"}

##@markdown Enter starting sentence (leave empty if you want to generate a song from the beginning):
#start = "" # @param {type:"string"}

#@markdown Number of generated songs:
num_songs =  2 #@param {type:"integer"}

#@markdown Parameters:
max_length = 150 #@param {type:"integer"}
temperature = 2.5 #@param {type:"slider", min:0.01, max:3.0, step:0.01}
top_k = 50 #@param {type:"integer"}
top_p = 0.8 #@param {type:"slider", min:0, max:1, step:0.01}
# 1.0 means "no penalty"; values above 1.0 discourage tokens that were already
# generated, values below 1.0 make repeats MORE likely. Keep this >= 1.0.
repetition_penalty = 1.2 #@param {type:"number"}

In [None]:
def generate_songs(model, tokenizer, start, genre, num_songs, max_length, temp, top_k, top_p, repetition_penalty):
    """Sample `num_songs` texts from `model` for a genre-tagged prompt.

    The prompt is built as ``"[Genre: {genre}] {start}"`` and encoded without
    automatically added special tokens.

    Args:
        model: object exposing ``.to(device)`` and ``.generate(...)``.
        tokenizer: object exposing ``.encode``, ``.decode``, ``pad_token_id``
            and ``eos_token_id``.
        start: text placed right after the genre tag.
        genre: genre name inserted into the ``[Genre: ...]`` tag.
        num_songs: number of sequences requested from ``generate``.
        max_length: token budget added on top of the prompt length.
        temp: sampling temperature (cast to float).
        top_k: top-k sampling cutoff.
        top_p: nucleus sampling threshold (cast to float).
        repetition_penalty: repetition penalty forwarded to ``generate``
            (cast to float).

    Returns:
        list[str]: one decoded string per generated sequence, decoded with
        ``skip_special_tokens=True``.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model.to(device)

    # combine genre with starting text
    full_prompt = f"[Genre: {genre}] {start}"  # Format the genre tag as used in training

    # tokenize starting text
    encoded_prompt = tokenizer.encode(full_prompt, add_special_tokens=False, return_tensors="pt").to(device)

    # A single, unpadded prompt: every position is a real token. Passing the
    # mask explicitly avoids generate() having to guess it.
    attention_mask = torch.ones_like(encoded_prompt)

    # Pass an explicit padding id instead of relying on generate()'s implicit
    # fallback; prefer the tokenizer's pad token, else reuse its EOS token.
    pad_token_id = tokenizer.pad_token_id
    if pad_token_id is None:
        pad_token_id = tokenizer.eos_token_id

    # generate output using model
    output = model.generate(
        input_ids=encoded_prompt,
        attention_mask=attention_mask,
        pad_token_id=pad_token_id,
        max_length=max_length + encoded_prompt.shape[1],  # budget on top of the prompt
        temperature=float(temp),
        top_k=top_k,
        top_p=float(top_p),
        repetition_penalty=float(repetition_penalty),  # penalty on repeating phrases
        do_sample=True,
        num_return_sequences=num_songs
    )

    # iterate the generated ids row by row and decode each sequence
    return [
        tokenizer.decode(sequence.tolist(), skip_special_tokens=True)
        for sequence in output
    ]


In [None]:
# text passed as `start` to generate_songs, i.e. placed right after the genre tag.
# NOTE(review): "<sos>" is presumably the start-of-song marker used in training — confirm.
start = "<sos>"

In [None]:
# a genre that was not requested keeps an empty list
songs_rap = []
songs_pop = []

# "both" triggers generation for each genre in turn (rap first, then pop)
if genre in ("rap", "both"):
    songs_rap = generate_songs(
        model, tokenizer, start, "Rap", num_songs, max_length,
        temperature, top_k, top_p, repetition_penalty,
    )

if genre in ("pop", "both"):
    songs_pop = generate_songs(
        model, tokenizer, start, "Pop", num_songs, max_length,
        temperature, top_k, top_p, repetition_penalty,
    )


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


In [None]:
# post-processing
def post_process(line):
    """Return `line` with the literal markers <sos>, <pad> and <eos> removed."""
    # markers are stripped one after another, in this order
    for marker in ("<sos>", "<pad>", "<eos>"):
        line = line.replace(marker, "")
    return line

In [None]:
# print results
# Each entry: (genre key that enables the section, section heading, songs to show).
for wanted_genre, heading, genre_songs in (
    ("rap", "\n----- Rap Song Output: -----", songs_rap),
    ("pop", "\n----- Pop Song Output: -----", songs_pop),
):
    if genre not in (wanted_genre, "both"):
        continue

    print(heading)
    for song_number, song in enumerate(genre_songs, start=1):
        print(f"Song #{song_number}:\n")

        # start a new line after each punctuation mark that is followed by a space
        broken = song
        for old, new in (
            (", ", ",\n"),
            (". ", ".\n"),
            (": ", ":\n"),
            ("- ", "-\n"),
            ("! ", "!\n"),
            ("? ", "?\n"),
        ):
            broken = broken.replace(old, new)

        # strip leftover marker tokens from every line before printing it
        for raw_line in broken.split('\n'):
            print(post_process(raw_line))
        print("")


----- Rap Song Output: -----
Song #1:

  Lately,
You have kept away some friends who want

A different,
but you stayed away from her
In some distant and sometimes
In some way they are gone 
That girl is your favorite boo,
 she's my everything
The way you act
Just be careful to put your emotions away
No you'll need to look a

A couple who be
I could spend a life of me,
in a couple of us

Song #2:

  I been grinding my time is  i been working hard I been taking all i know  I been trying i dont feel alone  i know its taking all i know its taking on taking my pain away i know my problems never stop I guess it is hard getting to see the light yeah so so yeah I wanna ride the world oh girl i never would never doubt oh i said no no no no yes me so me on ride
Hood yeah my diamonds green it glo i got that i ain never stop so it be the pain yeah im on on to ride yeah yeah i just want some time away yeah my  yeah 
I been grinding yeah I been waiting wait all week now I got it yeah yeah yeah yeah