# NLG Project: Making your own lyrics model

### Step 1: Make sure you have the data!

In [None]:
# use the os library with getcwd() method

In [1]:
import os
# Show where the notebook is running, then list the bundled data folder.
# The folder is listed by path instead of chdir-ing into it and back out, so
# the kernel's working directory is never changed, even if the folder is
# missing or the cell is interrupted halfway through.
print(os.getcwd())
data_dir = os.path.join(os.getcwd(), 'sample_data')
print(data_dir)
print(os.listdir(data_dir))

/content
/content/sample_data
['anscombe.json', 'README.md', 'artists-data.csv', 'lyrics-data.csv', 'california_housing_train.csv', 'mnist_train_small.csv', 'mnist_test.csv', 'california_housing_test.csv']


### Step 2: Set up fastai, then import fastai and pandas.

In [2]:
!pip install -Uqq fastbook
import fastbook
fastbook.setup_book()

In [3]:
from fastbook import *
from fastai.text.all import *

In [4]:
import pandas as pd

### Step 3: Preprocess Data.

In [5]:
# read in the data

In [6]:
# check the data format

In [7]:
# rename the columns to align on a specific 'key'

# merge the data (similar to an inner join in SQL)

In [8]:
# get only english artists and relevant columns

In [9]:
# drop all duplicate songs and reset the index 

In [10]:
# read and combine the data
import csv
# Fallback if the plain read below fails with a parser error: QUOTE_NONE turns
# off quote handling and error_bad_lines=False skips malformed rows
# (newer pandas spells the latter on_bad_lines='skip').
# lyrics_data = pd.read_csv('sample_data/lyrics-data.csv', quoting=csv.QUOTE_NONE, error_bad_lines=False)

# rename the lyrics key column so both frames share the 'Link' merge key
lyrics_data = pd.read_csv('sample_data/lyrics-data.csv').rename(columns={"ALink": "Link"})
artist_data = pd.read_csv('sample_data/artists-data.csv')
print(artist_data.columns, lyrics_data.columns)

# inner join: keep only lyrics whose artist appears in the artist table
merged_dfs = lyrics_data.merge(artist_data, how='inner', on='Link')

# English songs only, relevant columns only. Duplicates are dropped per
# (artist, song title): de-duplicating on the title alone would discard every
# song whose name was already used by a different artist.
eng_artists = (
    merged_dfs
    .loc[merged_dfs['language'] == 'en', ['Artist', 'SName', 'Lyric', 'Genres']]
    .drop_duplicates(subset=['Artist', 'SName'])
    .reset_index(drop=True)
)
eng_artists.head()

Index(['Artist', 'Genres', 'Songs', 'Popularity', 'Link'], dtype='object') Index(['Link', 'SName', 'SLink', 'Lyric', 'language'], dtype='object')


Unnamed: 0,Artist,SName,Lyric,Genres
0,Ivete Sangalo,Careless Whisper,"I feel so unsure\nAs I take your hand and lead you to the dance floor\nAs the music dies, something in your eyes\nCalls to mind a silver screen\nAnd all those sad goodbyes\n\nI'm never gonna dance again\nGuilty feet have got no rhythm\nThough it's easy to pretend\nI know you're not a fool\n\nShould've known better than to cheat a friend\nAnd waste the chance that I've been given\nSo I'm never gonna dance again\nThe way I danced with you\n\nTime can never mend\nThe careless whispers of a good friend\nTo the heart and mind\nIgnorance is kind\nThere's no comfort in the truth\nPain is all you'...",Pop; Axé; Romântico
1,Ivete Sangalo,Could You Be Loved / Citação Musical do Rap: Se Ligue,"Don't let them fool, ya\nOr even try to school, ya! Oh, no!\nWe've got a mind of our own\nSo go to hell if what you? re thinking is not right!\nLove would never leave us alone\nA-yin the darkness there must come out to light\n\nCould you be loved and be loved?\nCould you be loved and be loved?\n\nDon't let them change ya, oh!\nOr even rearrange ya! Oh, no!\nWe've got a life to live\nThey say: only, only\nOnly the fittest of the fittest shall survive\nStay alive! Oh!\n\nCould you be loved and be loved?\nCould you be loved, wo now! And be loved?\n\nCould you be\nCould you be\nCould you be lo...",Pop; Axé; Romântico
2,Ivete Sangalo,Cruisin' (Part. Saulo),"Baby, let's cruise, away from here\nDon't be confused, the way is clear\nAnd if you want it you got it forever\nThis is not a one night stand, baby\n\nLet the music take your mind\nJust release & you will find\nYou're gonna fly away\nGlad you're goin' my way\nI love it when we're cruisin' together\nMusic is played for love\nCruisin' is made for love\nI love it when we're cruisin' together\n\nBaby, tonight belongs to us\nEverything's right, do what you must\nAnd inch by inch we get closer & closer\n\nTo every little part of each other, ooh, baby, yeah, so\n\nLet the music take your mind\nJu...",Pop; Axé; Romântico
3,Ivete Sangalo,Easy,"Know it sounds funny\nBut, I just can't stand the pain\nGirl, I'm leaving you tomorrow\nSeems to me girl\nYou know I've done all I can\nYou see I begged, stole and I borrowed! (yeah)\n\nOoh, that's why I'm easy\nI'm easy like Sunday morning\nThat's why I'm easy\nI'm easy like Sunday morning\n\nWhy in the world would anybody put chains on me?\nI've paid my dues to make it\nEverybody wants me to be\nWhat they want me to be\nI'm not happy when I try to fake it! no!\n\nOoh, that's why I'm easy\nI'm easy like Sunday morning, yeah\nThat's why I'm easy\nI'm easy like Sunday morning\n\nI wanna be ...",Pop; Axé; Romântico
4,Ivete Sangalo,For Your Babies (The Voice cover),"You've got that look again\nThe one I hoped I had when I was a lad\nYour face is just beaming\nYour smile got me boasting\nMy pulse roller-coastering\n\nAnyway the four winds that blow\nThey're gonna send me sailing home to you\nOr I'll fly with the force of a rainbow\nThe dream of gold will be waiting in your eyes\n\nYou know I'd do most anything you want\nHey, I, I try to give you everything you need\nI can see that it gets to you\n\nI don't believe in many things\nBut in you, I do\n\nHer faith is amazing\nThe pain that she goes through\nContained in the hope for you\nYour whole world ha...",Pop; Axé; Romântico


In [None]:
# list the artists whose 'Songs' value exceeds `nsongs`, sorted by name,
# to help pick an artist with enough material to train on
nsongs = 300
pd.Series(merged_dfs.loc[merged_dfs['Songs'] > nsongs, 'Artist'].unique()).sort_values()

In [11]:
artist_name = "Drake"

# keep only this artist's songs and renumber the rows from 0
artist_df = eng_artists[eng_artists['Artist'].eq(artist_name)].reset_index(drop=True)
artist_df.head()

Unnamed: 0,Artist,SName,Lyric,Genres
0,Drake,Laugh Now Cry Later (Ft. Lil Durk),Sometimes we laugh and sometimes we cry\nBut I guess you know now\nBaby\nI took a half and she took the whole thing slow down\nBaby\nWe took a trip now we on your block\nAnd it's like a ghost town\nBaby\nWhere do these niggas be at\nwhen they say they doing all this and all that\nTired of beefing of bums\nYou can't even pay me enough me to react\n\nBeen waking up in the crib\nAnd sometimes I don't even know where I'm at\nPlease don't play that niggas songs in this party\nI can't even listen to that\nAnytime that I run into somebody it must be a victory lap eh\nShawty come sit on my lap eh\...,Rap; Hip Hop
1,Drake,Fair Trade (feat. Travis Scott),"I don't understand why you blame me\nJust take me as I am, it's the same me\nYeah\nSenseless sight to see\nYeah\nSenseless sight to see\nAyy\n\nFeelin' young but they treat me like the OG\nAnd they want the tea on me, I swear these bitches nosy\nSaid he put some money on my head, I guess we gon' see\nI won't put no money on his head, my niggas owe me\nI gotta be single for a while, you can't control me\nUno, dos, tres, in a race, they can't hold me\nAnd I show my face in a case so you know it's me\nImitation isn't flattery, it's just annoyin' me\nAnd I'm too about it\nAnd the dirt that the...",Rap; Hip Hop
2,Drake,Hotline Bling,"You used to call me on my, you used to, you used to\nYou used to call me on my cell phone\nLate night when you need my love\nCall me on my cell phone\nLate night when you need my love\nI know when that hotline bling\nThat can only mean one thing\nI know when that hotline bling\nThat can only mean one thing\n\nEver since I left the city, you\nGot a reputation for yourself now\nEverybody knows and I feel left out\nGirl you got me down, you got me stressed out\nCause ever since I left the city, you\nStarted wearing less and goin' out more\nGlasses of champagne out on the dance floor\nHangin' ...",Rap; Hip Hop
3,Drake,One Dance,"[Intro: Kyla Wizkid]\nBaby I like your style\n\n[Verse 1: Drake]\nGrips on your waist\nFront way, back way\nYou know that I don't play\nStreets not safe\nBut I never run away\nEven when I'm away\nOti, oti, there's never much love when we go Ot\nI pray to make it back in one piece\nI pray, I pray\n\n[Hook: Drake]\nThat's why I need a one dance\nGot a Hennessy in my hand\nOne more time 'fore I go\nHigher powers taking a hold on me\nI need a one dance\nGot a Hennessy in my hand\nOne more time 'fore I go\nHigher powers is taking a hold on me\n\n[Bridge: Kyla Wizkid]\nBaby I like your style\n\n...",Rap; Hip Hop
4,Drake,Champagne Poetry,"I love you, I love you, I love you\nUntil I, until I\nI love you, I love you, I love you\nUntil I find the only words I know that you\n\nI been hot since the birth of my son\nI remain unphased, trust, worse has been done\nMan, fuck evaluation, show me personal funds\nIt's the pretty boys versus the petty boys\nSold that already, got a whole new set of toys\nShit is so surreal, Drizzy Drake, you better enjoy\nNothing else bigger than the OVO letterman boys\nCashmere knits for the nighttime boat rides\nOli got the first еdition parked up roadside\nThe only sign of strugglin' is coming from t...",Rap; Hip Hop


### Step 4: Create dataloaders using fastai.

### Short lesson: Data and Model. 

##### The model: We will be creating a 'language model', which you can think of as a student that will learn how to write lyrics in the style of some artist.  **Model=Student.** 

##### The learning task: Using 'self-supervised learning' for teaching the student to learn songs without us having to specifically 'label' the song lyrics. Instead, it will learn to take a word in a song and guess the next one (eg. It will get the word "Happy" and try to guess "Birthday"). **The data will be able to 'label' itself since we will be giving it one word in a song, and make the student guess the next one.**

##### Turn lyrics into something the computer can understand:
1. Tokenize: Convert the song lyrics (string) into a list of words. As a simple example, "happy birthday to you, happy birthday to you" -> ["happy", "birthday", "to", "you", ",", "happy", "birthday", "to", "you"]. 
2. Numericalize: Convert each token into a unique number (because the computer likes numbers). To continue the example, ["happy", "birthday", "to", "you", ",", "happy", "birthday", "to", "you"] -> [0, 1, 2, 3, 4, 0, 1, 2, 3] where 0 = "happy", 1 = "birthday", 2 = "to", 3 = "you", 4 = ",". 
3. Dataloaders: Finally, we will make the data for a 'guess next word student'. The way this works is that we give the student "Happy" with "birthday" as the correct answer (label), then "birthday" with "to" as the label, then "to" with "you" as the label and so on. **The student is given the first word in a song and is told to guess the second one. Then the student is given the second and guesses the third. This pattern continues through the rest of the song.**

You can think of the dataloaders as the books we are giving the student to learn from. The books contain questions and answers (similar to a math textbook) so that the student can learn when it is right and wrong. Remember: in this case the 'questions' in the textbook are one word in a song and the 'answer' is the next word in the song. 

In [12]:
# Language-model data block over the lyrics. is_lm=True makes each target the
# input text shifted by one token; seq_len=72 is the chunk length in tokens.
# The first argument of TextBlock.from_df is the name of the text column, so
# it must be 'Lyric' (a column of artist_df), not the dataframe variable name.
lang_model_block = DataBlock(
    blocks=TextBlock.from_df('Lyric', seq_len=72, is_lm=True),
    get_items=ColReader('Lyric')
)
lang_model_block

<fastai.data.block.DataBlock at 0x7f371889ba90>

### Short lesson: Optimizations.

The dataloader uses a few tricks to speed up training the student. 
1. Sequence length: The student has limited memory so we need to break up the songs for them to learn. The sequence length represents how many words in a song we want the student to learn at a time. 
2. Batch size: This student is actually an alien from another planet that has the ability to learn from multiple song chunks at the same time! The batch size represents how many of these chunks (sequences) we want the student to learn from, all at the same time!

In [13]:
# build the dataloaders from the artist's songs, 128 sequences per batch
dls_lm = lang_model_block.dataloaders(source=artist_df, bs=128)
# preview two (input, target) pairs from a batch
dls_lm.show_batch(max_n=2)

Unnamed: 0,text,text_
0,"xxbos [ verse 1 : xxmaj drake ] \n xxmaj check , look \n xxmaj and i ai n't tryna get to know nobody but you \n xxmaj do me a favor , tell me what 's happenin ' \n i am drinkin ' xxup xo , d is sippin ' some xxmaj xxunk \n xxmaj usually the things i desire i always xxunk \n xxmaj back again , xxunk of the","[ verse 1 : xxmaj drake ] \n xxmaj check , look \n xxmaj and i ai n't tryna get to know nobody but you \n xxmaj do me a favor , tell me what 's happenin ' \n i am drinkin ' xxup xo , d is sippin ' some xxmaj xxunk \n xxmaj usually the things i desire i always xxunk \n xxmaj back again , xxunk of the mack"
1,\n xxmaj when i give ya what i give ya \n\n [ chorus :] \n xxmaj when i give ya what i give ya \n ( you gon like that ) \n xxmaj when i give ya what i give ya \n ( you throw it right back ) \n xxmaj there ai n't a women in the world that can doubt that ( oh ) \n i need a women not,xxmaj when i give ya what i give ya \n\n [ chorus :] \n xxmaj when i give ya what i give ya \n ( you gon like that ) \n xxmaj when i give ya what i give ya \n ( you throw it right back ) \n xxmaj there ai n't a women in the world that can doubt that ( oh ) \n i need a women not a


# Train a model using transfer learning

### This is where the magic happens!
Now, we put it all together. We give the student some songs for a particular artist and tell the student to start learning. Hopefully, the student will start to pick up on certain things from that artist's style as it learns. 

In [None]:
# language-model learner (AWD-LSTM architecture) on the lyrics dataloaders
learn = language_model_learner(
    dls_lm,
    AWD_LSTM,
    metrics=accuracy,
    drop_mult=0.3,
)
# first pass: 5 epochs with a peak learning rate of 0.004
learn.fit_one_cycle(5, lr_max=0.004)
# unfreeze, then train for 20 more epochs on the new (lower, sliced) lr range
learn.unfreeze()
learn.fit_one_cycle(20, lr_max=slice(3e-6, 3e-4))

epoch,train_loss,valid_loss,accuracy,time
0,6.001497,5.594293,0.172681,00:33
1,5.536195,4.882863,0.217637,00:27
2,5.093522,4.452796,0.226052,00:27
3,4.740409,4.261594,0.229961,00:31
4,4.497484,4.229335,0.232613,00:26


epoch,train_loss,valid_loss,accuracy,time
0,4.020443,4.213316,0.233734,00:30
1,4.000458,4.166079,0.237146,00:33
2,3.963469,4.08106,0.242489,00:29
3,3.911074,4.000809,0.248975,00:30
4,3.859694,3.945955,0.254764,00:31
5,3.810256,3.906708,0.26151,00:29
6,3.763493,3.87949,0.266487,00:29
7,3.724699,3.857315,0.271357,00:30
8,3.68988,3.83279,0.274891,00:30
9,3.652989,3.815803,0.278967,00:30


### FYI: I lied.

In reality, there is no...
1. Student
2. Song books
3. Magic learning

There is actually...
1. A pretrained deep learning model (LSTM)
2. Song data formatted as pytorch tensors in dataloaders
3. A learning cycle taking place, where backpropagation computes gradients and an optimizer uses them to update the LSTM's weights ('teaching' it)

There are also various NLP deep learning architectures (which are like sample students), called 'RNNs', 'LSTMs', 'GRUs', 'Transformers', etc. To be honest with you this isn't that important, as long as you understand the basics, you can build your way up to the rest :)

In [None]:
# grab one training batch from the language-model dataloaders (`dls_lm`):
# x holds the inputs and y the targets
# NOTE(review): with bs=128 and seq_len=72 both shapes should be (128, 72) - confirm
x, y = first(dls_lm.train)
x.shape, y.shape

# Predictions with Model

In [None]:
def get_most_complex(start_text, preds):
  """Pick the most varied prediction and return it as prompt + continuation.

  "Most varied" means the prediction with the largest number of distinct
  whitespace-separated tokens (the first one wins on ties). The prompt part of
  that prediction is skipped by counting as many tokens as `start_text` has
  words, and the continuation is trimmed after its last '.' so the result
  ends on a full sentence when there is one.

  Args:
    start_text: the prompt that was passed to the model.
    preds: list of generated strings, each expected to begin with the prompt.

  Returns:
    `start_text` followed by the chosen continuation. If the continuation
    has no '.', it is kept whole instead of being discarded. If `preds` is
    empty or the chosen prediction has nothing beyond the prompt,
    `start_text` is returned unchanged.
  """
  if not preds:
    return start_text

  # max() keeps the first maximal element, like a strict '>' scan would
  richest = max(preds, key=lambda pred: len(set(pred.split())))

  # Find the space that follows the last prompt token; an empty prompt means
  # the whole prediction is continuation.
  prompt_words = len(start_text.split())
  cut = 0
  if prompt_words:
    cut = -1
    for _ in range(prompt_words):
      cut = richest.find(' ', cut + 1)
      if cut == -1:
        # the prediction is no longer than the prompt: nothing to append
        return start_text

  continuation = richest[cut:]
  # Only look for the sentence end inside the continuation, so a '.' in the
  # prompt (or no '.' at all) cannot wipe out the generated text.
  last_period = continuation.rfind('.')
  if last_period != -1:
    continuation = continuation[:last_period + 1]

  return start_text + continuation


In [None]:
start_text = "Heyo this is it"
words = 60
sentences = 5

# sample `sentences` candidate continuations of the prompt from the model
preds = [
    learn.predict(start_text, n_words=words, temperature=0.75)
    for _ in range(sentences)
]

# show the candidate picked by get_most_complex
get_most_complex(start_text, preds)

'Heyo this is it is an honest call to call attention to a new thing , and it can be seen when purpose and actions are given away to the naked eye .'