# EminemNN Live Demo

## Loading data and models

In [1]:
# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

# Pip installs
!pip3 install pronouncing
!pip3 install textstat
!pip3 install markovify
!pip install transformers

# Import libraries
import subprocess
import re
import tensorflow as tf
import random as rand
import numpy as np
import pronouncing
import textstat
import markovify
import math
import warnings
warnings.filterwarnings('ignore')
from keras.utils import pad_sequences
from sklearn.feature_extraction.text import CountVectorizer
from scipy.spatial.distance import pdist, squareform

# Load Eminem lyrics data
dataFilePath = '/content/drive/MyDrive/ALL_eminem.txt'
# Decode explicitly as UTF-8 so the text does not depend on the runtime's default locale
with open(dataFilePath, 'r', encoding='utf-8') as file:
  data = file.read()
eminemBars = data.split('\n') # split lyrics dataset into bars (one bar per line)
# Word-level tokeniser fitted on the bars; it is reused by generateBar to encode seed phrases
tokeniser = tf.keras.preprocessing.text.Tokenizer(num_words=20000)
tokeniser.fit_on_texts(eminemBars)

# Load simple RNN model
loaded_modelRNN = tf.keras.models.load_model('/content/drive/MyDrive/EminemNN_simpleRNNmodel.h5')


Mounted at /content/drive
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pronouncing
  Downloading pronouncing-0.2.0.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting cmudict>=0.4.0
  Downloading cmudict-1.0.13-py3-none-any.whl (939 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m939.3/939.3 kB[0m [31m32.5 MB/s[0m eta [36m0:00:00[0m
Collecting importlib-metadata<6.0.0,>=5.1.0
  Downloading importlib_metadata-5.2.0-py3-none-any.whl (21 kB)
Building wheels for collected packages: pronouncing
  Building wheel for pronouncing (setup.py) ... [?25l[?25hdone
  Created wheel for pronouncing: filename=pronouncing-0.2.0-py2.py3-none-any.whl size=6251 sha256=a25a50fa0cf5ec9617d3166768f0d14068ee180c7b0a947bed58636866d9071e
  Stored in directory: /root/.cache/pip/wheels/ee/d4/c2/fb8c0e2009b75358874506ff2ce1ee79370b6ef5cf08922206
Successfully built pronouncing
Installing collected pac

## Simple RNN for rap lyric generation


Supplementary code

In [2]:
# Language model to create seed phrases: a Markov chain (state size 3) built from the newline-joined bars
markovModel = markovify.NewlineText(
  "\n".join(eminemBars),
  state_size=3,
  well_formed=False,
)
def compareBars(bar, eminemBars):
  """
  Compare the generated bar to each of Eminem's bars

  For every reference bar, the pair (bar, reference bar) is turned into
  word-count vectors and their cosine similarity (1 - cosine distance) is
  computed. Pairs whose similarity is NaN are ignored.

  Args:
    bar: the generated bar (string).
    eminemBars: iterable of reference bars (strings).

  Returns:
    The mean cosine similarity over all comparable pairs, or 0.0 when there
    is no comparable pair (e.g. eminemBars is empty). A lower score means a
    more unique bar.
  """
  totalSimilarity = 0
  count = 0

  for eb in eminemBars:
    vectoriser = CountVectorizer()
    barArr = vectoriser.fit_transform([bar, eb]).toarray()

    # pdist yields the cosine *distance*; 1 - distance is the cosine similarity
    similarity = 1-pdist(barArr, 'cosine')[0]
    if not math.isnan(similarity):
      totalSimilarity += similarity
      count += 1

  if count == 0:
    # Nothing could be compared; avoid dividing by zero
    return 0.0
  return totalSimilarity/count # lower score means more unique bars

def rateBar(bar, eminemBars, eminemReadability, eminemRhymeIdx):
  """
  Score a generated bar against Eminem's reference statistics.

  The score is the sum of three terms:
    * eminemReadability minus the bar's automated readability index,
    * eminemRhymeIdx minus the value getRhymeDensity returns for the bar,
    * the value compareBars returns for the bar against eminemBars.

  Returns:
    The summed score; a lower rating is better.
  """
  readabilityGap = eminemReadability - textstat.automated_readability_index(bar)
  rhymeGap = eminemRhymeIdx - getRhymeDensity(bar)
  similarity = compareBars(bar, eminemBars)

  return readabilityGap + rhymeGap + similarity # lower rating is better

def getReadability(bars):
  """
  Returns the average readability score for the given bars

  Args:
    bars: a sized collection of bars (strings).

  Returns:
    The mean of textstat.automated_readability_index over the bars, or 0.0
    when bars is empty (instead of raising ZeroDivisionError).
  """
  count = len(bars)
  if count == 0:
    return 0.0

  total = sum(textstat.automated_readability_index(bar) for bar in bars)
  return total / count

def getRhymeDensity(bars):
  """
  Returns the average rhyme density for the given bars

  Rhyme density is the number of syllables belonging to "rhymed" words
  divided by the number of syllables of all words that have a known
  pronunciation. A word counts as rhymed when at least one of its rhymes
  (pronouncing.rhymes) occurs inside one of the first five bars.

  Args:
    bars: a collection of bars (strings). A single string is treated as one
      bar rather than being iterated character by character.

  Returns:
    rhymed syllables / total syllables, or 0.0 when no syllable was counted
    (empty input, or no word with a known pronunciation).
  """
  if isinstance(bars, str):
    bars = [bars]
  bars = list(bars)

  totalSyllables = 0
  rhymedSyllables = 0
  # Only the first five bars are searched for rhyming words
  candidateBars = bars[:5]

  for bar in bars:
    for word in bar.split():
      p = pronouncing.phones_for_word(word)
      if len(p) == 0:
        # Unknown pronunciation: skip this word only, keep scoring the rest of the bar
        continue
      syllables = pronouncing.syllable_count(p[0])
      totalSyllables += syllables
      # NOTE(review): `rhyme in b` is a substring test (e.g. "at" is found in
      # "that") - confirm whether whole-word matching was intended.
      if any(rhyme in b for rhyme in pronouncing.rhymes(word) for b in candidateBars):
        rhymedSyllables += syllables

  if totalSyllables == 0:
    return 0.0
  return rhymedSyllables / totalSyllables

def generateBar(seedPhrase, model, barLen, seqLen=29):
  """
  Generates a bar based on seed phrase

  Repeatedly encodes the current phrase with the module-level `tokeniser`,
  pads it to `seqLen` tokens, asks `model` for the most likely next word
  index and appends the corresponding word to the phrase.

  Args:
    seedPhrase: text the bar starts with.
    model: object whose predict(tokens, verbose=0) returns per-class scores.
    barLen: maximum number of words to append.
    seqLen: length the token sequence is padded/truncated to (default 29,
      the value previously hard-coded).

  Returns:
    The seed phrase followed by the generated words, space separated.
  """
  for _ in range(barLen):
    seedTokens = pad_sequences(tokeniser.texts_to_sequences([seedPhrase]), maxlen=seqLen)
    p = model.predict(seedTokens, verbose=0)
    wordIdx = int(np.argmax(p, axis=1)[0])
    # index_word maps the tokeniser's 1-based word index straight to the word,
    # which avoids rebuilding a list of the whole vocabulary for every word.
    nextWord = tokeniser.index_word.get(wordIdx)
    if nextWord is None:
      # Index 0 (reserved, no word assigned) or an index outside the vocabulary:
      # the phrase would not change, so further predictions would repeat.
      break
    seedPhrase += " " + nextWord
  return seedPhrase

def generateRap(model, eminemBars, userPrompt, eminemReadability, eminemRhymeIdx, barLen=10, rapLen=5, minThreshold=-0.2, maxThreshold=0.2, attempts=1):
  """
  Generates a rap

  Bars are generated one at a time. The first bar is seeded with the user's
  prompt, later bars with the first three words of a Markov-generated
  sentence. A bar is accepted when its rating lies within
  [minThreshold, maxThreshold]; after `attempts` unsuccessful tries the
  candidate with the lowest rating seen since the last accepted bar is used.

  Args:
    model: model passed on to generateBar.
    eminemBars: reference bars passed on to rateBar.
    userPrompt: seed phrase for the first bar.
    eminemReadability: reference readability passed on to rateBar.
    eminemRhymeIdx: reference rhyme density passed on to rateBar.
    barLen: exclusive upper bound for the number of generated words per bar
      (the lower bound is 4).
    rapLen: number of bars to produce.
    minThreshold, maxThreshold: inclusive rating window for direct acceptance.
    attempts: tries per bar before falling back to the best candidate.

  Returns:
    List of the chosen bars (strings), in generation order.
  """
  rap = []
  count = 0
  potentialBars = []

  while len(rap) < rapLen:
    seedPhrase = userPrompt
    if rap:
      # use Markov model to generate seed phrase; make_sentence may give None
      # when it fails, in which case the user's prompt is reused as the seed
      sentence = markovModel.make_sentence(tries=100)
      if sentence:
        seedPhrase = " ".join(sentence.split(" ")[:3])

    count += 1
    # randrange(4, barLen) needs barLen > 4; for smaller limits use barLen itself
    wordCount = rand.randrange(4, barLen) if barLen > 4 else max(barLen, 0)
    bar = generateBar(seedPhrase, model, wordCount)
    barRating = rateBar(bar, eminemBars, eminemReadability, eminemRhymeIdx)
    potentialBars.append((barRating, bar))

    if minThreshold <= barRating <= maxThreshold:
      chosenBar = bar
    elif count >= attempts:
      # Out of attempts: fall back to the candidate with the lowest rating
      chosenBar = min(potentialBars, key=lambda candidate: candidate[0])[1]
    else:
      continue

    rap.append(chosenBar)
    # Start the next bar with a clean slate so earlier candidates cannot be reused
    count = 0
    potentialBars = []
    print("Generated Bar:", len(rap), "\n", chosenBar)

  return rap



Generate rap with simple RNN

In [3]:
# Ask for the phrase that seeds the first bar
userPrompt = input("Enter a prompt for the rap: ")
print("\nGenerating rap...")
# Eminem readability and rhyme index pre-calculated to save time
rap = generateRap(
  loaded_modelRNN,
  eminemBars,
  userPrompt,
  eminemReadability=2.9082144822041736,
  eminemRhymeIdx=0.3032329559883949,
)

# Print the finished rap, one bar per line, followed by a blank line
print("\nRap Generated with Simple RNN:")
for rapLine in rap:
  print(rapLine)
print()

Enter a prompt for the rap: I love computer science

Generating rap...
Generated Bar: 1 
 I love computer science freeze fall dude back down there
Generated Bar: 2 
 I breathe on you do me for me
Generated Bar: 3 
 For all the feeling alone for you
Generated Bar: 4 
 And just make what 8x mean hailie say eminem yeah
Generated Bar: 5 
 I never meant back again baby baby boy i know why what

Rap Generated with Simple RNN:
I love computer science freeze fall dude back down there
I breathe on you do me for me
For all the feeling alone for you
And just make what 8x mean hailie say eminem yeah
I never meant back again baby baby boy i know why what



## Transformer model for rap lyric generation

Supplementary code

In [4]:
def generate_lyric(start: str, length: int, topk: int) -> list[str]:
  """
  Generate lyrics by running the run_generation.py script in a subprocess.

  The script is invoked with model type 'gpt2' and the fine-tuned model
  directory on Google Drive, and its standard output is parsed into one
  string per generated sequence.

  Args:
    start: prompt text passed as --prompt.
    length: value passed as --length.
    topk: number of sequences to return (passed as --num_return_sequences).
      Note that the script's --k option is fixed at 50 and is not controlled
      by this argument.

  Returns:
    List of generated texts with the sequence headers and the <BOS>/<EOS>
    markers removed.

  Raises:
    RuntimeError: if the script exits with a non-zero status; the message
      contains the script's stderr.
  """
  command = [
    'python', '/content/drive/MyDrive/run_generation.py',
    '--model_type', 'gpt2',
    '--model_name_or_path', '/content/drive/MyDrive/emNN_finetuned',
    '--prompt', start,
    '--stop_token', '',
    '--k', '50',
    "--length=" + str(length),
    '--num_return_sequences', str(topk),
  ]
  result = subprocess.run(command, capture_output=True)
  if result.returncode != 0:
    # Surface the script's error instead of silently returning empty text
    raise RuntimeError(
      "run_generation.py failed with exit code " + str(result.returncode) + ":\n"
      + result.stderr.decode('utf-8', errors='replace')
    )

  result_str = result.stdout.decode('utf-8')
  # Drop the first header so the first list element is the first sequence
  result_str = result_str.replace('=== GENERATED SEQUENCE 1 ===\n', "")
  result_str = result_str.replace('<EOS>', '').replace('<BOS>', '')
  # Raw string avoids the invalid '\d' escape; \d+ also handles sequence numbers >= 10
  result_list = re.split(r'=== GENERATED SEQUENCE \d+ ===\n', result_str)
  return result_list

Generate rap with GPT-2 transformer

In [5]:
# Request one generated sequence for the prompt entered earlier
lyrics_res = generate_lyric(start=userPrompt, length=200, topk=1)
# Show the first sequence, turning every double quote into a line break
print(lyrics_res[0].replace('"', '\n'))

I love computer science and love writing.

 
We need more people with computer science skills, because we want more talent.
 I said 
Yeah, you got a job, but don't let me get in your office.
 
What the fuck? How the fuck are you supposed   on my plane? I'm in the wrong plane!
 I said 
Oh, no, no! I'm in the wrong plane!
 
You're getting in your seat, you're on my plane! I'll take you to where you're supposed to be!
 But I'm not in the right plane.
 
Oh no!
 
Yeah, no!
 
What the fuck are you supposed to know? You're not supposed to know!

 I don't mean 
I don't like it
 
What the fuck do you say?
 
Oh, my God!
 
Oh no!
 
You ain't got no brain! You ain't got no brain! You ain't got no brain! You ain't got no brain!


