In [1]:
# Rhyme modelling


In [1]:
import rhyme_detection

In [2]:
print(rhyme_detection.sliding_window('patriot', 'hey'))

([['P', 'EY1']], 3.5)


In [3]:
# Rhyme scoring utilities built on CMUdict pronunciations:
# phoneme-, syllable- and sentence-level rhyme scores plus a sliding-window rhyme finder.
import math
import itertools
import pickle

import numpy as np
import pandas

import cmudict
import global_alignment

from syllable_matrices import vowel_matrix, consonant_matrix

# Module-level lookup tables built once when this cell runs.
# The position of a phoneme inside cmu_vowels / cmu_consonants is used by
# phoneme_score() and score_consonants() as its row/column index into
# vowel_matrix / consonant_matrix, so the list order matters.
# NOTE(review): `cmudict.phonemes(...)` and `cmudict.reversed_dict()` look like a
# project-local wrapper API rather than the PyPI `cmudict` package — confirm.
cmu_vowels = list(cmudict.phonemes('vowel').keys())
cmu_consonants = list(cmudict.phonemes('consonant').keys())
# Word (upper-case) -> list of syllables, as consumed by sentence_to_syllables().
cmu_dict = cmudict.dict()
cmu_dict_reversed = cmudict.reversed_dict()

# with open('two_tuples.txt', 'rb') as f:
#     similarity = pickle.load(f)
#     ngrams = [' '.join(words) for words in similarity.keys()]


def phoneme_score(phoneme_1, phoneme_2):
    """
    Look up the rhyme score of two phonemes that belong to the same class.

    The pair is first searched in ``cmu_vowels`` and then in ``cmu_consonants``.
    The score is read from the corresponding matrix using the smaller list
    index as the row and the larger one as the column.
    :param phoneme_1: an ARPAbet phoneme symbol
    :param phoneme_2: an ARPAbet phoneme symbol
    :return: the matrix entry for the pair, or -3 when the two phonemes are
             not both vowels or both consonants
    """
    lookup_tables = (
        (cmu_vowels, vowel_matrix),
        (cmu_consonants, consonant_matrix),
    )
    for inventory, matrix in lookup_tables:
        if phoneme_1 in inventory and phoneme_2 in inventory:
            # Always address the matrix as [smaller index][larger index].
            row, column = sorted((inventory.index(phoneme_1), inventory.index(phoneme_2)))
            return matrix[row][column]
    return -3  # a consonant is matched with a vowel.


def score_stress(stress_1, stress_2):
    """
    Score how well the stress markers of two vowels agree.
    :param stress_1: stress marker of the first vowel (0, 1 or 2)
    :param stress_2: stress marker of the second vowel (0, 1 or 2)
    :return: 1 when both markers are equal and are 1 or 2; 0 in every other case
    """
    same_stressed_level = stress_1 == stress_2 and (stress_1 == 1 or stress_1 == 2)
    return 1 if same_stressed_level else 0


def syllable_score(syllable_1, syllable_2):
    """
    Rhyme score of two syllables: vowel score + stress score + consonant score.

    Only the vowel and the phonemes that follow it are compared; phonemes in
    front of the vowel are ignored.
    :param syllable_1: a group of phonemes
    :param syllable_2: a group of phonemes
    :return: the rhyme score for both syllables, or -3 when either syllable
             contains no vowel
    """
    nucleus_1 = vowel_position(syllable_1)
    nucleus_2 = vowel_position(syllable_2)
    if min(nucleus_1, nucleus_2) < 0:
        return -3

    vowel_1, vowel_2 = syllable_1[nucleus_1], syllable_2[nucleus_2]
    # Everything after the vowel; an empty slice when the vowel is last.
    coda_1 = syllable_1[nucleus_1 + 1:]
    coda_2 = syllable_2[nucleus_2 + 1:]

    # The first two characters of the vowel are passed as the phoneme and the
    # third character is read as its stress digit.
    total = phoneme_score(vowel_1[0:2], vowel_2[0:2])
    total = total + score_stress(int(vowel_1[2]), int(vowel_2[2]))
    total = total + score_consonants(coda_1, coda_2)
    return total


def score_consonants(consonants_1, consonants_2):
    """
    Align two consonant groups and compute their (normalised) rhyme score.

    Each aligned pair contributes:
      * a gap ('-') against a consonant: the consonant's entry in column 21 of
        ``consonant_matrix`` for pairs in the first half of the alignment, or
        column 22 for pairs in the second half;
      * two consonants: ``phoneme_score`` of the pair.
    Pairs that cannot be scored are skipped (best effort). When the longer of
    the two input groups has more than one consonant, the sum is divided by
    that length.
    :param consonants_1: Array of consonants
    :param consonants_2: Array of consonants
    :return: the rhyme score
    """
    paired_consonants = align_consonants(consonants_1, consonants_2)
    pair_count = len(paired_consonants)
    consonant_score = 0
    for i, pair in enumerate(paired_consonants):
        # Column of the "unmatched" score: 21 in the first half, 22 afterwards.
        unmatched = 22 if i >= pair_count / 2 else 21
        try:
            if pair[0] == '-':
                consonant_score += consonant_matrix[cmu_consonants.index(pair[1])][unmatched]
            elif pair[1] == '-':
                consonant_score += consonant_matrix[cmu_consonants.index(pair[0])][unmatched]
            else:
                try:
                    consonant_score += phoneme_score(pair[0], pair[1])
                except Exception:
                    # Best effort: an unscorable pair contributes nothing.
                    # `Exception` (not a bare except) so that KeyboardInterrupt
                    # and SystemExit still propagate.
                    pass
        except (KeyError, ValueError):
            # Unknown phoneme in a gap pair (list.index raises ValueError): skip it.
            pass

    longest = max(len(consonants_1), len(consonants_2))
    if longest > 1:
        consonant_score = consonant_score / longest

    return consonant_score


def align_consonants(consonants_1, consonants_2):
    """
    Pair up two consonant groups using the global alignment helper module.

    The aligned sequences returned by ``global_alignment`` are reversed before
    being zipped together.
    :param consonants_1: group of consonants
    :param consonants_2: group of consonants
    :return: list of (item from the first alignment, item from the second alignment) tuples
    """
    score_matrix, trace = global_alignment.globalAlign(consonants_1, consonants_2)
    first_aligned, second_aligned = global_alignment.getAlignedSequences(
        consonants_1, consonants_2, score_matrix, trace
    )
    first_reversed = first_aligned[::-1]
    second_reversed = second_aligned[::-1]
    return list(zip(first_reversed, second_reversed))


def vowel_position(syllable):
    """
    Locate the first vowel in a syllable.

    The first two characters of each phoneme are looked up in
    ``cmudict.phonemes('')`` to decide whether it is a vowel.
    :param syllable: list of phonemes
    :return: index of the first vowel, or -1 when the syllable has none
    """
    for index, phoneme in enumerate(syllable):
        if cmudict.phonemes('')[phoneme[0:2]] == 'vowel':
            return index
    return -1


def sentence_to_syllables(sentence):
    """
    Convert a sentence into one flat list of syllables.

    The sentence is split on single spaces and every word is looked up
    (upper-cased) in ``cmu_dict``; a word missing from the dictionary raises
    KeyError.
    :param sentence: A string
    :return: the syllables of all words, in sentence order
    """
    return [
        syllable
        for word in sentence.split(' ')
        for syllable in cmu_dict[word.upper()]
    ]


def rhyme_score(sentence_1, sentence_2):
    """
    Average syllable-by-syllable rhyme score of two sentences.

    Both sentences are converted to syllables and compared position by
    position; the mean of the syllable scores is rounded to one decimal.
    :param sentence_1: A string
    :param sentence_2: A string
    :return: Rhyme score
    :raises Exception: when the two sentences do not have the same number of syllables
    """
    upper_1, upper_2 = sentence_1.upper(), sentence_2.upper()
    first = sentence_to_syllables(upper_1)
    second = sentence_to_syllables(upper_2)
    if len(first) != len(second):
        raise Exception("Inputs are not of the same type")

    total = 0
    for left, right in zip(first, second):
        total += syllable_score(left, right)
    return round(total / len(first), 1)


def rhyme_score_syllables(syllables_1, syllables_2):
    """
    Average position-by-position rhyme score of two syllable lists.

    When the lists differ in length nothing is scored: both lists are printed
    and 0 is returned.
    :param syllables_1: list of syllables
    :param syllables_2: list of syllables
    :return: mean syllable score rounded to one decimal (0 on length mismatch)
    """
    if len(syllables_1) != len(syllables_2):
        print(f"{syllables_1},{syllables_2}")
        return round(0, 1)

    total = 0
    for left, right in zip(syllables_1, syllables_2):
        total += syllable_score(left, right)
    return round(total / len(syllables_1), 1)


def rhyme_matrix(sentence_1, sentence_2):
    """
    Build the pairwise syllable rhyme-score matrix of two sentences.

    Rows follow the syllables of sentence_1, columns the syllables of
    sentence_2; each cell holds the rhyme score of that syllable pair.
    :param sentence_1: a string
    :param sentence_2: a string
    :return: two dimensional numpy array with rhyme scores
    """
    row_syllables = sentence_to_syllables(sentence_1.upper())
    column_syllables = sentence_to_syllables(sentence_2.upper())
    scores = np.zeros((len(row_syllables), len(column_syllables)))
    for row, row_syllable in enumerate(row_syllables):
        for column, column_syllable in enumerate(column_syllables):
            scores[row, column] = syllable_score(row_syllable, column_syllable)
    return scores


def rhyme_table(sentence_1, sentence_2):
    """
    Labelled version of rhyme_matrix.

    Row labels are the joined phonemes of each syllable of sentence_1, column
    labels those of sentence_2.
    :param sentence_1: a string
    :param sentence_2: a string
    :return: pandas DataFrame of syllable rhyme scores
    """
    scores = rhyme_matrix(sentence_1, sentence_2)
    row_labels = [''.join(syllable) for syllable in sentence_to_syllables(sentence_1)]
    column_labels = [''.join(syllable) for syllable in sentence_to_syllables(sentence_2)]
    return pandas.DataFrame(scores, index=row_labels, columns=column_labels)


def sliding_window(sentence_1, sentence_2):
    """
    Slide the shorter syllable sequence over the longer one and keep the best match.

    Every window of the longer sequence that has the length of the shorter one
    is scored with rhyme_score_syllables; the first window with the highest
    score wins. If the two inputs share a word (see needs_penalty) the score is
    multiplied by 0.25.
    :param sentence_1: a string
    :param sentence_2: a string
    :return: a tuple (best window of syllables, rhyme score); ('', 0) when a
             word is not found in the pronunciation dictionary
    """
    try:
        shorter = sentence_to_syllables(sentence_1)
        longer = sentence_to_syllables(sentence_2)
    except KeyError:
        return ('', 0)

    if len(shorter) > len(longer):
        shorter, longer = longer, shorter

    width = len(shorter)
    windows = [longer[start:start + width] for start in range(len(longer) - width + 1)]
    candidates = [(window, rhyme_score_syllables(shorter, window)) for window in windows]
    best_window, best_score = max(candidates, key=lambda candidate: candidate[1])

    if needs_penalty(sentence_1, sentence_2):
        return (best_window, best_score * 0.25)
    return (best_window, best_score)


def needs_penalty(sentence_1, sentence_2):
    """
    Tell whether any word occurs more than once across the two sentences.

    Both sentences are lower-cased and split on single spaces, so the
    comparison is case-insensitive and empty tokens produced by repeated
    spaces count as words too.
    :param sentence_1: a string
    :param sentence_2: a string
    :return: True when at least one token is repeated, otherwise False
    """
    lowered = (sentence_1.lower(), sentence_2.lower())
    seen = set()
    for text in lowered:
        for token in text.split(" "):
            if token in seen:
                return True
            seen.add(token)
    return False


# TODO: this is possibly broken
def words_from_syllables(syllables, sentence):
    """
    Select the words of a sentence that contain at least one of the given syllables.

    Each distinct word of the sentence is looked up (upper-cased) in
    ``cmu_dict``; a word is kept when any entry of ``syllables`` appears in
    its syllable list.
    :param syllables: iterable of syllables to search for
    :param sentence: a string
    :return: list of matching words, in order of first appearance
    """
    pronunciations = {word: cmu_dict[word.upper()] for word in sentence.split()}
    return [
        word
        for word, word_syllables in pronunciations.items()
        if any(syllable in word_syllables for syllable in syllables)
    ]


def select_rhyme_words(sentence_1, sentence_2):
    """
    Return the words of the longer sentence that take part in the best rhyme window.

    "Longer" is measured in syllables; on a tie sentence_2 is treated as the
    longer one.
    :param sentence_1: a string
    :param sentence_2: a string
    :return: list of words from the longer sentence
    """
    shorter, longer = sentence_1, sentence_2
    if len(sentence_to_syllables(sentence_1)) > len(sentence_to_syllables(sentence_2)):
        shorter, longer = sentence_2, sentence_1
    best_window = sliding_window(shorter, longer)[0]
    return words_from_syllables(best_window, longer)


# if __name__ == '__main__':
#     result = []
#     word = "happening"
#     print(cmu_dict["HAPPENING"])
#     print(cmu_dict["DABBLED"])
#     print(cmu_dict["IN"])
#     for ngram in ngrams:
#         try:
#             chosen_rhyme = sliding_window(word, ngram)
#             if chosen_rhyme[1] > 3.0:
#                 result.append(words_from_syllables(chosen_rhyme[0], ngram))
#         except:
#             pass
#     for res in result:
#         if len(res) > 0:
#             print(res)




# NOTES
# The final score for two given syllables is the sum of the vowel score, the normalized consonant score, and the stress score.
# This version of CMUdict contains semivowels, which are not included in the research by Hirjee.
# Open question: how is the stress score calculated?
# Open question: which "unmatched" score should be used? For now the same one is used.

In [4]:
# Specify the file path and name
import pandas as pd
lyrics = pd.read_csv('/Users/zhenyabudnyk/PycharmProjects/LyrAIX/Thesis/Research Questions/RQ1/prompts.csv')
file_path = 'cmudict.rep'

# Word -> phoneme string mapping parsed from the dictionary file.
# Each non-empty line is split on whitespace: the first token is the word,
# the remaining tokens are re-joined with single spaces.
word_phoneme_dict = {}

# Candidate encodings, tried in order until the file decodes without error.
encodings = ['utf-8', 'latin-1', 'utf-16', 'cp1252']
for encoding in encodings:
    # Parse into a fresh dict so a failed attempt leaves no partial entries behind.
    parsed_entries = {}
    try:
        # Pass the candidate encoding to open(); without it every iteration
        # would re-read the file with the platform default encoding.
        with open(file_path, 'r', encoding=encoding) as file:
            for line in file:
                line = line.strip()
                if line:
                    split = line.split()
                    parsed_entries[split[0]] = ' '.join(split[1:])
    except UnicodeError:
        # UnicodeError also covers UnicodeDecodeError; try the next encoding.
        print('Error')
        continue
    word_phoneme_dict = parsed_entries
    break  # stop at the first encoding that worked

In [5]:
print(word_phoneme_dict.get("ABACUS"))
print(len(word_phoneme_dict))

AE1 - B AH0 - K AH0 S
129465


Available Memory: 130.46 GB


In [13]:
import psutil

# Get the current available memory in bytes
available_memory = psutil.virtual_memory().available

# Convert bytes to human-readable format
available_memory_gb = available_memory / (1024 ** 3)  # Convert bytes to gigabytes

# Print the available memory
print(f"Available Memory: {available_memory_gb:.2f} GB")

In [6]:
import re
def clean_for_rhymes(lyric):
    """
    Reduce a lyric to the upper-cased last word of each of its lines.

    Every character that is not an ASCII letter, a digit or whitespace is
    removed first (this includes brackets, parentheses and apostrophes), so
    "missin'" becomes "MISSIN". Lines that are empty or contain only
    whitespace after this clean-up are skipped, so the result never contains
    blank entries.
    :param lyric: lyric text with lines separated by newline characters
    :return: list with the last word of every non-blank line, upper-cased
    """
    cleaned = re.sub(r"[^a-zA-Z0-9\s]", '', lyric).upper()
    end_words = []
    for line in cleaned.split('\n'):
        words = line.split()
        if words:  # skip empty and whitespace-only lines
            end_words.append(words[-1])
    return end_words

In [98]:
from tqdm import tqdm
# Register `progress_apply` on pandas objects so the apply below shows a progress bar.
tqdm.pandas()
# Only the first two rows are processed (`[:2]`); the assignment aligns on the
# index, so every other row of 'end words' ends up as NaN.
# NOTE(review): the slice looks like a leftover from a quick trial run — confirm.
lyrics['end words'] = lyrics['lyrics'][:2].progress_apply(clean_for_rhymes)

In [25]:
# lyrics['end words'][60000]

In [33]:
pattern = r"(\[\s*(Verse|Hook|Bridge|Chorus|Pre-Chorus|Intro|Outro|Refrain|Pre-Hook|Pre Hook|Pre Chorus|pre-Chorus|Interlude).*\]\n|\(\((Verse|Hook|Bridge|Chorus|Pre-Chorus|Intro|Outro|Refrain|Pre-Hook|Pre Hook|Pre Chorus|pre-Chorus|Interlude).*\)\)\n|\((Verse|Hook|Bridge|Chorus|Pre-Chorus|Intro|Outro|Refrain|Pre-Hook|Pre Hook|Pre Chorus|pre-Chorus|Interlude).*\)\n|(VERSE|HOOK|BRIDGE|CHORUS|PRE-CHORUS|INTRO|OUTRO|REFRAIN|PRE-HOOK|PRE HOOK|PRE CHORUS|INTERLUDE).*\n)"
text = """
[Chorus]
I hate to be the bearer of (Bad news)
As the cracks appear to run (We'll meet)
But we'll meet again on better terms
Yeah

[Verse]
You provoke tears that stain
You control my face
You arrange it to show
A broken expression

[Chorus]
I hate to be the bearer of (Bad news)
As the cracks appear to run (We'll meet)
But we'll meet again on better terms
[Bridge]
I hate to be the bearer of (Bad news)
As the cracks appear to run (We'll meet)
But we'll meet again on better terms

[Outro]
Hope to God we get out alive
Hope to God we get out alive
"""
text2 = """ 
Here I stand before you(Love)
Completely naked and vulnerable in a manner of speaking
Cause manners of speaking are that which I build my life upon
And I’m tryna build a super righteous one, excuse me...
[Intro]
The only question I have for you, is are you here to add or subtract from the story?
Money is far from an issue, in fact, I say Math eww, like corey, fuck the numbers...
I’ve been a savage in my own right...right?
Wonder if you prepared for a long flight 
Already you managed to skip a couple twin entendres
You can’t find the mine, I guess that means the bomb safe
But I’m the type of guy that wanna see the world burn
Cause I’m thinking one burning bush made the world turn
As I transcend this earthly body and soul
I break the mold of what they said I could be
And add greatness to my previous limitations
Fuck the court system and all their dishonest litigations
You’d swear I’m not sleeping with an attorney now
Treat my body like eternity, but it’s burning out
I give and go everything that I can though
My mind is bright enough baby, so blow out all them candles
Please bring some moet’s for the poets
Toasting to my virgo past life when there was no sex
Now I’m bitter when I hit her, getting cut like bow flex
That last bit was so predictable 
I take chances, wear red, and hit the bull
Willing to give my worst enemy my residuals
I’m standing out even when I’m among individuals
I’m sick with consistency, never intervals
Coming at you from different angles, that pivotal
And the fact that they still ignore it is pitiful
One day I’ll move em all, I just need a little room
My little room is where the sound is made at 
But soon enough these rich niggas gone play me in they maybach
Then all them pretty girls that overlooked me way back
Ignored the hype when I typed in skype and said play that
Spirit’s above it all...try not to focus on it
Astral luminescence...It makes for doper phonics
I’m quick with the magic, this hocus pocus sonic
Plus none of it’s a trick, I’m just so open honest
This honesty’s what he thrives on
Bet all your dishonesty’s what you die from
They tell me you gone wake the world up when you rise son
So far I’ve been coming up short as if I’m 5’1”
The fall from here is painless, but I cry some
Watch your psyche burn brightly once the lies done
This virtuous experience made me
An independent with the guile to trample majors like a bison
See those the kinda lines we really proud of
When I say we, I mean he, and everyone around us
Just a small attempt at me being a humble dude
But when we try to shake it up, they wanna rumble too
If you don’t have my name, you’re stuck at number 2
And since it’s not contagious, you’re never catching my dragon flu
93p making the beat sound like a dragon flute
The ghost of Christopher Wallace possession only passing through

Thank you..for paying attention..."""

text3 = """CHORUS:
I feel strange
I feel changed
I feel strange.....overcome...overcome by you

I fell in too deep
But i learned to swim
In an undertow
Since i'm giving in

[Interlude]
I feel strange
I feel changed
I feel strange...overcome...overcome

I'm a little bit wiser, I'm a little bit sadder
I'm a little bit less, you might have guessed
But if you could be staying
Tell me now, darlin', I think I'm fading
I swear I'll never trade your love for a lie
(CHORUS):
(overcome..overcome...
Sometimes it helps me to give it up
In some ways you're never gonna get it
Some nights i feel i'm gonna give it
Sunlight...every night
Overcome...overcome)

...hold still, come on and save me
...hold still, come on and let me
...hold still come on again...
...hold still, come on and let me"""
# text = "[Verse 1] This is a verse. (Verse 2) Another verse. ((Verse 3)) More verses. VERSE 4"
# text = re.sub(r'\n', '', text)
# matches = re.finditer(pattern, text, re.IGNORECASE)
# Section-header regex used by split_into_parts(); this assignment replaces the
# `pattern` defined earlier in this cell. The whole match is capture group 1 and
# every alternative ends with a newline:
#   1. "[ Keyword ... ]"   2. "((Keyword ...))"   3. "(Keyword ...)"
#   4. an upper-case keyword followed by the rest of the line (not anchored to
#      the start of the line, so it can also match in the middle of a line).
pattern = r"(\[\s*(Verse|Hook|Bridge|Chorus|Pre-Chorus|Refrain|Pre Chorus|Interlude).*\]\n|\(\((Verse|Hook|Bridge|Chorus|Pre-Chorus|Refrain|Pre Chorus|Interlude).*\)\)\n|\((Verse|Hook|Bridge|Chorus|Pre-Chorus|Refrain|Pre Chorus|Interlude).*\)\n|(VERSE|HOOK|BRIDGE|CHORUS|PRE-CHORUS|REFRAIN|PRE CHORUS|INTERLUDE).*\n)"
#pattern_unwanted = r"\[\s*(?!(Verse|Hook|Bridge|Chorus|Pre-Chorus)).*?\]\n|\(\(\s*(?!(Verse|Hook|Bridge|Chorus|Pre-Chorus)).*?\)\)\n|\(\s*(?!(Verse|Hook|Bridge|Chorus|Pre-Chorus)).*?\)\n"

# pattern = r"\[\s*((?:Verse|Hook|Bridge|Chorus|Pre-Chorus|Refrain|VERSE|HOOK|BRIDGE|CHORUS|PRE-CHORUS|REFRAIN)\b.*?)\]\n|\(\(\s*((?:Verse|Hook|Bridge|Chorus|Pre-Chorus|Refrain|VERSE|HOOK|BRIDGE|CHORUS|PRE-CHORUS|REFRAIN)\b.*?)\)\)\n|\(\s*((?:Verse|Hook|Bridge|Chorus|Pre-Chorus|Refrain|VERSE|HOOK|BRIDGE|CHORUS|PRE-CHORUS|REFRAIN)\b.*?)\)\n"

# antipattern = r"\[\s*(?!((?:Verse|Hook|Bridge|Chorus|Pre-Chorus|Refrain|VERSE|HOOK|BRIDGE|CHORUS|PRE-CHORUS|REFRAIN)\b)).*?\n.*\n|\(\(\s*(?!((?:Verse|Hook|Bridge|Chorus|Pre-Chorus|Refrain|VERSE|HOOK|BRIDGE|CHORUS|PRE-CHORUS|REFRAIN)\b)).*?\)\)\n.*\n|\(\s*(?!((?:Verse|Hook|Bridge|Chorus|Pre-Chorus|Refrain|VERSE|HOOK|BRIDGE|CHORUS|PRE-CHORUS|REFRAIN)\b)).*?\)\n.*\n"

# Matches a "[...]" or "(...)" group together with everything that follows it
# up to the end of the string ("[\s\S]*" also crosses newlines). In
# split_into_parts() the match is replaced by '', i.e. a part's text is cut off
# at the first such group. The (?m) flag has no effect here because the
# pattern contains no ^ or $ anchors.
antipattern = r"(?m)\[[^\]]*\][\s\S]*|\([^)]*\)[\s\S]*|\(.*\)[\s\S]*"

# Matches an opening (, [ or { that is NOT directly preceded by a newline, the
# shortest following run of characters on the same line, and a closing ), ] or }
# (the bracket types do not have to match). Replacing with group 2 keeps only
# the inner text, so brackets in the middle of a line are unwrapped while a
# bracket at the very start of a line is left untouched.
remove_brackets_pattern = r'(?<!\n)([\(\[\{])(.*?)([\)\]\}])'
replacement = r'\2'


# result = re.sub(pattern, replacement, "\n"+lyrics['lyrics'][9554])
def split_into_parts(text):
    """
    Split raw lyrics into labelled song parts.

    Steps:
      1. A newline is prepended and brackets that do not start a line are
         unwrapped (``remove_brackets_pattern``), keeping their inner text.
      2. Every section header matched by ``pattern`` is prefixed with the
         marker 'placeholder' and the text is split on that marker.
      3. For each chunk the first line becomes the part name and the rest the
         part text; the part text is cut at the first remaining bracketed or
         parenthesised group (``antipattern``).
      4. A leading part without a name (text in front of the first header) is
         dropped.
    NOTE: lyrics that literally contain the word 'placeholder' are split at
    that word as well.
    :param text: the full lyrics of one song
    :return: list of (part_name, part_text) tuples; an empty list when the
             input is not a string or contains no usable text
    """
    # Missing lyrics read from CSV arrive as NaN (a float), not as a string.
    if not isinstance(text, str):
        return []

    unwrapped = re.sub(remove_brackets_pattern, replacement, "\n" + text)
    marked = re.sub(pattern, r'placeholder\1', unwrapped)
    chunks = [chunk for chunk in marked.split('placeholder') if chunk and chunk.strip() != '']

    song_parts = []
    for chunk in chunks:
        try:
            part_name, part_text = chunk.split("\n", 1)
        except ValueError:
            # Chunk without a newline: stop and return what was collected so far.
            return song_parts
        part_text = re.sub(antipattern, '', part_text)
        song_parts.append((part_name.strip(), part_text.strip()))

    # Guard against an empty list before looking at the first part
    # (previously an IndexError for empty or whitespace-only lyrics).
    if song_parts and not song_parts[0][0]:
        song_parts = song_parts[1:]
    return song_parts

#print(lyrics['lyrics'][1])
# p1 = split_into_parts(text2)
# #print(p1)
# for i in p1:
#     print(i[0]+" part name---------------")
#     print(i[1])
# Intro solo on guitar

100%|██████████| 2797631/2797631 [09:31<00:00, 4894.78it/s]


In [34]:
from tqdm import tqdm
# One dict per song part; collected in a list and turned into a DataFrame once at the end.
new_data = []

# Split every song into its labelled parts and copy the song-level metadata onto each part row.
for index, row in tqdm(lyrics.iterrows(), total=lyrics.shape[0]):
    song_parts = split_into_parts(row['lyrics'])
    for part_name, part_text in song_parts:
        new_row = {'artist': row['artist'], 'title': row['title'], 'tag': row['tag'], 'year': row['year'], 'views': row['views'], 
                   'part': part_name, 'lyrics': part_text}
        new_data.append(new_row)

# Convert the list to a DataFrame
new_df = pd.DataFrame(new_data)
# Drops rows whose 'lyrics' value is missing (NaN/None); empty strings are not
# treated as missing and stay in the frame.
new_df.dropna(subset=['lyrics'], inplace=True)

# Save the new DataFrame to a new CSV file
new_df.to_csv("song_parts.csv", index=False)





Unnamed: 0,artist,title,tag,year,views,part,lyrics
0,JAY-Z,Can I Live,rap,1996,468624,[Verse 1],While I'm watchin' every nigga watchin' me clo...
1,JAY-Z,Can I Live,rap,1996,468624,[Chorus],Ge-ge-geyeahhh\nCan I live?\nCan I live?
2,JAY-Z,Can I Live,rap,1996,468624,[Verse 2],My mind is infested with sick thoughts that ci...
3,JAY-Z,Can I Live,rap,1996,468624,[Chorus],Can I live?\nCan I live?\nCan I live?\nCan I l...
4,Fabolous,Forgive Me Father,rap,2003,4743,[Hook],Forgive me father for I have sinned\nBut look ...
...,...,...,...,...,...,...,...
5472336,Alana Springsteen,New Number,country,2022,1,[Chorus],One that I ain't dial at least a couple thousa...
5472337,Alana Springsteen,New Number,country,2022,1,[Verse 2],You need a new number and you can't get it fas...
5472338,Alana Springsteen,New Number,country,2022,1,[Chorus],One that I ain't dial at least a couple thousa...
5472339,Alana Springsteen,New Number,country,2022,1,[Bridge],"Oh, if you wanna help me out\nIf you wanna let..."


In [35]:
new_df

In [7]:
def translate_to_phonemes(end_words, phoneme_dict=None):
    """
    Replace each end word by the phonemes of its last syllable (in place).

    The phoneme string of a word uses '-' as syllable separator
    (e.g. "AE1 - B AH0 - K AH0 S"); only the part after the last '-' is kept
    and split into a list of phonemes. Words that are not in the dictionary
    are replaced by None.
    :param end_words: list of words; modified in place
    :param phoneme_dict: optional word -> phoneme-string mapping; defaults to
                         the module-level ``word_phoneme_dict``
    :return: the same list object, now holding phoneme lists / None
    """
    lookup = word_phoneme_dict if phoneme_dict is None else phoneme_dict
    for i, end_word in enumerate(end_words):
        phonemes = lookup.get(end_word)
        if phonemes is None:
            # Unknown word: .get() returns None instead of raising, so check explicitly.
            end_words[i] = None
        else:
            end_words[i] = str(phonemes).split('-')[-1].split()
    return end_words

In [8]:

def find_scheme(end_phonemes):
    """
    Assign a rhyme-group label to every entry of ``end_phonemes``.

    Each entry is compared, via sliding_window, with the representative of
    every group found so far (in order of creation); the first group whose
    score is >= 2 is used. Otherwise a new group is opened with the entry as
    its representative. Labels have the form "<number>-".
    Note: the groups are keyed by the entry itself, so when an entry that is
    already a representative fails to match any group, it is re-registered
    under the newly created label.
    :param end_phonemes: list of end words (one per lyric line)
    :return: list of group labels, one per entry
    """
    scheme = []
    group_of = {}  # representative entry -> group label
    next_group = 0

    for word in end_phonemes:
        label = None
        for representative, group in group_of.items():
            if sliding_window(word, representative)[1] >= 2:  # rhyme score >= 2
                label = group
                break
        if label is None:  # nothing rhymed: open a new group
            label = str(next_group) + '-'
            group_of[word] = label
            next_group += 1
        scheme.append(label)
    return scheme
    

In [9]:
def build_scheme(lyric):
    """
    Build the rhyme-scheme string (e.g. "0-1-0-2") of a lyric.

    The end words of the lyric are extracted with clean_for_rhymes, labelled
    with find_scheme, and the "<number>-" labels are concatenated; the final
    character (the trailing '-') is removed.
    :param lyric: lyric text with one line per row
    :return: the scheme string; '' for a lyric without end words
    """
    labels = find_scheme(clean_for_rhymes(lyric))
    joined = ''.join(labels)
    return joined[:-1]
    

In [2]:
import pandas as pd
lyrics = pd.read_csv('song_parts.csv')


Unnamed: 0,artist,title,tag,year,views,part,lyrics,explicitness,"tokenized, lemmatized, no bad words",predicted_topics,topic vectors,schemes
0,JAY-Z,Can I Live,rap,1996,468624,[Verse 1],While I'm watchin' every nigga watchin' me clo...,Explicit content,"['watchin', 'every', 'watchin', 'closely', 'bu...",wealth and authority,"[(0, 0.113372415), (1, 0.39564058), (2, 0.0583...",0-0-0-0-1-1-0-2-0-3-4-5-0-6-7-8-7-9-7-5-5-10-1...
1,JAY-Z,Can I Live,rap,1996,468624,[Chorus],Ge-ge-geyeahhh\nCan I live?\nCan I live?,Normal,"['geyeahhh', 'live', 'live']",violence,"[(0, 0.05250941), (1, 0.1470429), (2, 0.052438...",0-1-2
2,JAY-Z,Can I Live,rap,1996,468624,[Verse 2],My mind is infested with sick thoughts that ci...,Normal,"['mind', 'infested', 'sick', 'thought', 'circl...",wealth and authority,"[(0, 0.10937761), (1, 0.3868071), (2, 0.050701...",0-1-1-1-2-3-4-5-5-6-6-1-1-1-7-7-8
3,JAY-Z,Can I Live,rap,1996,468624,[Chorus],Can I live?\nCan I live?\nCan I live?\nCan I l...,Normal,"['live', 'live', 'live', 'live']",violence,"[(0, 0.044248056), (1, 0.12390851), (2, 0.0441...",0-1-2-3
4,Fabolous,Forgive Me Father,rap,2003,4743,[Hook],Forgive me father for I have sinned\nBut look ...,Explicit content,"['forgive', 'father', 'sinned', 'look', 'money...",violence,"[(0, 0.05578675), (1, 0.19629966), (2, 0.09306...",0-0-0-0-1-0-0-0
...,...,...,...,...,...,...,...,...,...,...,...,...
5334073,Alana Springsteen,New Number,country,2022,1,[Chorus],One that I ain't dial at least a couple thousa...,Normal,"['one', 'dial', 'least', 'couple', 'thousand',...",violence,"[(0, 0.02372636), (1, 0.1153468), (2, 0.023694...",0-1-0-2-3
5334074,Alana Springsteen,New Number,country,2022,1,[Verse 2],You need a new number and you can't get it fas...,Normal,"['need', 'new', 'number', 'fast', 'enough', 'c...",home and nature,"[(0, 0.17068717), (1, 0.13369475), (2, 0.02103...",0-0-1-2
5334075,Alana Springsteen,New Number,country,2022,1,[Chorus],One that I ain't dial at least a couple thousa...,Normal,"['one', 'dial', 'least', 'couple', 'thousand',...",violence,"[(0, 0.021061467), (1, 0.15476061), (2, 0.0210...",0-1-0-2-3-2-4-5
5334076,Alana Springsteen,New Number,country,2022,1,[Bridge],"Oh, if you wanna help me out\nIf you wanna let...",Normal,"['wanna', 'help', 'wanna', 'let', 'easy', 'car...",home and nature,"[(0, 0.027157595), (1, 0.07908834), (2, 0.0271...",0-1-2-3-3-2


In [42]:
lyrics

0-1-2


In [10]:
#print(lyrics.at[1, 'schemes'])
# Spot check: build the rhyme scheme for the text in row 1 of the 'results' column.
# NOTE(review): 'results' is not among the columns shown for song_parts.csv but
# does appear in the prompts.csv frame — confirm which `lyrics` frame is loaded
# when this cell runs.
print(build_scheme(lyrics.at[1, 'results']))

0-1-2-3-4-2-4-5


In [None]:
# Number of rows with missing lyrics before the clean-up.
count = lyrics['lyrics'].isna().sum()
print(count)
# Removes those rows from `lyrics` itself (in place), so the frame shown by
# earlier cells no longer reflects its current content.
lyrics.dropna(subset=['lyrics'], inplace=True)
# Same count again after the drop.
count = lyrics['lyrics'].isna().sum()
print(count)

 33%|███▎      | 1782111/5334078 [24:20<42:32, 1391.57it/s]  

In [11]:
from tqdm import tqdm
# Register `progress_apply` on pandas objects (progress bar for the apply below).
tqdm.pandas()
# print(lyrics['lyrics'][6])
# print(lyrics['lyrics'][559])
# Compute the rhyme scheme of every text in the 'results' column.
# build_scheme() expects a string, so a missing (NaN) cell would raise here.
lyrics['predicted scheme'] = lyrics['results'].progress_apply(build_scheme)


100%|██████████| 300/300 [00:00<00:00, 1070.66it/s]


In [12]:
lyrics

Unnamed: 0.3,Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,Valence,Arousal,Dominance,Part,Artist,Topic,Explicitness,Scheme,Instruction,results,predicted explicitness,predicted scheme
0,0,0,0,High,Low,Medium,Chorus,Sofya Wang,life and relationships,Normal,0-0-0-1-0-0-1-1,Generate a Chorus of song lyrics in the style ...,"We're lying, we're lying in the shadows\nI'm f...",Normal,0-1-2-3-4-5-6-7-8-9
1,1,1,1,High,Medium,Low,Bridge,Chiddy Bang,life and relationships,Normal,0-0-1-2-3-4-5-6,Generate a Bridge of song lyrics in the style ...,You don't wanna party\nWhy you tryna party\nI ...,Normal,0-1-2-3-4-2-4-5
2,2,2,2,High,Medium,High,Chorus,Shwayze,life and relationships,Normal,0-1-2-0-3-3-4,Generate a Chorus of song lyrics in the style ...,"And if you run from me, baby, run from me\nYou...",Normal,0-1-2-3-4-5-4
3,3,3,3,Medium,Low,High,Bridge,Dove Cameron,general,Normal,0-1-2-1-1-1,Generate a Bridge of song lyrics in the style ...,"Swing on by, all you can hear is me\nLet's get...",Normal,0-1-2-3-4-5
4,4,4,4,Medium,High,Medium,Chorus,Weezer,general,Normal,0,Generate a Chorus of song lyrics in the style ...,Singing like a symphony in a restaurant\n,Normal,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
295,295,295,295,High,High,Low,Verse,The Irish Rovers,general,Normal,0-1-2-3-4-5-6-7-8-9-10-11-12-13-14-15-16-17-18...,Generate a Verse of song lyrics in the style o...,Down to the wire\nI'm down to the wire\nI'm do...,Normal,0-1-2-3-4-5-6-7-8-9-10-11-12-13-14-15-16-17-18
296,296,296,296,High,Low,High,Verse,Genius English Translations,life and relationships,Explicit content,0-0-0-0,Generate a Verse of song lyrics in the style o...,"Well, I want to taste my eyes\nI wanna see the...",Normal,0-0-1-2
297,297,297,297,Low,High,Medium,Verse,Tiana Major9 & SiR,general,Normal,0-1-2-1,Generate a Verse of song lyrics in the style o...,"It's a big world, but I gotta do my thing\nThe...",Normal,0-1-2-3
298,298,298,298,Medium,Medium,Low,Verse,Spacey Jane,religion and society,Explicit content,0-1-2-3,Generate a Verse of song lyrics in the style o...,Him to me a mwah a cryd for the people that kn...,Normal,0-0-0-0


In [13]:
lyrics

Unnamed: 0.3,Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,Valence,Arousal,Dominance,Part,Artist,Topic,Explicitness,Scheme,Instruction,results,predicted explicitness,predicted scheme
0,0,0,0,High,Low,Medium,Chorus,Sofya Wang,life and relationships,Normal,0-0-0-1-0-0-1-1,Generate a Chorus of song lyrics in the style ...,"We're lying, we're lying in the shadows\nI'm f...",Normal,0-1-2-3-4-5-6-7-8-9
1,1,1,1,High,Medium,Low,Bridge,Chiddy Bang,life and relationships,Normal,0-0-1-2-3-4-5-6,Generate a Bridge of song lyrics in the style ...,You don't wanna party\nWhy you tryna party\nI ...,Normal,0-1-2-3-4-2-4-5
2,2,2,2,High,Medium,High,Chorus,Shwayze,life and relationships,Normal,0-1-2-0-3-3-4,Generate a Chorus of song lyrics in the style ...,"And if you run from me, baby, run from me\nYou...",Normal,0-1-2-3-4-5-4
3,3,3,3,Medium,Low,High,Bridge,Dove Cameron,general,Normal,0-1-2-1-1-1,Generate a Bridge of song lyrics in the style ...,"Swing on by, all you can hear is me\nLet's get...",Normal,0-1-2-3-4-5
4,4,4,4,Medium,High,Medium,Chorus,Weezer,general,Normal,0,Generate a Chorus of song lyrics in the style ...,Singing like a symphony in a restaurant\n,Normal,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
295,295,295,295,High,High,Low,Verse,The Irish Rovers,general,Normal,0-1-2-3-4-5-6-7-8-9-10-11-12-13-14-15-16-17-18...,Generate a Verse of song lyrics in the style o...,Down to the wire\nI'm down to the wire\nI'm do...,Normal,0-1-2-3-4-5-6-7-8-9-10-11-12-13-14-15-16-17-18
296,296,296,296,High,Low,High,Verse,Genius English Translations,life and relationships,Explicit content,0-0-0-0,Generate a Verse of song lyrics in the style o...,"Well, I want to taste my eyes\nI wanna see the...",Normal,0-0-1-2
297,297,297,297,Low,High,Medium,Verse,Tiana Major9 & SiR,general,Normal,0-1-2-1,Generate a Verse of song lyrics in the style o...,"It's a big world, but I gotta do my thing\nThe...",Normal,0-1-2-3
298,298,298,298,Medium,Medium,Low,Verse,Spacey Jane,religion and society,Explicit content,0-1-2-3,Generate a Verse of song lyrics in the style o...,Him to me a mwah a cryd for the people that kn...,Normal,0-0-0-0


In [14]:
# Writes the frame, including the new 'predicted scheme' column, to a hard-coded
# absolute path on the author's machine.
# NOTE(review): this is the same prompts.csv path that an earlier cell reads
# from, so the input file appears to be overwritten — confirm this is intended.
lyrics.to_csv('/Users/zhenyabudnyk/PycharmProjects/LyrAIX/Thesis/Research Questions/RQ1/prompts.csv', index=False)


In [17]:
# Count the rows whose predicted scheme is identical to the reference scheme,
# printing every matching scheme followed by the total.
# Works for any number of rows and any index (no hard-coded row count).
is_match = lyrics['Scheme'] == lyrics['predicted scheme']
for scheme in lyrics.loc[is_match, 'predicted scheme']:
    print(scheme)
match = int(is_match.sum())
print(match)

0
0-1
0-1-2-3
0-1-2-3
0-1-2-3
0-1-2-3-4
0-1-2-3
0
0
0-1
0-1
0-1-2-3-4-5-6-7
0-0-0-1
0-1-2-3
0-1-2-3
0-1-2-3
0-1-2
0-1
0-1-2-3-4-5-6
0-1-2-3
0-1-2-3
0-1-2-3
0-1-2-3
0
0-1
0-1-2-3
0-1-2-3
0
0
0-0-1-2
0-1-2-3
0-1-2-3-4-5
0-1-2-3
0-1-2-3-4
0-1-2-3
0
0-1
0-1-2-3
0
0-1-2-3
0-1
0-1-2-3
0-1-2-3
0-1
0-1-2-3
0-1-2
46


In [191]:
new_df.to_csv('song_parts.csv', index=False)

('', 0)


In [4]:
print(sliding_window('fazin', 'praisin'))

Drop the top off of the roof and now its missin Oh its missin now
Drop a couple tracks and now we drippin Oh its drippin now
Always talkin shit like who you dissin' who you dissin on
Switchin up because the whole team winnin Oh we winnin now
0-1-2-3
5334078


In [None]:
#print(lyrics['schemes'][2797630])
#print(build_scheme(lyrics['lyrics'][2797630]))
print(lyrics['lyrics'][2797630])
print(lyrics['schemes'][2797630])
print(len(lyrics))

# print(lyrics['schemes'][808])
# print(lyrics['lyrics'][808])

# print(lyrics['schemes'][9554])
# print(lyrics['lyrics'][9554])

# remove the separators - too much