In [86]:
from collections import Counter

import pandas as pd
import poetrytools as pt
import pylyrics3 as pl

In [87]:
def get_rhyme_ratio(poem):
    """
    Return the ratio of distinct rhyme labels to rhyme-scheme entries for a poem.

    The scheme is obtained from ``pt.rhyme_scheme(poem)``. Entries equal to ``' '``
    are discarded before counting (presumably these mark blank lines / stanza
    breaks -- confirm against poetrytools). The bigger the number, the more
    diverse the rhyme scheme, i.e. the fewer lines share a rhyme.

    Parameters
    ----------
    poem
        A tokenized poem as accepted by ``pt.rhyme_scheme`` (the callers in this
        notebook pass the result of ``pt.tokenize``).

    Returns
    -------
    float
        ``len(set(labels)) / len(labels)`` over the non-blank entries, or ``0.0``
        when no entries remain (empty poem or only blank entries).
    """
    # Build a filtered copy in one pass instead of repeatedly scanning the list
    # and removing blanks in place.
    labels = [label for label in pt.rhyme_scheme(poem) if label != ' ']

    if not labels:
        # Nothing left to compare: the ratio is undefined, so report 0.0 rather
        # than raising ZeroDivisionError in the middle of a batch run.
        return 0.0

    return len(set(labels)) / len(labels)

In [88]:
# Fetch the lyrics for the artist 'broadcast' through pylyrics3.
# The cells below treat the result as a mapping of album title -> {song title: lyrics}.
# NOTE(review): this presumably performs network requests and may be slow -- confirm,
# and consider caching the result so a full re-run stays cheap.
artist = pl.get_artist_lyrics('broadcast', albums=True)

In [107]:
# Per-song accumulators filled in by the cells below.
# Each name is bound to its own, independent empty list.
artist_albums, artist_songs, artist_lyrics = [], [], []   # album title / song title / raw lyrics
rhyme_ratios, rhyme_types, metres = [], [], []            # poetrytools-based measurements
word_frequency = []                                       # one word -> count dict per song

In [108]:
# Organize all songs and the albums they belong to for later use in the Pandas DataFrame.
# Both lists are rebuilt from scratch here so that re-running this cell cannot
# append the same songs a second time (which would break the DataFrame column lengths).
artist_albums = []
artist_songs = []
for album_title, songs in artist.items():
    for song_title in songs:  # iterating the per-album mapping yields its song titles
        artist_albums.append(album_title)  # album the song belongs to
        artist_songs.append(song_title)    # the song itself

In [109]:
# Get the lyrics for each song, in the same album/song order as the mapping itself.
# The list is rebuilt (not appended to) so re-running this cell never duplicates entries.
artist_lyrics = [
    lyrics
    for songs in artist.values()
    for lyrics in songs.values()
]

In [110]:
# Compute rhyme ratio, rhyme type and metre for every song in a single pass.
# The result lists are rebuilt here so re-running the cell does not duplicate entries,
# and each lyric is tokenized once instead of once per measurement.
rhyme_ratios = []
rhyme_types = []
metres = []

for lyric in artist_lyrics:
    if isinstance(lyric, str):
        # NOTE(review): assumes the poetrytools functions below do not mutate the
        # tokenized poem, so one tokenization can be shared -- confirm.
        poem = pt.tokenize(lyric)
        rhyme_ratios.append(get_rhyme_ratio(poem))
        rhyme_types.append(pt.guess_rhyme_type(poem)[-1])  # last element of the guess result
        metres.append(pt.guess_metre(poem)[-1])            # last element of the guess result
    else:
        # Lyrics that are not a string (e.g. False when none were retrieved) get a
        # placeholder so all three lists stay aligned with artist_lyrics.
        rhyme_ratios.append("N/A")
        rhyme_types.append("N/A")
        metres.append("N/A")

In [129]:
# Get a dictionary of word frequency for each song.
# The list is rebuilt here so re-running the cell does not duplicate entries.
word_frequency = []
for lyric in artist_lyrics:
    if isinstance(lyric, str):
        # Case-insensitive, whitespace-delimited words (punctuation stays attached).
        # Counter tallies in one pass and keeps first-occurrence order, so the
        # resulting dict is the same on every run.
        word_count = dict(Counter(lyric.lower().split()))
    else:
        # Non-string lyrics get an empty dict so the list stays aligned with artist_lyrics.
        word_count = {}
    word_frequency.append(word_count)
    print(word_count)

print(len(word_frequency))

{'you': 1, 'can': 1, "there's": 1, 'for': 1, 'the': 2, 'take': 1, "you've": 1, 'looking': 1, 'got': 1, 'break': 1, 'will': 1, 'strain': 1, 'believe': 1, 'weight': 1, 'to': 2, 'been': 2, 'up': 1, 'not': 1, 'my': 1, 'think': 1, "i've": 1, 'and': 1, 'or': 1, 'when': 1, 'try': 1, 'tree': 1, 'under': 1, 'plunge': 1, 'time': 1, 'guarantee': 1, 'i': 1, "don't": 1, "something's": 1, 'me': 1, 'sleep': 1, 'all': 1, 'no': 1, 'this': 2}
{'where': 2, 'coloured': 1, 'you': 4, "it's": 2, 'that': 1, 'for': 2, 'the': 11, 'along': 2, 'are': 3, 'read': 5, 'sign': 3, "you're": 1, 'everyone': 3, 'above': 3, 'anyone': 1, 'eyes': 4, 'door': 3, 'like': 1, 'pages': 1, 'aisles': 2, 'individual': 1, 'not': 2, 'feel': 1, 'and': 1, 'ignore': 1, 'of': 1, 'one': 1, 'but': 1, 'makes': 1, 'down': 2, 'titles': 2, 'lines': 2, 'on': 1, 'your': 4, 'with': 2, 'spines': 1}
{'breath': 1, 'where': 3, 'you': 7, 'why': 6, 'streets': 1, 'be': 3, 'left': 2, 'mean': 3, 'can': 3, 'trees': 1, 'have': 3, 'that': 3, 'note': 2, 'mouth'

In [93]:
# Assemble the per-song lists into a single table, one row per song.
# Column order follows the dict's insertion order.
# TODO: add a 'Year' column (a placeholder for it was left commented out).
df = pd.DataFrame({
    'Album': artist_albums,
    'Song': artist_songs,
    'Rhyme Type': rhyme_types,
    'Rhyme Ratio': rhyme_ratios,
    'Metre': metres,
    'Lyrics': artist_lyrics,
})

In [94]:
# Preview only the first rows instead of dumping the whole frame into the notebook output.
df.head(10)

Unnamed: 0,Album,Song,Rhyme Type,Rhyme Ratio,Metre,Lyrics
0,Work And Non Work (1997),Accidentals,no rhyme,0.5,trochaic tetrameter,Under the strain something's got to break \n I...
1,Work And Non Work (1997),The Book Lovers,rondeau rhyme,0.285714,trochaic tetrameter,Lines and lines \n Of the spines \n Coloured e...
2,Work And Non Work (1997),Message From Home,alternate rhyme,0.333333,trochaic tetrameter,I hope that you kept the note \n That I left f...
3,Work And Non Work (1997),Phantom,,,,False
4,Work And Non Work (1997),We've Got Time,shakespearean sonnet,0.32,iambic trimeter,"You and I \n Got something, why don't we try \..."
5,Work And Non Work (1997),Living Room,limerick,0.352941,trochaic tetrameter,You can see in miniature \n Everything is so s...
6,Work And Non Work (1997),According To No Plan,rondeau rhyme,0.375,trochaic tetrameter,"Oh, and I found myself lost \n Looked but nowh..."
7,Work And Non Work (1997),The World Backwards,alternate rhyme,0.4,trochaic tetrameter,"As we know, things aren't grow \n What's next ..."
8,Work And Non Work (1997),Lights Out,enclosed rhyme,0.310345,iambic trimeter,I'll show you for example \n A situation that'...
9,The Noise Made by People (2000),Long Was The Year,no rhyme,0.333333,iambic trimeter,"Long was the year, will you stay, now you're h..."


In [81]:
# This function was designed for another lyrics package, PyLyrics, which turned out to be broken.
# Unfortunately, pylyrics3 doesn't include stanzas in the lyrics it returns, so the function is no longer useful.

# def avg_stanza_length(poem): 
#     """Get the average length of a stanza in a poem."""
#     total_stanza = pt.stanza_lengths(poem)
#     total_stanza = total_stanza.split(',')
#     total_stanza = list(map(int, total_stanza))
#     print(total_stanza)
#     avg_stanza = sum(total_stanza) / len(total_stanza)
#     return avg_stanza