## 1. Reframing data

In [1]:
import pandas as pd
import os

In [2]:
# Root folder for the data files this notebook reads and writes.
BASE_DIR = "processed_data/"

# The three CAL500 source files live in the same sub-folder, so its name is spelled once.
_CAL500_SUBDIR = "CAL500_noAudioFeatures/"
ANNOTATIONS_FILE = os.path.join(BASE_DIR, _CAL500_SUBDIR + "hardAnnotations.txt")
SONGNAMES_FILE = os.path.join(BASE_DIR, _CAL500_SUBDIR + "songNames.txt")
VOCAB_FILE = os.path.join(BASE_DIR, _CAL500_SUBDIR + "vocab.txt")

# Song/tag annotation file stored directly under BASE_DIR.
SONGNAMES_TAGS_FILE = os.path.join(BASE_DIR, "cal500_song_tag_annotations.txt")

In [3]:
# All four files are read with header=None, i.e. their first line is treated as data.
# The annotation matrix uses pandas' default comma separator.
annotations = pd.read_csv(ANNOTATIONS_FILE, header=None)

# The remaining files are parsed as tab-separated text.
_TSV_OPTIONS = {"sep": "\t", "header": None}
vocab_file = pd.read_csv(VOCAB_FILE, **_TSV_OPTIONS)
songnames = pd.read_csv(SONGNAMES_FILE, **_TSV_OPTIONS)
# Only this frame gets explicit column names.
songames_tags = pd.read_csv(SONGNAMES_TAGS_FILE, names=["id", "tags"], **_TSV_OPTIONS)

In [4]:
# Report the shape of each loaded table, one per output line.
# The leading "" and the "\n" items stay separate print arguments so that the
# default single-space separator produces the same spacing as before.
shape_report = (
    "",
    "Annotations shape: ", annotations.shape, "\n",
    "Vocab shape: ", vocab_file.shape, "\n",
    "Songnames shape: ", songnames.shape,
)
print(*shape_report)

 Annotations shape:  (502, 174) 
 Vocab shape:  (174, 1) 
 Songnames shape:  (502, 1)


In [5]:
# Label the rows with column 0 of `songnames` and the columns with column 0 of
# `vocab_file`, then display the labelled frame.
annotations = (
    annotations
    .set_axis(songnames[0], axis=0)
    .set_axis(vocab_file[0], axis=1)
)
annotations

Unnamed: 0_level_0,Emotion-Angry_/_Agressive,NOT-Emotion-Angry_/_Agressive,Emotion-Arousing_/_Awakening,NOT-Emotion-Arousing_/_Awakening,Emotion-Bizarre_/_Weird,NOT-Emotion-Bizarre_/_Weird,Emotion-Calming_/_Soothing,NOT-Emotion-Calming_/_Soothing,Emotion-Carefree_/_Lighthearted,NOT-Emotion-Carefree_/_Lighthearted,...,Genre-Best-World,Instrument_-_Acoustic_Guitar-Solo,Instrument_-_Electric_Guitar_(clean)-Solo,Instrument_-_Electric_Guitar_(distorted)-Solo,Instrument_-_Female_Lead_Vocals-Solo,Instrument_-_Harmonica-Solo,Instrument_-_Male_Lead_Vocals-Solo,Instrument_-_Piano-Solo,Instrument_-_Saxophone-Solo,Instrument_-_Trumpet-Solo
0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
10cc-for_you_and_i,0,1,0,1,0,1,1,0,0,0,...,0,0,0,0,0,0,1,0,0,0
2pac-trapped,1,0,1,0,0,0,0,1,0,1,...,0,0,0,0,0,0,1,0,0,0
5th_dimension-one_less_bell_to_answer,0,0,0,0,0,1,1,0,0,0,...,0,1,0,0,0,0,0,0,0,0
a_tribe_called_quest-bonita_applebum,0,1,0,1,0,1,1,0,0,1,...,0,0,0,0,0,0,0,0,0,0
aaron_neville-tell_it_like_it_is,0,1,0,1,0,1,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
yo_la_tengo-tom_courtenay,0,1,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
young_mc-bust_a_move,0,1,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
young_rascals-baby_lets_wait,0,1,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
zapp-dance_floor,0,1,0,0,0,1,0,1,1,0,...,0,0,0,0,0,0,0,0,0,0


In [6]:
# Write the labelled annotation frame to CSV under BASE_DIR (index included, pandas default).
cleaned_annotations_path = os.path.join(BASE_DIR, "CLEANED_cal500_annotations.csv")
annotations.to_csv(cleaned_annotations_path)

## Generate a dataset of song names and their corresponding lyrics URL.

In [7]:
# Turn the raw song identifiers into readable names:
# underscores become spaces and each hyphen is padded with spaces.
# regex=False makes both replacements literal regardless of the pandas version
# (the default for `regex` changed in pandas 2.0).
# NOTE(review): every "-" is padded, so a hyphen inside an artist or title would
# also be split — confirm "-" only appears as the artist/title separator.
CLEANED_songnames = (
    songnames[0]
    .str.replace("_", " ", regex=False)
    .str.replace("-", " - ", regex=False)
)
CLEANED_songnames

0                         10cc - for you and i
1                               2pac - trapped
2      5th dimension - one less bell to answer
3       a tribe called quest - bonita applebum
4           aaron neville - tell it like it is
                        ...                   
497                yo la tengo - tom courtenay
498                     young mc - bust a move
499             young rascals - baby lets wait
500                         zapp - dance floor
501                   zombies - beechwood park
Name: 0, Length: 502, dtype: object

In [8]:
from modules.lyric_scraper import get_lyrics
from tqdm import tqdm
from multiprocessing import Pool

In [10]:
def fetch_lyrics_from_series(songnames_series, n_workers=4):
    """Fetch lyrics for every song name using a pool of worker processes.

    Parameters
    ----------
    songnames_series : sized iterable
        Song names handed one at a time to ``get_lyrics``; ``len()`` of it is
        used as the progress-bar total.
    n_workers : int, default 4
        Number of worker processes in the pool.

    Returns
    -------
    list
        One ``get_lyrics`` result per input, in input order.
    """
    with Pool(n_workers) as pool:
        # imap yields results in input order, so the progress bar advances
        # as each result is consumed.
        lyrics_iter = pool.imap(get_lyrics, songnames_series)
        return list(tqdm(lyrics_iter, total=len(songnames_series)))


# Bind the result to a notebook-level name so later cells can index into it,
# then display it as the cell output.
fetched_lyrics = fetch_lyrics_from_series(CLEANED_songnames[:15])
fetched_lyrics

  0%|          | 0/15 [00:00<?, ?it/s]

100%|██████████| 15/15 [00:07<00:00,  2.07it/s]


[{'song_name': 'For You and I',
  'artist_name': '10cc',
  'lyrics': "[Verse 1]\nThe world is full of other people (Ooh ooh)\nTake a look around\nWe're quick to laugh when they've got troubles (Ooh ooh)\nAnd we put 'em down\n[Pre-Chorus]\nWe go\nWe're not so hot you know\nIt's like a roller coaster\nOn a downward motion\nOn a one way street\nWe can't control our feet\nWe're on the road to ruin\nDon't know what we're doin'\n[Chorus]\nShe may be quiet\nShe may be shy\nBut don't you bring her down\nDon't pass her by\nThe world of wisdom\nBehind her eyes\nCould answer everything\nBut you and I\n[Verse 2]\nWe seem to wander in a daydream (Ooh ooh)\nWe never look around\nThe world is full of other people (Ooh ooh)\nAnd we put 'em down\n[Pre-Chorus]\nWe go\nWe're not so hot you know\nIt's like a roller coaster\nOn a downward motion\nOn a one way street\nWe can't control our feet\nWe're on the road to ruin\nDon't know what we're doin'\n[Chorus]\nShe's not beautiful\nBut don't make it plain\nYo

In [None]:
# Display the second fetched record (index 1).
# NOTE(review): requires a notebook-level `fetched_lyrics` — confirm an earlier cell assigns it.
fetched_lyrics[1]

{'song_name': 'Trapped',
 'artist_name': '2Pac',
 'lyrics': '[Verse 1: 2Pac]\nYou know they got me trapped in this prison of seclusion\nHappiness, living on the streets is a delusion\nEven a smooth criminal one day must get caught\nShot up or shot down with the bullet that he bought\nNine-millimeter kickin\' thinkin\' about what the streets do to me\n\'Cause they never taught peace in the Black community\nAll we know is violence, do the job in silence\nWalk the city streets like a rat pack of tyrants\nToo many brothers daily headed for the big pen\'\nNiggas comin\' out worse-off than when they went in\nOver the years I\'ve done a lot of growin\' up\nGettin\' drunk, throwin\' up, cuffed up, then I said I had enough\nThere must be another route, way out to money and fame\nI changed my name, played a different game\nTired of being trapped in this vicious cycle\nIf one more cop harasses me, I just might go psycho\nAnd when I get \'em, I\'ll hit \'em with the bum rush\nOnly a lunatic would 