## Goal: Scrap lyrics from lyrics.com for Artist 1

#### - import necessary libraries

In [1]:
import requests
from bs4 import BeautifulSoup
import re
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings("ignore")

In [2]:
# url of artist 1 ( Of Monster and Men) from lyrics.com
url_a1 = 'https://www.lyrics.com/artist/Of-Monsters-and-Men/2552313'

In [3]:
# get response from the url
response_a1 = requests.get(url_a1)

In [4]:
# get all texts from the response
lyrics_a1 = response_a1.text

In [6]:
# save html in local
with open('lyrics_a1.html', 'w') as file: 
    file.write(response_a1.text)

In [7]:
# instantiate BeautifulSoup to read html structure
lyrics_a1_soup = BeautifulSoup(lyrics_a1)

In [8]:
# Let's explore the head of the html doc
lyrics_a1_soup.head.title.text

'Of Monsters and Men Lyrics, Songs and Albums | Lyrics.com'

In [24]:
lyrics_a1_soup.head.title.text.split(',')[0].replace(' Lyrics', '')

'Of Monsters and Men'

#### - scrap song titles of the artist 1 and links of lyrics for the respective songs

In [9]:
# html class "tal_qx" contains song title in href element
link_nd_song = lyrics_a1_soup.body.find_all(class_ ="tal qx")

In [10]:
link_nd_song

[<td class="tal qx"><strong><a href="/lyric/36182049/Of+Monsters+and+Men/Alligator">Alligator</a></strong></td>,
 <td class="tal qx"><strong><a href="/lyric/36406258/Of+Monsters+and+Men/Alligator">Alligator</a></strong></td>,
 <td class="tal qx"><strong><a href="/lyric/36406257/Of+Monsters+and+Men/Ahay">Ahay</a></strong></td>,
 <td class="tal qx"><strong><a href="/lyric/36406262/Of+Monsters+and+Men/R%C3%B3r%C3%B3r%C3%B3">Róróró</a></strong></td>,
 <td class="tal qx"><strong><a href="/lyric/36406261/Of+Monsters+and+Men/Waiting+for+the+Snow">Waiting for the Snow</a></strong></td>,
 <td class="tal qx"><strong><a href="/lyric/36406260/Of+Monsters+and+Men/Vulture%2C+Vulture">Vulture, Vulture</a></strong></td>,
 <td class="tal qx"><strong><a href="/lyric/36406259/Of+Monsters+and+Men/Wild+Roses">Wild Roses</a></strong></td>,
 <td class="tal qx"><strong><a href="/lyric/36406263/Of+Monsters+and+Men/Stuck+in+Gravity">Stuck in Gravity</a></strong></td>,
 <td class="tal qx"><strong><a href="/lyric

In [11]:
# create list of all songs titles and create complete links to access the songs' lyrics
link_a1 = []
song_a1 = []

for lyrics_a1 in link_nd_song:
    link_a1.append("https://www.lyrics.com/" + lyrics_a1.a['href'])
    song_a1.append(lyrics_a1.a.text)

In [12]:
link_a1

['https://www.lyrics.com//lyric/36182049/Of+Monsters+and+Men/Alligator',
 'https://www.lyrics.com//lyric/36406258/Of+Monsters+and+Men/Alligator',
 'https://www.lyrics.com//lyric/36406257/Of+Monsters+and+Men/Ahay',
 'https://www.lyrics.com//lyric/36406262/Of+Monsters+and+Men/R%C3%B3r%C3%B3r%C3%B3',
 'https://www.lyrics.com//lyric/36406261/Of+Monsters+and+Men/Waiting+for+the+Snow',
 'https://www.lyrics.com//lyric/36406260/Of+Monsters+and+Men/Vulture%2C+Vulture',
 'https://www.lyrics.com//lyric/36406259/Of+Monsters+and+Men/Wild+Roses',
 'https://www.lyrics.com//lyric/36406263/Of+Monsters+and+Men/Stuck+in+Gravity',
 'https://www.lyrics.com//lyric/36406265/Of+Monsters+and+Men/Sleepwalker',
 'https://www.lyrics.com//lyric/36406264/Of+Monsters+and+Men/Wars',
 'https://www.lyrics.com//lyric/36406256/Of+Monsters+and+Men/Under+a+Dome',
 'https://www.lyrics.com//lyric/36406255/Of+Monsters+and+Men/Soothsayer',
 'https://www.lyrics.com//lyric/36389820/Of+Monsters+and+Men/Wild+Roses',
 'https://www.

#### - create dataframe with song title and link of lyric

In [17]:
artist_1 = pd.DataFrame({'song_title': song_a1, 'l_link': link_a1})
artist_1.drop_duplicates(subset='song_title', inplace=True)
artist_1

Unnamed: 0,song_title,l_link
0,Alligator,https://www.lyrics.com//lyric/36182049/Of+Mons...
2,Ahay,https://www.lyrics.com//lyric/36406257/Of+Mons...
3,Róróró,https://www.lyrics.com//lyric/36406262/Of+Mons...
4,Waiting for the Snow,https://www.lyrics.com//lyric/36406261/Of+Mons...
5,"Vulture, Vulture",https://www.lyrics.com//lyric/36406260/Of+Mons...
6,Wild Roses,https://www.lyrics.com//lyric/36406259/Of+Mons...
7,Stuck in Gravity,https://www.lyrics.com//lyric/36406263/Of+Mons...
8,Sleepwalker,https://www.lyrics.com//lyric/36406265/Of+Mons...
9,Wars,https://www.lyrics.com//lyric/36406264/Of+Mons...
10,Under a Dome,https://www.lyrics.com//lyric/36406256/Of+Mons...


#### - test to scrap 1st lyric from the dataframe

In [None]:
song_a1_0 = requests.get('https://www.lyrics.com//lyric/36182049/Of+Monsters+and+Men/Alligator').text
song_a1_0

In [15]:
# song_a1_0 contains all texts from the html page of 1st song
# after exploring the texts, it was observed that class "lyric-body" within the body contains lyric part
# and the lyric part also requires cleaup to create complete text
song_a1_0_s = BeautifulSoup(requests.get('https://www.lyrics.com//lyric/36406257/Of+Monsters+and+Men/Alligator').text).body.find(class_="lyric-body").text.replace('\r', '').replace('\n', ' ')
song_a1_0_s

"I know that I'm wrong And now we're both sitting in silence But if it's alright I'll wrap myself around your cold shoulder  You think you know me But do you really? Something you do Something you say Oh, we don't talk about it You think you know me But do you really? There's something 'bout you Something 'bout you  (We don't talk about it)  A bull without horns Said nothing, so how come I'm blushing? And though you are warm The cold lingers on the tip of your fingers  You think you know me But do you really? Something you do Something you say Oh, we don't talk about it You think you know me But do you really? There's something 'bout you Something 'bout you  You couldn't change this mountain You've got me chasing the sound but I just hear fractions You couldn't change this mountain You've got me facing the sun but I just need direction  You think you know me But do you really? Something you do Something you say Oh, we don't talk about it You think you know me But do you really? There's

#### - extract all lyrics and add to the dataframe

In [18]:
# create list of all lyrics based on the song title and link from the dataframe
lyric_a1 = []
for i, j in zip(artist_1['song_title'], artist_1['l_link']):
    lyric_a1.append(BeautifulSoup(requests.get(j).text).body.find(class_="lyric-body").text.replace('\n',' ').replace('\r',''))

In [26]:
# add the list into artist_1 dataframe and add the name of artist
artist_1['lyrics'] = lyric_a1
artist_1['artist_name'] = lyrics_a1_soup.head.title.text.split(',')[0].replace(' Lyrics', '')
artist_1

Unnamed: 0,song_title,l_link,lyrics,artist_name
0,Alligator,https://www.lyrics.com//lyric/36182049/Of+Mons...,Hey Hey I see color raining down Feral feelin...,Of Monsters and Men
2,Ahay,https://www.lyrics.com//lyric/36406257/Of+Mons...,I know that I'm wrong And now we're both sitti...,Of Monsters and Men
3,Róróró,https://www.lyrics.com//lyric/36406262/Of+Mons...,I am up with the sunrise I'm tired and I think...,Of Monsters and Men
4,Waiting for the Snow,https://www.lyrics.com//lyric/36406261/Of+Mons...,I'm waiting for the snow I'm waiting for visio...,Of Monsters and Men
5,"Vulture, Vulture",https://www.lyrics.com//lyric/36406260/Of+Mons...,"He lives in the darkness, he's calling my name...",Of Monsters and Men
6,Wild Roses,https://www.lyrics.com//lyric/36406259/Of+Mons...,Wild roses on a bed of leaves in the month of ...,Of Monsters and Men
7,Stuck in Gravity,https://www.lyrics.com//lyric/36406263/Of+Mons...,I'm stuck in gravity I'm far from where I wann...,Of Monsters and Men
8,Sleepwalker,https://www.lyrics.com//lyric/36406265/Of+Mons...,"I start a war, no time to think about it It mi...",Of Monsters and Men
9,Wars,https://www.lyrics.com//lyric/36406264/Of+Mons...,How does it feel To lie alone in your bed When...,Of Monsters and Men
10,Under a Dome,https://www.lyrics.com//lyric/36406256/Of+Mons...,Under a dome While we wait for it We feel all ...,Of Monsters and Men


In [27]:
# export to csv
artist_1.to_csv('artist_1.csv')