## Goal: Scrap lyrics from lyrics.com for Artist 2

#### - import necessary libraries

In [1]:
import requests
from bs4 import BeautifulSoup
import re
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings("ignore")

In [3]:
# url of artist 2 ( The Lumineers) from lyrics.com
url_a2 = 'https://www.lyrics.com/artist/The-Lumineers/2576180'

In [4]:
# get response from the url
response_a2 = requests.get(url_a2)

In [5]:
# get all texts from the response
lyrics_a2 = response_a2.text

In [6]:
# save html in local
with open('lyrics_a2.html', 'w') as file: 
    file.write(response_a2.text)

In [7]:
# instantiate BeautifulSoup to read html structure
lyrics_a2_soup = BeautifulSoup(lyrics_a2)

In [8]:
# Let's explore the head of the html doc
lyrics_a2_soup.head.title.text

'The Lumineers Lyrics, Songs and Albums | Lyrics.com'

In [9]:
lyrics_a2_soup.head.title.text.split(',')[0].replace(' Lyrics', '')

'The Lumineers'

#### - scrap song titles of the artist 1 and links of lyrics for the respective songs

In [10]:
# html class "tal_qx" contains song title in href element
link_nd_song = lyrics_a2_soup.body.find_all(class_ ="tal qx")

In [11]:
link_nd_song

[<td class="tal qx"><strong><a href="/lyric/36157087/The+Lumineers/Nightshade">Nightshade</a></strong></td>,
 <td class="tal qx"><strong><a href="/lyric/36137076/The+Lumineers/Gloria">Gloria</a></strong></td>,
 <td class="tal qx"><strong><a href="/lyric/36215634/The+Lumineers/Donna">Donna</a></strong></td>,
 <td class="tal qx"><strong><a href="/lyric/36215633/The+Lumineers/Life+in+the+City">Life in the City</a></strong></td>,
 <td class="tal qx"><strong><a href="/lyric/36215632/The+Lumineers/Gloria">Gloria</a></strong></td>,
 <td class="tal qx"><strong><a href="/lyric/36278108/The+Lumineers/Donna">Donna</a></strong></td>,
 <td class="tal qx"><strong><a href="/lyric/36278109/The+Lumineers/Life+in+the+City">Life in the City</a></strong></td>,
 <td class="tal qx"><strong><a href="/lyric/36278111/The+Lumineers/It+Wasn%27t+Easy+to+Be+Happy+for+You">It Wasn't Easy to Be Happy for You</a></strong></td>,
 <td class="tal qx"><strong><a href="/lyric/36278112/The+Lumineers/Leader+of+the+Landslide

In [12]:
# create list of all songs titles and create complete links to access the songs' lyrics
link_a2 = []
song_a2 = []

for lyrics_a2 in link_nd_song:
    link_a2.append("https://www.lyrics.com/" + lyrics_a2.a['href'])
    song_a2.append(lyrics_a2.a.text)

In [13]:
link_a2

['https://www.lyrics.com//lyric/36157087/The+Lumineers/Nightshade',
 'https://www.lyrics.com//lyric/36137076/The+Lumineers/Gloria',
 'https://www.lyrics.com//lyric/36215634/The+Lumineers/Donna',
 'https://www.lyrics.com//lyric/36215633/The+Lumineers/Life+in+the+City',
 'https://www.lyrics.com//lyric/36215632/The+Lumineers/Gloria',
 'https://www.lyrics.com//lyric/36278108/The+Lumineers/Donna',
 'https://www.lyrics.com//lyric/36278109/The+Lumineers/Life+in+the+City',
 'https://www.lyrics.com//lyric/36278111/The+Lumineers/It+Wasn%27t+Easy+to+Be+Happy+for+You',
 'https://www.lyrics.com//lyric/36278112/The+Lumineers/Leader+of+the+Landslide',
 'https://www.lyrics.com//lyric/36278113/The+Lumineers/Left+for+Denver',
 'https://www.lyrics.com//lyric/36278101/The+Lumineers/My+Cell',
 'https://www.lyrics.com//lyric/36278102/The+Lumineers/Jimmy+Sparks',
 'https://www.lyrics.com//lyric/36521346/The+Lumineers/Democracy+%5B%2A%5D',
 'https://www.lyrics.com//lyric/36521345/The+Lumineers/Old+Lady+%5B%2A

#### - create dataframe with song title and link of lyric

In [14]:
artist_2 = pd.DataFrame({'song_title': song_a2, 'l_link': link_a2})
artist_2.drop_duplicates(subset='song_title', inplace=True)
artist_2

Unnamed: 0,song_title,l_link
0,Nightshade,https://www.lyrics.com//lyric/36157087/The+Lum...
1,Gloria,https://www.lyrics.com//lyric/36137076/The+Lum...
2,Donna,https://www.lyrics.com//lyric/36215634/The+Lum...
3,Life in the City,https://www.lyrics.com//lyric/36215633/The+Lum...
7,It Wasn't Easy to Be Happy for You,https://www.lyrics.com//lyric/36278111/The+Lum...
...,...,...
157,where we are [Acoustic],https://www.lyrics.com//lyric-lf/7526166/The+L...
163,Democracy,https://www.lyrics.com//lyric/36521451/The+Lum...
167,Old Lady,https://www.lyrics.com//lyric/36521460/The+Lum...
168,Soundtrack Song,https://www.lyrics.com//lyric/36521450/The+Lum...


#### - test to scrap 1st lyric from the dataframe

In [None]:
song_a2_0 = requests.get('https://www.lyrics.com//lyric/36157087/The+Lumineers/Nightshade').text
song_a2_0

In [16]:
# song_a2_0 contains all texts from the html page of 1st song
# after exploring the texts, it was observed that class "lyric-body" within the body contains lyric part
# and the lyric part also requires cleaup to create complete text
song_a2_0_s = BeautifulSoup(requests.get('https://www.lyrics.com//lyric/36157087/The+Lumineers/Nightshade').text).body.find(class_="lyric-body").text.replace('\r', '').replace('\n', ' ')
song_a2_0_s

"Some of us love to think of life as a game Others are born into the royalty and can never behave Some find a freedom locking men in a cage, oh no Some priest insisted it's a test of the faith And only holy devotion can keep the devils away There's only so much that a man can pray, oh no  Oh, my love Oh, my love Spare my blood Spare my blood  I saw the footprints in the white of the snow I counted thousands and I was just a rider alone Eighteen degrees and eighty miles from home, oh no We built a wall to keep the devils at bay A distant army was marching through the night and the day I swore an oath and with my life I would pay, oh no  Oh, my love Oh, my love Spare my blood  Nightshade, won't you take me away? And I can hold my breath for another day But it was all, it was all for the throne  We were surrounded and the ending was near All of the sudden, a ranger arrived, a savior appeared Made it to safety, but the devil was here, oh no  Oh, my love Nightshade, won't you take me away? 

#### - extract all lyrics and add to the dataframe

In [17]:
# create list of all lyrics based on the song title and link from the dataframe
lyric_a2 = []
for i, j in zip(artist_2['song_title'], artist_2['l_link']):
    lyric_a2.append(BeautifulSoup(requests.get(j).text).body.find(class_="lyric-body").text.replace('\n',' ').replace('\r',''))

In [18]:
# add the list into artist_1 dataframe and add the name of artist
artist_2['lyrics'] = lyric_a2
artist_2['artist_name'] = lyrics_a2_soup.head.title.text.split(',')[0].replace(' Lyrics', '')
artist_2

Unnamed: 0,song_title,l_link,lyrics,artist_name
0,Nightshade,https://www.lyrics.com//lyric/36157087/The+Lum...,Some of us love to think of life as a game Oth...,The Lumineers
1,Gloria,https://www.lyrics.com//lyric/36137076/The+Lum...,"Gloria, I smell it on your breath Gloria, booz...",The Lumineers
2,Donna,https://www.lyrics.com//lyric/36215634/The+Lum...,It's not the words you say but how you say it ...,The Lumineers
3,Life in the City,https://www.lyrics.com//lyric/36215633/The+Lum...,"Woo, woo Woo, woo And if the city's skyscrape...",The Lumineers
7,It Wasn't Easy to Be Happy for You,https://www.lyrics.com//lyric/36278111/The+Lum...,On the last time we met Your love was dead You...,The Lumineers
...,...,...,...,...
157,where we are [Acoustic],https://www.lyrics.com//lyric-lf/7526166/The+L...,Where we are (where we are) I don't know where...,The Lumineers
163,Democracy,https://www.lyrics.com//lyric/36521451/The+Lum...,It's coming through a hole in the air From tho...,The Lumineers
167,Old Lady,https://www.lyrics.com//lyric/36521460/The+Lum...,An old lady crosses the street And as she wave...,The Lumineers
168,Soundtrack Song,https://www.lyrics.com//lyric/36521450/The+Lum...,"Loneliness, oh won't you let me be Let me be a...",The Lumineers


In [19]:
# export to csv
artist_2.to_csv('artist_2.csv')