# Project 4: Web scraping and text classification

The notebook explains how to create a ML classification model to predict the author of the lyrics scraped from the website https://www.lyrics.com. The notebook was developed as as study project for the Spiced Academy Data Science Bootcamp.

In [1]:
import requests
from bs4 import BeautifulSoup
import re
import os
import csv
import pandas as pd
import glob
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords

## Web scraping author 1 (Bob Dylan)

In [17]:
#scraping Bob Dylan's songs
url_dylan = 'https://www.lyrics.com/artist/Bob-Dylan/4147'

In [18]:
response_dylan = requests.get(url_dylan)

In [19]:
#checking the status of the request
response_dylan.status_code

200

In [20]:
html_dylan = response_dylan.text

In [21]:
parsed_html_dylan = BeautifulSoup(html_dylan)

In [22]:
links_dylan = parsed_html_dylan.find_all('a')


In [23]:
# function to format the artist's name as included in the lyrics' links
# the name is inputed as a string and the white spaces are used to split the name

def format_artist_name (name):
    name_parts = name.split()
    name_length = len(name_parts)
    author_name = []
    for x in range (0, name_length):
        author_name.append(name_parts[x])
    author_str =""
    result= "+".join(author_name, )
    return result

In [24]:
dylan = format_artist_name("Bob Dylan")

In [25]:
# function to extract the urls for the lyric webpages

def get_lyric_url(links, artist):
    artist_name = format_artist_name(artist)
    
    lyric_url_list = []
    for link in links:
        try:
            if artist_name in link.get('href'):
                part_url = link.get('href')
                lyric_url_list.append("https://www.lyrics.com/track/" + part_url[7:])
        except:
            continue
    
    return lyric_url_list
    
       

lyric_urls_dylan = get_lyric_url(links_dylan, "Bob Dylan")


In [27]:
def get_lyrics(lyric_urls, artist):
    artist_name = format_artist_name(artist)
    current_wd = os.getcwd()
    new_d_name = artist_name.lower().replace("+", "_")
    artist_d = os.mkdir(current_wd+'/'+new_d_name)
    
    metadata = [["song_num", "song_title"]]
    for url in lyric_urls:
        r = requests.get(url)
        html_songpage = r.text
        parsed_html_songpage = BeautifulSoup(html_songpage)
        try:
            song_title = parsed_html_songpage.find_all("h1", attrs={'class': 'lyric-title'})[0].text
            song_text = parsed_html_songpage.find_all("pre")[0].text #, attrs={'class': 'lyric-body wselect-cnt'})
            filename = re.search("\d+",url).group()
            metadata_song = [filename,song_title]
            metadata.append(metadata_song)
            path = current_wd + '/'+ new_d_name + '/' + filename+".txt"
        
            with open(path, 'w') as file:
                file.write(song_text)
        except:
            continue
    
    path_metadata = current_wd + '/'+ new_d_name + '/' + "metadata" + ".csv"
    with open(path_metadata, 'w') as csv_file:
        csv_file_writer = csv.writer(csv_file)
        csv_file_writer.writerows(metadata)
        
        
        
        
          


In [None]:
#collecting Dylan's song lyrics
dylan_lyrics = get_lyrics(lyric_urls_dylan, "Bob Dylan")

## Web scraping author 2 (Bruce Springsteen)

In [319]:
#scraping Bruce Springsteen's songs
url_sprin = 'https://www.lyrics.com/artist/Bruce-Springsteen/5505'
response_sprin = requests.get(url_sprin)
html_sprin = response_sprin.text
parsed_html_sprin = BeautifulSoup(html_sprin)
links_sprin = parsed_html_sprin.find_all('a')
holiday = format_artist_name("Bruce Springsteen")
lyric_urls_sprin = get_lyric_url(links_sprin, "Bruce Springsteen")
sprin_lyrics = get_lyrics(lyric_urls_sprin, "Bruce Springsteen")

NameError: name 'format_artist_name' is not defined

## Reading the scraped data

In [2]:
# Reading the Bob Dylan's data in a df
metadata_path_dylan = os.getcwd() + '/'+ "bob_dylan" + '/' + "metadata.csv"

#Generating a dataframe for the songs metadata
df_metadata_dylan = pd.read_csv(metadata_path_dylan)
df_metadata_dylan['author']= 'Bob Dylan'

#def read_metadata_author(author):
    

In [3]:
print(df_metadata_dylan.dtypes)

song_num       int64
song_title    object
author        object
dtype: object


In [4]:
df_metadata_dylan.head(3)

Unnamed: 0,song_num,song_title,author
0,36145301,Buckets of Rain,Bob Dylan
1,36145307,Idiot Wind,Bob Dylan
2,36145303,"If You See Her, Say Hello",Bob Dylan


In [5]:
txt_files = glob.glob("bob_dylan/*.txt")
current_wd = os.getcwd()

lyrics_dict_dylan ={ }
for file in txt_files:
    filename = re.search(r"\d+", file).group()
    txt_filename = filename + ".txt"
    with open (os.path.join(current_wd,"bob_dylan", txt_filename), 'r+') as f:
        lyrics = f.read()
        lyrics_dict_dylan.update({filename:lyrics})
        
df_lyrics_dylan = pd.DataFrame.from_dict(lyrics_dict_dylan, orient='index').reset_index()
df_lyrics_dylan.columns =["song_num", "song_lyrics"]
df_lyrics_dylan['song_num']= df_lyrics_dylan['song_num'].astype(np.int64)
df_lyrics_dylan.head(3)
print(df_lyrics_dylan.dtypes)



#def read_file_content(txt_folder):
    
    

song_num        int64
song_lyrics    object
dtype: object


In [6]:
df_dylan = df_metadata_dylan.merge(df_lyrics_dylan)

df_dylan.head(3)
print(df_dylan.shape)

(6546, 4)


In [7]:
# Reading the Bruce Springsteen's data in a df
metadata_path_sprin = os.getcwd() + '/'+ "bruce_springsteen" + '/' + "metadata.csv"
df_metadata_sprin = pd.read_csv(metadata_path_sprin)
df_metadata_sprin['author']= 'Bruce Springsteen'
txt_files = glob.glob("bruce_springsteen/*.txt")
current_wd = os.getcwd()

lyrics_dict_sprin ={ }
for file in txt_files:
    filename = re.search(r"\d+", file).group()
    txt_filename = filename + ".txt"
    with open (os.path.join(current_wd,"bruce_springsteen", txt_filename), 'r+') as f:
        lyrics = f.read()
        lyrics_dict_sprin.update({filename:lyrics})
        
df_lyrics_sprin = pd.DataFrame.from_dict(lyrics_dict_sprin, orient='index').reset_index()
df_lyrics_sprin.columns =["song_num", "song_lyrics"]
df_lyrics_sprin['song_num']= df_lyrics_sprin['song_num'].astype(np.int64)
df_lyrics_sprin.head(3)

print(df_lyrics_sprin.dtypes)
df_sprin = df_metadata_sprin.merge(df_lyrics_sprin)
print(df_sprin.shape)
df_sprin.head(3
              )


song_num        int64
song_lyrics    object
dtype: object
(2706, 4)


Unnamed: 0,song_num,song_title,author,song_lyrics
0,36456915,Dancing in the Dark,Bruce Springsteen,I get up in the evenin'\nAnd I ain't got nothi...
1,36456910,Badlands,Bruce Springsteen,Lights out tonight\nTrouble in the heartland\n...
2,36456909,Cover Me,Bruce Springsteen,"The times are tough now, just getting tougher\..."


In [8]:
# Joining the two authors dataframes
df = pd.concat([df_dylan, df_sprin])

In [9]:
X_train, X_test, y_train, y_test = train_test_split(df.drop(["author"], axis=1), df["author"],
                                                    test_size=0.2, random_state=42)

In [10]:
X_train.head(3)

Unnamed: 0,song_num,song_title,song_lyrics
4756,22184408,Jokerman,Standing on the waters casting your bread\nWhi...
2302,21226112,The Times They Are a-Changin',Come gather 'round people\nWherever you roam\n...
5278,772620,Emotionally Yours,"Come baby, find me, come baby, remind me of wh..."


In [11]:
X_train['cleaned_lyric'] = X_train['song_lyrics'].str.replace("\d+|\n|[.,:;!?]|\s", " ")

In [12]:
text_list = X_train['cleaned_lyric'].tolist()

In [13]:
nltk.download('punkt')

[nltk_data] Downloading package punkt to
[nltk_data]     /Users/giudittaparolini/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [14]:
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/giudittaparolini/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [15]:
nltk_stopwords = stopwords.words("english")

In [16]:
my_stopwords = nltk_stopwords+['aa']

In [17]:
tokens_list = [word_tokenize(text) for text in text_list]

In [18]:
cleaned_tokens = []
for text in tokens_list:
    cleaned_tokens_subl = []
    for token in text:
        if (token not in my_stopwords) and (len(token)>3):
            cleaned_tokens_subl.append(token)
    cleaned_tokens.append(cleaned_tokens_subl)
        

In [19]:
cleaned_tokens_str_format = []
for element in cleaned_tokens:
    my_str = " ".join(map(str, element))
    cleaned_tokens_str_format.append(my_str)



In [20]:
X_train['cleaned_lyric'] = X_train['cleaned_lyric'].astype("string")

In [21]:
X_train.dtypes

song_num          int64
song_title       object
song_lyrics      object
cleaned_lyric    string
dtype: object

In [22]:
y_train.head(3)

4756    Bob Dylan
2302    Bob Dylan
5278    Bob Dylan
Name: author, dtype: object

In [23]:
list_cleaned_lyrics = X_train['cleaned_lyric'].tolist()

In [24]:

labels = y_train.tolist()

In [25]:
vect = TfidfVectorizer()
X =  vect.fit_transform(list_cleaned_lyrics)

In [26]:
X.shape

(7401, 11192)

In [27]:
pd.DataFrame(X.todense(), index=labels)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,11182,11183,11184,11185,11186,11187,11188,11189,11190,11191
Bob Dylan,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Bob Dylan,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Bob Dylan,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Bob Dylan,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Bob Dylan,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Bob Dylan,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Bob Dylan,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Bob Dylan,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Bob Dylan,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## Fitting a logistic regression model

In [77]:
model_lr = LogisticRegression().fit(X, y_train)

In [78]:
model_lr.score(X, y_train)

0.9847317930009458

In [79]:
X_test['cleaned_lyric'] = X_test['song_lyrics'].str.replace("\d+|\n|[.,:;!?]|\s", " ")

In [80]:
text_list_test = X_test['cleaned_lyric'].tolist()

In [81]:
tokens_list_test = [word_tokenize(text) for text in text_list_test]

In [82]:
cleaned_tokens_test = []
for text in tokens_list_test:
    cleaned_tokens_subl = []
    for token in text:
        if (token not in my_stopwords) and (len(token)>3):
            cleaned_tokens_subl.append(token)
    cleaned_tokens_test.append(cleaned_tokens_subl)

In [83]:
cleaned_tokens_str_format_test = []
for element in cleaned_tokens_test:
    my_str = " ".join(map(str, element))
    cleaned_tokens_str_format_test.append(my_str)

In [84]:
X_test['cleaned_lyric'] = pd.Series(data=cleaned_tokens_str_format_test, index = X_test.index)

In [85]:
list_cleaned_lyrics_test = X_test['cleaned_lyric'].tolist()

In [86]:
X_test_transformed = vect.transform(list_cleaned_lyrics_test)

In [87]:
X_test_transformed.shape

(1851, 11192)

In [88]:
ypred = model_lr.predict(X_test_transformed)    

In [89]:
ypred.shape

(1851,)

In [90]:
model_lr.score(X_test_transformed,y_test)

0.9567801188546732

In [91]:
probs = model_lr.predict_proba(X_test_transformed)  



In [92]:
df_probs = pd.DataFrame(probs)

In [93]:
type(ypred)

numpy.ndarray

In [94]:
ser_ypred = pd.Series(ypred) 

In [95]:
pred_df = df_probs.merge(ser_ypred.rename('ypred'), left_index=True, right_index=True)

In [96]:
pred_df

Unnamed: 0,0,1,ypred
0,0.919401,0.080599,Bob Dylan
1,0.869043,0.130957,Bob Dylan
2,0.141812,0.858188,Bruce Springsteen
3,0.029374,0.970626,Bruce Springsteen
4,0.828357,0.171643,Bob Dylan
...,...,...,...
1846,0.858517,0.141483,Bob Dylan
1847,0.829871,0.170129,Bob Dylan
1848,0.871338,0.128662,Bob Dylan
1849,0.187011,0.812989,Bruce Springsteen


In [97]:
y_test.index = pred_df.index

In [98]:
pred_df['y_true'] = y_test

In [99]:
pred_df

Unnamed: 0,0,1,ypred,y_true
0,0.919401,0.080599,Bob Dylan,Bob Dylan
1,0.869043,0.130957,Bob Dylan,Bob Dylan
2,0.141812,0.858188,Bruce Springsteen,Bruce Springsteen
3,0.029374,0.970626,Bruce Springsteen,Bruce Springsteen
4,0.828357,0.171643,Bob Dylan,Bob Dylan
...,...,...,...,...
1846,0.858517,0.141483,Bob Dylan,Bob Dylan
1847,0.829871,0.170129,Bob Dylan,Bob Dylan
1848,0.871338,0.128662,Bob Dylan,Bob Dylan
1849,0.187011,0.812989,Bruce Springsteen,Bruce Springsteen


In [100]:
pred_df.index

RangeIndex(start=0, stop=1851, step=1)

In [101]:
X_test.index

Int64Index([ 586, 4890, 2559, 1630, 3064, 1145, 6145,  289,  620, 6066,
            ...
             297, 1578, 5356,  445, 2377, 3487, 6156, 5255, 2217, 2650],
           dtype='int64', length=1851)

In [102]:
pred_check = pd.concat([X_test.reset_index(drop=True),pred_df.reset_index(drop=True)], axis=1)


In [103]:
pred_check

Unnamed: 0,song_num,song_title,song_lyrics,cleaned_lyric,0,1,ypred,y_true
0,32346969,Like a Rolling Stone [Take 5] [remake] [Rehear...,Once upon a time you dressed so fine\nThrew th...,Once upon time dressed fine Threw bums dime pr...,0.919401,0.080599,Bob Dylan,Bob Dylan
1,1906865,You Ain't Going Nowhere,Clouds so swift\nRain won’t lift\nGate won’t c...,Clouds swift Rain lift Gate close Railings fro...,0.869043,0.130957,Bob Dylan,Bob Dylan
2,14251760,Adam Raised a Cain,In the summer that I was baptized\nMy father h...,summer baptized father held side water said cr...,0.141812,0.858188,Bruce Springsteen,Bruce Springsteen
3,8308531,Darkness on the Edge of Town,Well they're still racing out at the trestles\...,Well still racing trestles blood never burned ...,0.029374,0.970626,Bruce Springsteen,Bruce Springsteen
4,5440027,I Shall Be Released,They say every man must need protection\nThey ...,They every must need protection They every mus...,0.828357,0.171643,Bob Dylan,Bob Dylan
...,...,...,...,...,...,...,...,...
1846,8304717,Can't Wait,I can't wait\nWait for you to change your mind...,wait Wait change mind late tryin walk line Wel...,0.858517,0.141483,Bob Dylan,Bob Dylan
1847,5059002,Just Like Tom Thumb's Blues,When you're lost in the rain in Juarez when it...,When lost rain Juarez Easter time gravity fail...,0.829871,0.170129,Bob Dylan,Bob Dylan
1848,2598857,Up to Me [#],Everything went from bad to worse\nMoney never...,Everything went worse Money never changed thin...,0.871338,0.128662,Bob Dylan,Bob Dylan
1849,217862,Stand on It,Well Jimmy Lee was hookin' 'round the far turn...,Well Jimmy hookin 'round turn funky southern F...,0.187011,0.812989,Bruce Springsteen,Bruce Springsteen


In [104]:
compare = pred_check[pred_check['ypred'] != pred_check['y_true'] ]

In [105]:
compare

Unnamed: 0,song_num,song_title,song_lyrics,cleaned_lyric,0,1,ypred,y_true
26,12299174,Someday Baby,"I don't care what you do, I don't care what yo...",care care care long stay Someday baby worry We...,0.460608,0.539392,Bruce Springsteen,Bob Dylan
47,35896423,I Want You [Take 1] [Take],The guilty undertaker sighs\nThe lonesome orga...,guilty undertaker sighs lonesome organ grinder...,0.487337,0.512663,Bruce Springsteen,Bob Dylan
100,8304636,I Want You,The guilty undertaker sighs\nThe lonesome orga...,guilty undertaker sighs lonesome organ grinder...,0.487337,0.512663,Bruce Springsteen,Bob Dylan
137,9214666,I Want You,The guilty undertaker sighs\nThe lonesome orga...,guilty undertaker sighs lonesome organ grinder...,0.487337,0.512663,Bruce Springsteen,Bob Dylan
174,30084273,My Wife's Home Town,"Well, I didn't come here to deal with a doggon...",Well come deal doggone thing came hear drop cy...,0.492686,0.507314,Bruce Springsteen,Bob Dylan
...,...,...,...,...,...,...,...,...
1690,5620370,Let's Be Friends (Skin to Skin),I been watchin' you a long time\nTrying to fig...,watchin long time Trying figure moving line ti...,0.710939,0.289061,Bob Dylan,Bruce Springsteen
1722,32625921,I Believe in Miracles,I used to be on an endless run;\nBelieved in m...,used endless Believed miracles 'cause blessed ...,0.501310,0.498690,Bob Dylan,Bruce Springsteen
1812,24940019,Don't Fall Apart On Me Tonight,"Just a minute before you leave, girl\nJust a m...",Just minute leave girl Just minute touch door ...,0.324338,0.675662,Bruce Springsteen,Bob Dylan
1836,23366984,Awake [Live],A beautiful and blinding morning\nThe world ou...,beautiful blinding morning world outside begin...,0.869281,0.130719,Bob Dylan,Bruce Springsteen


In [106]:
compare.shape

(80, 8)

## Testing user input

In [108]:
user = input()

 


In [None]:
user_t = vect.transform( [user] ) 

In [None]:
model.predict(user_t)

## Naive Bayes Model

In [109]:
model_nb = MultinomialNB().fit(X, y_train)

In [110]:
model_nb.score(X, y_train)

0.9548709633833266

In [111]:
ypred_nb = model_nb.predict(X_test_transformed)    

In [112]:
model_nb.score(X_test_transformed,y_test)

0.9621826039978391

In [113]:
probs_nb = model_nb.predict_proba(X_test_transformed)  

In [114]:
df_probs_nb = pd.DataFrame(probs_nb)

In [115]:
df_probs_nb

Unnamed: 0,0,1
0,0.999892,0.000108
1,0.976424,0.023576
2,0.085451,0.914549
3,0.030276,0.969724
4,0.980461,0.019539
...,...,...
1846,0.975277,0.024723
1847,0.995585,0.004415
1848,0.991458,0.008542
1849,0.082754,0.917246


In [116]:
pred_df_nb = df_probs_nb.merge(ser_ypred.rename('ypred'), left_index=True, right_index=True)

In [117]:
pred_df_nb['y_true'] = y_test

In [118]:
pred_check_nb = pd.concat([X_test.reset_index(drop=True),pred_df_nb.reset_index(drop=True)], axis=1)

In [119]:
compare_nb = pred_check_nb[pred_check_nb['ypred'] != pred_check_nb['y_true'] ]

In [120]:
compare_nb

Unnamed: 0,song_num,song_title,song_lyrics,cleaned_lyric,0,1,ypred,y_true
26,12299174,Someday Baby,"I don't care what you do, I don't care what yo...",care care care long stay Someday baby worry We...,0.798509,0.201491,Bruce Springsteen,Bob Dylan
47,35896423,I Want You [Take 1] [Take],The guilty undertaker sighs\nThe lonesome orga...,guilty undertaker sighs lonesome organ grinder...,0.981926,0.018074,Bruce Springsteen,Bob Dylan
100,8304636,I Want You,The guilty undertaker sighs\nThe lonesome orga...,guilty undertaker sighs lonesome organ grinder...,0.981926,0.018074,Bruce Springsteen,Bob Dylan
137,9214666,I Want You,The guilty undertaker sighs\nThe lonesome orga...,guilty undertaker sighs lonesome organ grinder...,0.981926,0.018074,Bruce Springsteen,Bob Dylan
174,30084273,My Wife's Home Town,"Well, I didn't come here to deal with a doggon...",Well come deal doggone thing came hear drop cy...,0.748616,0.251384,Bruce Springsteen,Bob Dylan
...,...,...,...,...,...,...,...,...
1690,5620370,Let's Be Friends (Skin to Skin),I been watchin' you a long time\nTrying to fig...,watchin long time Trying figure moving line ti...,0.884423,0.115577,Bob Dylan,Bruce Springsteen
1722,32625921,I Believe in Miracles,I used to be on an endless run;\nBelieved in m...,used endless Believed miracles 'cause blessed ...,0.530043,0.469957,Bob Dylan,Bruce Springsteen
1812,24940019,Don't Fall Apart On Me Tonight,"Just a minute before you leave, girl\nJust a m...",Just minute leave girl Just minute touch door ...,0.757987,0.242013,Bruce Springsteen,Bob Dylan
1836,23366984,Awake [Live],A beautiful and blinding morning\nThe world ou...,beautiful blinding morning world outside begin...,0.960954,0.039046,Bob Dylan,Bruce Springsteen


In [121]:
lr_mist = compare['song_title'].tolist()

In [122]:
lr_mist

['Someday Baby',
 'I Want You [Take 1] [Take]',
 'I Want You',
 'I Want You',
 "My Wife's Home Town",
 'Under the Red Sky',
 'War',
 'When Will I Be Loved',
 'Bloodshot Eyes',
 'War',
 'Angel Eyes',
 'Easy Money',
 'The Ghost of Tom Joad',
 'Held Up Without a Gun',
 'I Want You',
 'The Wrestler [*]',
 'To Be Alone with You [Take 1] [Take]',
 "My Wife's Home Town",
 'I Want You',
 'Give My Love to Rose',
 'This Ole House',
 'Superstar',
 'When the Saints Go Marching In [Live]',
 'Un-Break My Heart',
 "Something's Burning, Baby",
 'Just Like This Train',
 'You Angel You',
 'Punishment Fits the Crime',
 'Someday Baby',
 'Went to See the Gypsy',
 'I Want You',
 'Step It Up and Go [Remastered]',
 'God Knows',
 'Code of Silence',
 'If I Had a Hammer [*]',
 'John Henry',
 'I Want You',
 'Held Up Without a Gun',
 'I Want You',
 'I Want You',
 'I Want You',
 'Never Gonna Be the Same Again',
 "Don't Fall Apart on Me Tonight",
 'God Knows',
 "Tell Me That It Isn't True",
 'Why Was I Born',
 'This

In [123]:
nb_mist = compare_nb ['song_title'].tolist()

In [124]:
nb_mist

['Someday Baby',
 'I Want You [Take 1] [Take]',
 'I Want You',
 'I Want You',
 "My Wife's Home Town",
 'Under the Red Sky',
 'War',
 'When Will I Be Loved',
 'Bloodshot Eyes',
 'War',
 'Angel Eyes',
 'Easy Money',
 'The Ghost of Tom Joad',
 'Held Up Without a Gun',
 'I Want You',
 'The Wrestler [*]',
 'To Be Alone with You [Take 1] [Take]',
 "My Wife's Home Town",
 'I Want You',
 'Give My Love to Rose',
 'This Ole House',
 'Superstar',
 'When the Saints Go Marching In [Live]',
 'Un-Break My Heart',
 "Something's Burning, Baby",
 'Just Like This Train',
 'You Angel You',
 'Punishment Fits the Crime',
 'Someday Baby',
 'Went to See the Gypsy',
 'I Want You',
 'Step It Up and Go [Remastered]',
 'God Knows',
 'Code of Silence',
 'If I Had a Hammer [*]',
 'John Henry',
 'I Want You',
 'Held Up Without a Gun',
 'I Want You',
 'I Want You',
 'I Want You',
 'Never Gonna Be the Same Again',
 "Don't Fall Apart on Me Tonight",
 'God Knows',
 "Tell Me That It Isn't True",
 'Why Was I Born',
 'This

In [125]:
compare_mist = [element for element in nb_mist if element not in lr_mist]


In [126]:
compare_mist

[]