In [None]:
# Getting html of Wikipedia Article of Billie Eilish Songs

import os
from getpass import getpass
from io import StringIO

import requests
from bs4 import BeautifulSoup

url = "https://en.wikipedia.org/wiki/List_of_songs_recorded_by_Billie_Eilish"

# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() stops the notebook here on an HTTP error instead of
# silently parsing an error page further down.
req = requests.get(url, timeout=30)
req.raise_for_status()

soup = BeautifulSoup(req.content, 'html.parser')

# Show only a short preview; printing the whole page floods the cell output.
print(soup.prettify()[:1000])

In [None]:
# Converting html into Pandas DataFrame

import pandas as pd

# Second <table> element on the page.
# NOTE(review): the positional index is tied to the current page layout —
# confirm it still points at the song list if the article changes.
table = soup.find_all('table')[1]

# read_html expects a file-like object; passing a literal HTML string is
# deprecated, so wrap the markup in StringIO.
df = pd.read_html(StringIO(str(table)))[0]

In [None]:
# Preview the first rows of the table parsed from the page.
df.head()

In [None]:
# Processing Song Names

import re

def btwn_quotes(text):
    """Return the text between the first and last double quote in ``text``.

    Parameters
    ----------
    text : str
        Raw cell value, e.g. ``'"Bad Guy" (with Justin Bieber)'``.

    Returns
    -------
    The quoted portion without the quotes. When ``text`` contains no pair of
    double quotes, or is not a string at all (e.g. a missing value), it is
    returned unchanged instead of raising.
    """
    if not isinstance(text, str):
        return text

    # Greedy on purpose: spans from the first quote to the last one.
    match = re.search(r'"(.*)"', text)
    if match is None:
        return text
    return match.group(1)

# Derive a clean title column by pulling the quoted part out of each 'Song' cell.
df['song_name'] = df['Song'].map(btwn_quotes)
df.head()

In [None]:
# Setting up Genius Lyrics API

import lyricsgenius as lg

# Credentials must not live in the notebook: read the token from the
# GENIUS_ACCESS_TOKEN environment variable, and fall back to a hidden
# interactive prompt when it is not set.
# NOTE(review): a token that was ever committed in plain text should be
# treated as compromised and revoked.
api_key = os.environ.get("GENIUS_ACCESS_TOKEN") or getpass("Genius API token: ")
genius = lg.Genius(api_key, timeout=120)

# Silence the client's per-request status messages.
genius.verbose = False

In [None]:
# Getting Lyrics Using Genius API

from tqdm import tqdm
import concurrent.futures
import numpy as np
import swifter


def get_lyrics(song_name):
    """Look up a Billie Eilish song on Genius and return its lyrics.

    Parameters
    ----------
    song_name : str
        Title passed to the Genius search.

    Returns
    -------
    str or float
        The lyrics of the matched song, or ``np.nan`` when the search finds
        nothing or the lookup raises an ordinary error.

    Notes
    -----
    Only ``Exception`` subclasses are swallowed, so ``KeyboardInterrupt`` still
    stops a long-running loop over many songs.
    """
    artist_name = 'Billie Eilish'

    try:
        song = genius.search_song(title=song_name, artist=artist_name, get_full_info=False)
        # No match: report a missing value explicitly rather than relying on
        # an AttributeError from `None.lyrics`.
        if song is None:
            return np.nan
        return song.lyrics
    except Exception:
        # Best effort: one failed lookup must not abort the whole scrape.
        return np.nan

# Fetch the lyrics for every title (with a progress bar) and attach them as a column.
lyrics = [get_lyrics(title) for title in tqdm(df['song_name'])]

df['lyrics'] = lyrics
df.head()

In [None]:
# Inspect the raw lyrics stored for the row labelled 0.
df.loc[0, 'lyrics']

In [None]:
# Processing Lyrics Scraped from Genius

from gensim.utils import simple_preprocess

def pre_process(lyric):
    """Normalise one scraped lyric into a space-separated token string.

    Steps, in order:
      1. newlines become spaces;
      2. anything in ``(...)`` or ``[...]`` is replaced by a space;
      3. if the text is wrapped as ``... Lyrics <body>Embed``, only ``<body>``
         is kept; otherwise the whole text is used;
      4. the result is tokenised with ``simple_preprocess`` and re-joined.

    Parameters
    ----------
    lyric : str or missing value
        Raw lyrics text. Non-string input (e.g. the NaN stored when a lookup
        failed) is treated as "no lyrics".

    Returns
    -------
    str
        The cleaned, space-joined tokens; ``''`` for non-string input.
    """
    if not isinstance(lyric, str):
        return ''

    flat = lyric.replace('\n', ' ')
    # Raw string: same pattern as before, without invalid-escape warnings.
    flat = re.sub(r"[\(\[].*?[\)\]]", " ", flat)

    # The header/footer markers are not guaranteed to be present; fall back to
    # the full text instead of failing on a missing match.
    wrapped = re.search('Lyrics (.*)Embed', flat)
    body = wrapped.group(1) if wrapped else flat

    return ' '.join(simple_preprocess(body, min_len=0))

# Run the cleaner over every lyric through the swifter accessor, then store the result.
cleaned_lyrics = df['lyrics'].swifter.apply(pre_process)
df['lyrics_pre'] = cleaned_lyrics
df.head()

In [None]:
# Setting up Pre-Trained Sentiment Analyzer

import nltk
# Download the 'vader_lexicon' resource through NLTK before building the
# analyzer (presumably the lexicon the analyzer reads — confirm in NLTK docs).
nltk.download('vader_lexicon')

from nltk.sentiment import SentimentIntensityAnalyzer
# Single shared analyzer instance; get_sentiment() below calls it for every song.
sia = SentimentIntensityAnalyzer()

In [None]:
# Calculating sentiment scores for each song

def get_sentiment(text):
    """Return the analyzer's 'compound' polarity score for ``text``.

    Parameters
    ----------
    text : str or missing value
        Pre-processed lyrics.

    Returns
    -------
    float
        The ``'compound'`` entry of ``sia.polarity_scores(text)``, or NaN when
        ``text`` is not a string or contains only whitespace. Songs without
        usable lyrics are therefore left out of later averages rather than
        being counted as neutral.
    """
    if not isinstance(text, str) or not text.strip():
        return float('nan')
    return sia.polarity_scores(text)['compound']

# Score each cleaned lyric and keep the compound value alongside the song.
df['sentiment'] = df['lyrics_pre'].map(get_sentiment)
df.head()

In [None]:
# Setting up Spotify API

import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

# Credentials must not live in the notebook: read them from the environment
# (SPOTIPY_CLIENT_ID / SPOTIPY_CLIENT_SECRET), and fall back to a hidden
# interactive prompt when a variable is not set.
# NOTE(review): credentials that were ever committed in plain text should be
# treated as compromised and rotated.
client_id = os.environ.get("SPOTIPY_CLIENT_ID") or getpass("Spotify client ID: ")
client_secret = os.environ.get("SPOTIPY_CLIENT_SECRET") or getpass("Spotify client secret: ")

client_credentials_manager = SpotifyClientCredentials(client_id=client_id, client_secret=client_secret)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)  # spotify object to access API

In [None]:
# Getting valence scores for each song

def get_valence(song_name):
    """Return the Spotify 'valence' audio feature for a Billie Eilish track.

    The first track returned by a search for the artist and ``song_name`` is
    used.

    Parameters
    ----------
    song_name : str
        Track title to search for.

    Returns
    -------
    float
        The ``'valence'`` value of the first matching track, or ``np.nan`` when
        the search returns no tracks, no audio features are available for the
        track, or the lookup raises an ordinary error.

    Notes
    -----
    Only ``Exception`` subclasses are swallowed, so ``KeyboardInterrupt`` still
    stops a long-running apply over many songs.
    """
    artist_name = 'Billie Eilish'

    try:
        search = sp.search(q='artist:' + artist_name + ' track:' + song_name, type='track')
        items = search['tracks']['items']
        if not items:
            # Nothing matched the query.
            return np.nan

        features = sp.audio_features(items[0]['uri'])
        if not features or features[0] is None:
            # No audio-feature entry came back for this track.
            return np.nan
        return features[0]['valence']
    except Exception:
        # Best effort: one failed lookup must not abort the whole column.
        return np.nan

# Enable tqdm's progress_apply on pandas objects, then look up valence for each title.
tqdm.pandas()
valence_by_song = df['song_name'].progress_apply(get_valence)
df['valence'] = valence_by_song


In [None]:
# Preview the frame now that the sentiment and valence columns have been added.
df.head()

In [None]:
# Aggregating sentiment and valence by year

# Average the lyric sentiment and the Spotify valence for each value of 'Year'.
agg_funcs = {'sentiment': 'mean', 'valence': 'mean'}
graph_df = df.groupby('Year').agg(agg_funcs)
graph_df.head(10)

In [None]:
# Graphing sentiment and valence per year

import matplotlib.pyplot as plt
import seaborn as sns

# One theme call sets both the grid style and the default figure size.
sns.set_theme(style="whitegrid", rc={'figure.figsize': (16, 8)})

# Explicit figure/axes instead of the implicit pyplot state machine.
fig, ax = plt.subplots()
sns.lineplot(data=graph_df, palette=['Blue', 'Green'], ax=ax)

# One tick per year actually present in the data, so the axis does not go
# stale when songs from later years are added.
# NOTE(review): assumes the 'Year' index is numeric — confirm against the table.
ax.set_xticks(list(graph_df.index))
ax.set(
    xlabel='Year',
    ylabel='Mean score',
    title='Mean lyric sentiment and Spotify valence per year',
)
plt.show()