In [None]:
!pip install spotipy==2.23.0

In [None]:
!pip install billboard.py

In [None]:
# Genius.com lyric scraper

import re
import requests
from bs4 import BeautifulSoup
import time

def parentheses_cleaner(lyrics):
    '''Condense parenthetical expressions printed on multiple lines and attach
    them to the preceding lyric line, leaving the following content untouched.

    Args:
        lyrics (str): Lyric text that may contain parentheticals split over
            several lines.

    Returns:
        str: The text in which every parenthetical that closes a line is
        collapsed to " (content)" on the line it follows, each parenthetical is
        separated from preceding text on the same line by exactly one space,
        and leading/trailing whitespace is stripped. Parentheticals that
        contain nested parentheses are not collapsed by the first pass.
    '''
    # [^()] keeps one match inside a single pair of parentheses. A lazy
    # dot-all wildcard could run from one "(" to the ")" of a later
    # parenthetical and merge the lyric lines in between.
    line_end_parenthetical = re.compile(r'\s*\(\s*([^()]*?)\s*\)\s*(?=\n|$)')

    def collapse(match):
        # Squash internal newlines / runs of whitespace to single spaces.
        return ' (' + ' '.join(match.group(1).split()) + ')'

    cleaned_lyrics = line_end_parenthetical.sub(collapse, lyrics)

    # Normalise the gap before a parenthetical to one space. The same-line
    # whitespace is matched outside the captured group so that an existing
    # space is replaced rather than doubled.
    cleaned_lyrics = re.sub(r'(\S)[^\S\n]*(\([^()]*\))', r'\1 \2', cleaned_lyrics)
    return cleaned_lyrics.strip()

def scrape_song_lyrics(url, timeout=10):
    '''Scrape the lyrics of one song from its Genius page.

    Args:
        url (str): Address of the Genius lyrics page.
        timeout (float): Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the caller indefinitely.

    Returns:
        str: The cleaned lyrics on success. Failures never raise; a message is
        returned instead, starting with "Lyrics not found for", "Failed to
        retrieve the webpage" or "An error occurred".
    '''
    try:
        response = requests.get(url, timeout=timeout)
        if response.status_code != 200:
            return f"Failed to retrieve the webpage: HTTP {response.status_code} for {url}"

        soup = BeautifulSoup(response.text, 'html.parser')

        parent_container = soup.find("div", class_="lyrics") or soup.find("div", class_="lyricbox")
        if parent_container:
            # Take the container's text directly; previously this branch never
            # assigned `lyrics`, so the code below failed on an unbound name.
            lyrics = parent_container.get_text(separator="\n").strip()
        else:
            lyric_elements = soup.select("div[class*='Lyrics__Container'], div[class*='lyrics']")
            lyrics = '\n'.join(elem.get_text(separator="\n") for elem in lyric_elements).strip()

        # Drop bracketed annotations, then tidy parentheticals.
        lyrics = re.sub(r'\[.*?\]', '', lyrics, flags=re.DOTALL)
        lyrics = parentheses_cleaner(lyrics)

        return lyrics if lyrics else f"Lyrics not found for {url}."

    except Exception as e:
        # Deliberate best effort: callers loop over many songs and expect a
        # string back for every one of them.
        return f"An error occurred: {e}"


In [None]:
# Spotify API Testing
import os

import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

# Credentials are read from the environment instead of being committed with the
# notebook. Set SPOTIPY_CLIENT_ID and SPOTIPY_CLIENT_SECRET before running.
# NOTE(review): any key pair previously stored in this notebook should be
# revoked in the Spotify developer dashboard.
client_id = os.environ["SPOTIPY_CLIENT_ID"]
client_secret = os.environ["SPOTIPY_CLIENT_SECRET"]

client_credentials_manager = SpotifyClientCredentials(client_id=client_id, client_secret=client_secret)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

track_results = sp.search(q='artist:Ed Sheeran track:Shape of You', type='track')
track_items = track_results['tracks']['items']
if not track_items:
    # LookupError is the base class of the IndexError the bare [0] used to raise.
    raise LookupError("Spotify search returned no tracks for 'Shape of You' by Ed Sheeran")
track_id = track_items[0]['id']
print(f"Track ID: {track_id}")

audio_features = sp.audio_features(tracks=[track_id])[0]
if audio_features is None:
    # presumably the API yields None for tracks without features; verify against spotipy docs
    raise LookupError(f"No audio features available for track {track_id}")

print(f"Danceability: {audio_features['danceability']}")
print(f"Energy: {audio_features['energy']}")
print(f"Tempo: {audio_features['tempo']} BPM")

In [None]:
# Sentiment Analysis Testing

!pip install textblob
!python -m textblob.download_corpora

In [None]:
# Sentiment Analysis Testing

from textblob import TextBlob

# Fetch the lyrics here: no earlier cell defines `lyrics`, so on a freshly
# restarted kernel this cell would otherwise stop with a NameError.
lyrics = scrape_song_lyrics('https://genius.com/Ed-sheeran-shape-of-you-lyrics')

blob = TextBlob(lyrics)
sentiment = blob.sentiment
polarity = sentiment.polarity

# Audio features of the same track, fetched in the Spotify API testing cell.
valence = audio_features['valence']
energy = audio_features['energy']

print(f"Sentiment Analysis of 'Shape of You':\nPolarity: {sentiment.polarity}\nSubjectivity: {sentiment.subjectivity}")

# Deciding sentiment based on polarity
if polarity > 0:
    print("Overall Sentiment: Positive")
elif polarity < 0:
    print("Overall Sentiment: Negative")
else:
    print("Overall Sentiment: Neutral")

# Investigate correlations (simplified example): compare the sign of the lyric
# polarity with which side of 0.5 the Spotify valence falls on.
if valence > 0.5 and polarity > 0:
    print("Both the music and lyrics convey a positive mood.")
elif valence < 0.5 and polarity < 0:
    print("Both the music and lyrics convey a negative mood.")
else:
    print("The mood conveyed by the music and lyrics might differ.")


In [None]:
# Getting Recent Billboard Data
import billboard

# Which chart to download, and for which date.
CHART_NAME = 'hot-100'
CHART_DATE = '2024-03-16'

chart = billboard.ChartData(CHART_NAME, date=CHART_DATE)
print(chart)

In [None]:
# Formatting Song Data Function
import re

def get_song_info(song):
    '''Split a chart entry of the form "'<title>' by <artists>" into its parts.

    Args:
        song (str): Text of one chart entry.

    Returns:
        list: [song_name, artists]. song_name is the title with apostrophes
        removed. artists is a list of stripped names obtained by dropping
        everything from " Featuring " / " With " onwards and splitting the
        remainder on ":", "," and "&".

    Raises:
        ValueError: If the text does not contain the "'<title>' by <artists>"
            pattern.
    '''
    match = re.search(r"'(.+)' by (.+)", song)
    if match is None:
        # Fail with the offending text instead of an AttributeError on None.
        raise ValueError(f"Unrecognised chart entry: {song!r}")

    song_name = match.group(1).replace("'", '')
    # Keep only the lead artists; featured or accompanying acts are discarded.
    artist_names = re.sub(r'\s+(Featuring|With)\s+.*$', '', match.group(2))
    artists = [name.strip() for name in re.split(r'[:,&]', artist_names)]
    return [song_name, artists]

In [None]:
# Splitting the Song Data
# Parse every chart entry once, then separate titles from artist lists while
# keeping both lists in chart order.
parsed_entries = [get_song_info(str(entry)) for entry in chart]
song_names = [parsed[0] for parsed in parsed_entries]
artists_names = [parsed[1] for parsed in parsed_entries]
print(artists_names)

In [None]:
# Main Execution - Tracklist (Songs + Artists)

# Group song titles under the first artist listed for each song.
artist_tracks = {}
for title, artists in zip(song_names, artists_names):
    artist_tracks.setdefault(artists[0], []).append(title)


In [None]:
# Show the mapping from each first-listed artist to the song titles grouped under it.
print(artist_tracks)

In [None]:
# Main Execution - Audio Features
import os

import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

# Credentials are read from the environment instead of being committed with the
# notebook. Set SPOTIPY_CLIENT_ID and SPOTIPY_CLIENT_SECRET before running.
client_id = os.environ["SPOTIPY_CLIENT_ID"]
client_secret = os.environ["SPOTIPY_CLIENT_SECRET"]

client_credentials_manager = SpotifyClientCredentials(client_id=client_id, client_secret=client_secret)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

# One dict per track that was found: artist, title, track_id, audio_features.
song_audio_features = []
track_names = []

for artist, tracks in artist_tracks.items():
    for track in tracks:
        query = f'artist:{artist} track:{track}'
        result = sp.search(q=query, type='track', limit=1)

        items = result['tracks']['items']
        if not items:
            print(f"No results found for {track} by {artist}")
            continue

        track_id = items[0]['id']
        audio_features = sp.audio_features(track_id)[0]
        if audio_features is None:
            # Skip instead of storing None, which the code below and the
            # sentiment cell would fail on when reading individual features.
            print(f"No audio features available for {track} by {artist}")
            continue

        song_audio_features.append({
            'artist': artist,
            'title': track,
            'track_id': track_id,
            'audio_features': audio_features
        })


# Print the first collected record as a sanity check.
if song_audio_features:
    first_song = song_audio_features[0]
    print(f"Artist: {first_song['artist']}")
    print(f"Title: {first_song['title']}")
    print(f"Track ID: {first_song['track_id']}")
    print("Audio Features:")
    for feature, value in first_song['audio_features'].items():
        print(f"{feature}: {value}")
else:
    print("No song was processed.")

In [None]:
# Formatting Song Data into a Usable URL

def create_url(song_data):
    '''Build the Genius lyrics address for one song.

    song_data is [song name, list of artist names]. The result is a
    one-element list holding the address, laid out as
    <artists>-<song>-lyrics, where multiple artists are joined with "-" and
    the last one is introduced by "-and-".
    '''
    title, artists = song_data[0], song_data[1]

    # Punctuation (other than hyphens) becomes whitespace, then every run of
    # whitespace becomes a single hyphen.
    title_slug = re.sub(r'\s+', '-', re.sub(r'[^\w\s-]', ' ', title).strip())

    artist_slugs = [name.replace(' ', '-') for name in artists]
    if len(artist_slugs) > 1:
        artist_slug = '-'.join(artist_slugs[:-1]) + '-and-' + artist_slugs[-1]
    else:
        # Zero or one artist: nothing to join.
        artist_slug = ''.join(artist_slugs)

    return ['https://genius.com/' + artist_slug + '-' + title_slug + '-lyrics']

In [None]:
# Main Execution - Lyrics
# Pause between consecutive Genius requests, in seconds.
REQUEST_DELAY_SECONDS = 20

# Maps each song title to the text returned by scrape_song_lyrics.
song_lyrics = {}

for index, song in enumerate(song_names):
    # Wait between requests only; there is nothing to wait for before the first.
    if index > 0:
        time.sleep(REQUEST_DELAY_SECONDS)

    # Drop empty names and names made up solely of symbols/underscores, and
    # spell "$" as "S" in the names that remain.
    filtered_artists = [
        artist.replace('$', 'S')
        for artist in artists_names[index]
        if artist and not re.match(r'^[\W_]+$', artist)
    ]

    url_song = song.replace('@', 'at')
    url = create_url([url_song, filtered_artists])[0]
    lyrics = scrape_song_lyrics(url)
    song_lyrics[song] = lyrics
    




In [None]:
# Sentiment Analysis and Data Formatting

from textblob import TextBlob

# Messages scrape_song_lyrics returns in place of lyrics when scraping fails.
# Scoring them would put the sentiment of an error message into the dataset.
SCRAPE_FAILURE_PREFIXES = ("Lyrics not found for",
                           "Failed to retrieve the webpage",
                           "An error occurred")

# Look up audio-feature records by lower-cased title. setdefault keeps the
# first record per title, matching a first-match linear search.
features_by_title = {}
for record in song_audio_features:
    features_by_title.setdefault(record['title'].lower(), record)

sentiment_data = {}

for song, lyrics in song_lyrics.items():
    if lyrics.startswith(SCRAPE_FAILURE_PREFIXES):
        print(f"Skipping {song}: no lyrics were scraped")
        continue

    record = features_by_title.get(song.lower())
    if record is None or record['audio_features'] is None:
        # Previously a missing title ran the index past the end of the list.
        print(f"Skipping {song}: no audio features available")
        continue

    features = record['audio_features']
    sentiment = TextBlob(lyrics).sentiment
    polarity = sentiment.polarity
    subjectivity = sentiment.subjectivity

    # Overall Sentiment Analysis
    if polarity > 0:
        overall_sentiment = 'Positive'
    elif polarity < 0:
        overall_sentiment = 'Negative'
    else:
        # A polarity of exactly zero is neutral, not positive.
        overall_sentiment = 'Neutral'

    # Storing Data
    sentiment_data[song] = {'Artist': record['artist'],
                            'Overall Sentiment': overall_sentiment,
                            'Polarity': polarity,
                            'Subjectivity': subjectivity,
                            'Valence': features['valence'],
                            'Energy': features['energy'],
                            'Loudness': features['loudness'],
                            'Tempo': features['tempo'],
                            'Danceability': features['danceability']}






In [None]:
# Storing Relevant Data in a Dataframe
from pathlib import Path

import pandas as pd

DATA_PATH = Path('data.csv')

# Export only when no saved dataset exists yet. An existing data.csv is never
# overwritten, while a fresh environment still gets the file that the loading
# cell reads.
if not DATA_PATH.exists():
    rows = [{'Song': song, **data} for song, data in sentiment_data.items()]
    df = pd.DataFrame(rows)
    df.to_csv(DATA_PATH, index=False)

In [None]:
# Loading Data Frame and Displaying it
# Reads the dataset from data.csv (path relative to the working directory).
df = pd.read_csv("data.csv")
# Display at most the first 100 rows as the cell output.
df.head(100)

In [None]:
# Creating subsets of the DataFrame to be neatly compiled into LaTeX

# First ten rows: the first five columns, and the first column together with
# the last five columns.
sentiment_subset = df.iloc[:10, :5]
audio_subset = df.iloc[:10, [0] + list(range(-5, 0))]
sentiment_latex = sentiment_subset.to_latex(index=True, caption="Subset of Song Dataframe", label="tab:df_subset")
# Each table needs its own label; reusing "tab:df_subset" would define the same
# LaTeX label twice and make references to it ambiguous.
audio_latex = audio_subset.to_latex(index=True, caption="Audio Features", label="tab:audio_subset")
print(sentiment_latex)
print(audio_latex)

In [None]:
# Heat Plot
import matplotlib.pyplot as plt
import seaborn as sns

# Columns that enter the correlation matrix.
heatmap_columns = ['Polarity','Valence', 'Danceability', 'Tempo', 'Loudness', 'Energy', 'Subjectivity']
test = df[heatmap_columns]
# Rows whose polarity is exactly zero are left out of the correlation.
filtered = test.loc[test['Polarity'] != 0]
corr_matrix = filtered.corr()

heatmap_axes = sns.heatmap(corr_matrix, cmap='coolwarm', annot=True, fmt=".2f")
heatmap_axes.set_title('Correlation Heatmap')
plt.show()

In [None]:
# Scatterplot Sample 1
plt.scatter(df['Polarity'], df['Danceability'])
plt.xlabel('Polarity')
plt.ylabel('Danceability')
plt.title('Polarity vs Danceability')
# Render explicitly, as the heatmap cell does, so the cell shows only the
# figure and not the repr of the title object.
plt.show()

In [None]:
# Scatterplot Sample 2
plt.scatter(df['Polarity'], df['Energy'])
plt.xlabel('Polarity')
plt.ylabel('Energy')
plt.title('Polarity vs Energy')
# Render explicitly, as the heatmap cell does, so the cell shows only the
# figure and not the repr of the title object.
plt.show()

In [None]:
# Scatterplot Sample 3
plt.scatter(df['Loudness'], df['Energy'])
plt.xlabel('Loudness')
plt.ylabel('Energy')
plt.title('Loudness vs Energy')
# Render explicitly, as the heatmap cell does, so the cell shows only the
# figure and not the repr of the title object.
plt.show()

In [None]:
!pip install plotly
!pip install plotly chart-studio

In [None]:
# Interactive Plot (Hoverable Points Only)

import plotly.express as px

# Extra columns shown in the tooltip of each point.
hover_columns = ['Song', 'Artist']

fig = px.scatter(
    data_frame=df,
    x='Valence',
    y='Polarity',
    title='Polarity vs. Valence',
    hover_data=hover_columns,
)

fig.show()

In [None]:
!pip install ipywidgets

In [None]:
# Interactive Plot that others can access with a link. An HTML of the graph can be saved.
# This module displays the interactive plot in Jupyter. For the external web app, see next code block.

import pandas as pd
import chart_studio
import chart_studio.plotly as py
import plotly.graph_objs as go
from ipywidgets import interact, Dropdown
import chart_studio.tools as tls
import plotly.offline as py_offline

import os

# Chart Studio credentials are read from the environment instead of being
# committed with the notebook. When the variables are absent nothing is
# written and the credentials file is left as it is.
# NOTE(review): any API key previously stored in this notebook should be
# regenerated in the Chart Studio account settings.
_chart_studio_user = os.environ.get('CHART_STUDIO_USERNAME')
_chart_studio_key = os.environ.get('CHART_STUDIO_API_KEY')
if _chart_studio_user and _chart_studio_key:
    chart_studio.tools.set_credentials_file(username=_chart_studio_user, api_key=_chart_studio_key)

def update_plot(x_axis, y_axis, upload=False, save_html=False):
    '''Scatter-plot one dataframe column against another.

    x_axis and y_axis name the columns of df to plot; each point is labelled
    "<Song> by <Artist>". With upload set, the figure is passed to upload_plot.
    Otherwise, with save_html set, it is written to
    "<x_axis>_vs_<y_axis>.html" and opened. With neither set it is shown
    inline. upload takes precedence over save_html.
    '''
    x_values = df[x_axis]
    y_values = df[y_axis]
    point_labels = df['Song'] + " by " + df['Artist']
    marker_style = dict(color='LightSkyBlue', size=10, opacity=0.5)

    trace = go.Scatter(x=x_values, y=y_values, mode='markers',
                       text=point_labels, marker=marker_style)
    fig = go.Figure(data=trace)
    fig.update_layout(
        title=f'{x_axis} vs. {y_axis}',
        xaxis_title=x_axis,
        yaxis_title=y_axis,
        hovermode='closest',
    )

    if upload:
        upload_plot(fig, filename=f'{x_axis}_vs_{y_axis}')
        return

    if not save_html:
        fig.show()
        return

    file_name = f"{x_axis}_vs_{y_axis}.html"
    py_offline.plot(fig, filename=file_name, auto_open=True, include_plotlyjs=True)
    print(f"Plot saved as {file_name}")
        
def upload_plot(fig, filename):
    '''Send the figure to Chart-Studio under the given filename with public
    sharing, then print the address returned by the upload call.'''
    public_url = py.plot(
        fig,
        filename=filename,
        auto_open=True,
        sharing='public',
    )
    print(f"Plot is available at: {public_url}")

# Every dataframe column can be chosen for either axis.
axis_choices = list(df.columns)

x_axis_dropdown = Dropdown(description='X-axis', options=axis_choices, value='Polarity')
y_axis_dropdown = Dropdown(description='Y-axis', options=axis_choices, value='Subjectivity')
# Despite its name this is a two-option dropdown (False / True).
upload_checkbox = Dropdown(description='Upload?', options=[False, True])

# Re-draw the plot whenever one of the widgets changes.
interact(
    update_plot,
    x_axis=x_axis_dropdown,
    y_axis=y_axis_dropdown,
    upload=upload_checkbox,
)

In [None]:
# Modified version of interactive scatterplot so that it can be hosted via web-app (not meant to run in Jupyter, this is just the source code)

# Import necessary libraries
import pandas as pd
import streamlit as st
import plotly.graph_objs as go

# Dataset for the web app, read from data.csv relative to the working directory.
df = pd.read_csv("data.csv")

def update_plot(x_axis, y_axis):
    '''Scatter-plot one dataframe column against another and render the figure
    in the Streamlit app. x_axis and y_axis name the columns of df to plot;
    each point is labelled "<Song> by <Artist>".'''
    marker_style = dict(color='LightSkyBlue', size=10, opacity=0.5)
    trace = go.Scatter(
        x=df[x_axis],
        y=df[y_axis],
        mode='markers',
        text=df['Song'] + " by " + df['Artist'],
        marker=marker_style,
    )

    fig = go.Figure(data=trace)
    fig.update_layout(
        title=f'{x_axis} vs. {y_axis}',
        xaxis_title=x_axis,
        yaxis_title=y_axis,
        hovermode='closest',
    )

    st.plotly_chart(fig)

# Create Streamlit widgets for selecting the X and Y axis data.
# Default to Polarity vs. Subjectivity, the same pair the notebook version of
# this plot starts with, instead of whichever columns sit at positions 0 and 1.
axis_options = list(df.columns)
x_axis = st.selectbox('Select X-axis:', options=axis_options, index=axis_options.index('Polarity'))
y_axis = st.selectbox('Select Y-axis:', options=axis_options, index=axis_options.index('Subjectivity'))

# Add a button to update the plot
st.button('Update Plot', on_click=update_plot, args=(x_axis, y_axis))

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=9fdf84f8-50cd-4376-85d1-2d7888957269' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>