In [18]:
import os

import matplotlib.pyplot as plt
import pandas as pd
import plotly.express as px
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
from spotipy.oauth2 import SpotifyOAuth

In [19]:
# Read the Spotify API credentials from the environment instead of hardcoding
# them, so that secrets are never saved or shared together with the notebook.
# Set SPOTIPY_CLIENT_ID and SPOTIPY_CLIENT_SECRET before starting the kernel.
# NOTE(review): any credentials that were previously committed in this cell
# should be treated as leaked and rotated in the Spotify developer dashboard.
client_id = os.environ.get('SPOTIPY_CLIENT_ID')
client_secret = os.environ.get('SPOTIPY_CLIENT_SECRET')
if not client_id or not client_secret:
    raise RuntimeError(
        "Spotify credentials not found: set the SPOTIPY_CLIENT_ID and "
        "SPOTIPY_CLIENT_SECRET environment variables."
    )

# Client-credentials flow: app-level access, no user login involved
auth_manage = SpotifyClientCredentials(client_id = client_id, client_secret = client_secret)
sp = spotipy.Spotify(client_credentials_manager = auth_manage)

In [20]:
# Getting ID of songs in Playlist
def get_playlist_ids (user, playlist_id):
    """Return the track IDs of every track in a playlist.

    Parameters
    ----------
    user : str
        Spotify user name, forwarded to ``sp.user_playlist``.
    playlist_id : str
        ID of the playlist to read.

    Returns
    -------
    list
        Track IDs in playlist order. Entries whose ``'track'`` is missing or
        whose ``'id'`` is empty are skipped, since they cannot be looked up
        by ID afterwards.
    """
    track_ids = []
    # The playlist's 'tracks' field is a paging object: it holds one page of
    # items plus a 'next' link, so follow the links until they run out.
    page = sp.user_playlist(user, playlist_id)['tracks']
    while page:
        for item in page['items']:
            track = item.get('track')
            if track and track.get('id'):
                track_ids.append(track['id'])
        # Only ask for another page when the current one links to it
        page = sp.next(page) if page.get('next') else None
    return track_ids

The user_id below should be the username shown on your Spotify profile page, and the playlist_id is the last part of the playlist URL, i.e. the part after open.spotify.com/playlist/

In [21]:
# Spotify username and playlist ID (last segment of the playlist URL) to analyse
user_id = '31flqmapu6wd5a57mqga7pbgfobq' 
playlist_id = '68BNq6D6mv5qVF7PXUKl7X'
# Get track IDs in the playlist
track_ids = get_playlist_ids(user_id, playlist_id)

In [22]:
# Function to get track info and features
def get_track_data(track_id):
    """Collect metadata and audio features for one track into a flat dict.

    Parameters
    ----------
    track_id : str
        Spotify track ID, passed to ``sp.track`` and ``sp.audio_features``.

    Returns
    -------
    dict
        Keys, in order: ``id``, ``name``, ``artists`` (artist names joined
        with ``', '``), ``popularity``, ``release_date``, ``explicit``,
        ``duration_ms``, ``danceability``, ``energy``, ``key``, ``mode``,
        ``tempo``, ``valence``. The audio-feature values are ``None`` when
        no feature entry is returned for the track.
    """
    feature_keys = ('danceability', 'energy', 'key', 'mode', 'tempo', 'valence')

    track_info = sp.track(track_id)

    # audio_features returns a list; its entry may be None when no features
    # are available for the track, so fall back to an empty mapping instead
    # of failing on a None subscript.
    features_list = sp.audio_features(track_id) or [None]
    features_info = features_list[0] or {}

    track_data = {
        'id': track_info['id'],
        'name': track_info['name'],
        'artists': ', '.join(artist['name'] for artist in track_info['artists']),
        'popularity': track_info['popularity'],
        'release_date': track_info['album']['release_date'],
        'explicit': track_info['explicit'],
        'duration_ms': track_info['duration_ms'],
    }
    # Missing features become None so every row keeps the same set of columns
    for feature in feature_keys:
        track_data[feature] = features_info.get(feature)
    return track_data


In [23]:
# Look up metadata and audio features for each track ID from the playlist
playlist_data = list(map(get_track_data, track_ids))

# One row per track, one column per dictionary key
playlist_df = pd.DataFrame(playlist_data)

# Write the table to disk without the row index
playlist_df.to_csv('playlist_features.csv', index=False)

# Preview the first five rows as plain text
print(playlist_df.head(5))

                       id                   name     artists  popularity  \
0  0obBFrPYkSoBJbvHfUIhkv     Sexy And I Know It       LMFAO          65   
1  0js3FSEsVCVHHsIWp0Y1q8           Good Feeling    Flo Rida           0   
2  6hkOqJ5mE093AQf2lbZnsG  The One That Got Away  Katy Perry          75   
3  73CMRj62VK8nUS4ezD2wvi   Set Fire to the Rain       Adele          83   
4  53QF56cjZA9RTuuMZDrSA6        I Won't Give Up  Jason Mraz          72   

  release_date  explicit  duration_ms  danceability  energy  key  mode  \
0   2011-01-01     False       199480         0.707   0.861    7     1   
1   2022-06-24     False       248133         0.706   0.890    1     0   
2   2010-01-01     False       227333         0.691   0.795    1     0   
3   2011-01-24     False       242973         0.603   0.670    2     0   
4   2012-04-13     False       240165         0.483   0.303    4     1   

     tempo  valence  
0  130.021    0.795  
1  128.012    0.685  
2  133.971    0.876  
3  107.993

In [24]:
# Inspect the dtype pandas assigned to each column of the playlist table
playlist_df.dtypes

id               object
name             object
artists          object
popularity        int64
release_date     object
explicit           bool
duration_ms       int64
danceability    float64
energy          float64
key               int64
mode              int64
tempo           float64
valence         float64
dtype: object

In [25]:
# Fetch and print the data of a single track, looked up directly by its ID,
# as a spot check of get_track_data
example_track_id = '6hkOqJ5mE093AQf2lbZnsG'
example_track_data = get_track_data(example_track_id)
print("Example Track Data:", example_track_data)

Example Track Data: {'id': '6hkOqJ5mE093AQf2lbZnsG', 'name': 'The One That Got Away', 'artists': 'Katy Perry', 'popularity': 75, 'release_date': '2010-01-01', 'explicit': False, 'duration_ms': 227333, 'danceability': 0.691, 'energy': 0.795, 'key': 1, 'mode': 0, 'tempo': 133.971, 'valence': 0.876}


In [26]:
# Find the most and least popular songs in the playlist.
# idxmax/idxmin give the row label of the extreme 'popularity' value and
# .loc fetches that row; only its 'name' is printed.
highest_populated_song = playlist_df.loc[playlist_df['popularity'].idxmax()]
lowest_populated_song = playlist_df.loc[playlist_df['popularity'].idxmin()]
print("Highest Populated Song:", highest_populated_song['name'])
print("Lowest Populated Song:", lowest_populated_song['name'])

Highest Populated Song: Locked out of Heaven
Lowest Populated Song: Good Feeling


In [27]:
# Songs with a popularity of at least 70, most popular first; show the top five
high_populated = playlist_df[playlist_df["popularity"] >= 70].sort_values(
    "popularity", ascending=False
)
high_populated.head()

Unnamed: 0,id,name,artists,popularity,release_date,explicit,duration_ms,danceability,energy,key,mode,tempo,valence
32,3w3y8KPTfNeOKPiqUTakBh,Locked out of Heaven,Bruno Mars,91,2012-12-07,False,233478,0.726,0.698,5,1,143.994,0.867
27,6VObnIkLVruX4UVyxWhlqm,Skyfall,Adele,86,2012-10-04,False,286480,0.346,0.552,0,0,75.881,0.0789
3,73CMRj62VK8nUS4ezD2wvi,Set Fire to the Rain,Adele,83,2011-01-24,False,242973,0.603,0.67,2,0,107.993,0.446
21,2iUmqdfGZcHIhS3b9E9EWq,Everybody Talks,Neon Trees,81,2012-01-01,True,177280,0.471,0.924,8,1,154.961,0.725
17,3bC1ahPIYt1btJzSSEyyrF,Whistle,Flo Rida,81,2012-06-22,False,224653,0.747,0.937,0,1,103.976,0.739


In [28]:
# Songs with a popularity of at most 30, least popular first; show the bottom five
low_populated = playlist_df[playlist_df["popularity"] <= 30].sort_values("popularity")
low_populated.head()

Unnamed: 0,id,name,artists,popularity,release_date,explicit,duration_ms,danceability,energy,key,mode,tempo,valence
1,0js3FSEsVCVHHsIWp0Y1q8,Good Feeling,Flo Rida,0,2022-06-24,False,248133,0.706,0.89,1,0,128.012,0.685
11,7bR1jsWttxgvgvHZ2NhCO3,Call Me Maybe,Carly Rae Jepsen,0,2022-06-30,False,193146,0.772,0.6,7,1,120.026,0.644
12,2M4JeeHaLCAVgzze2C07zM,What Makes You Beautiful - Originally By One D...,HOT 100,0,2012-03-22,False,29733,0.666,0.878,9,0,124.965,0.932
5,2Pi11XaXLiQFtknSnoVbQ3,Rack City,Tyga,4,2021-10-22,True,208440,0.93,0.332,1,1,98.966,0.255
7,2BqSKeUHLR7o30IEt9OJa4,Stronger (What Doesn't Kill You) - 7th Heaven ...,Kelly Clarkson,15,2012-02-03,False,448440,0.619,0.861,9,0,126.002,0.659


In [29]:
# Parse the 'release_date' column into datetime values
playlist_df["release_date"] = pd.to_datetime(playlist_df["release_date"])

# Order the songs from earliest to latest release and show the five oldest
old_songs = playlist_df.sort_values("release_date")
old_songs.head()

Unnamed: 0,id,name,artists,popularity,release_date,explicit,duration_ms,danceability,energy,key,mode,tempo,valence
2,6hkOqJ5mE093AQf2lbZnsG,The One That Got Away,Katy Perry,75,2010-01-01,False,227333,0.691,0.795,1,0,133.971,0.876
0,0obBFrPYkSoBJbvHfUIhkv,Sexy And I Know It,LMFAO,65,2011-01-01,False,199480,0.707,0.861,7,1,130.021,0.795
6,6MAdEUilV2p9RQUqE5bMAK,Domino,Jessie J,67,2011-01-01,False,231693,0.757,0.547,7,1,126.977,0.793
22,4sK96UnGx3NjBaqvfTG2dm,Too Close,Alex Clare,60,2011-01-01,False,256560,0.584,0.712,11,0,126.043,0.286
3,73CMRj62VK8nUS4ezD2wvi,Set Fire to the Rain,Adele,83,2011-01-24,False,242973,0.603,0.67,2,0,107.993,0.446


In [34]:
# Scatter plot of tempo against danceability, with points coloured by popularity.
# Columns are referenced by name, as in the valence/energy scatter plot, so
# plotly reads them from playlist_df instead of receiving separate Series.
figure = px.scatter(
    playlist_df,
    x='tempo',
    y='danceability',
    color='popularity',
    title='Scatter Plot of Popular Songs using Tempo against Danceability',
)

figure.show()

In the figure above, the most popular songs in this playlist cluster around medium tempo and medium danceability. Other playlists could give different results. Most of the songs in the playlist have a danceability below 0.8. For this playlist, the relationship between tempo and danceability appears to be logarithmic.

In [31]:
# Valence against energy, with each point coloured by tempo
scatter_valence_energy = px.scatter(
    playlist_df,
    x='valence',
    y='energy',
    color='tempo',
    title='Scatter Plot of Valence vs Energy',
)
scatter_valence_energy.show()

Comparing valence and energy shows a positive correlation between them: usually, the higher the valence, the higher the energy. The points on the right also tend to have a higher tempo, which suggests that all three features are positively correlated.

In [32]:
# Count, per artist, the songs whose popularity exceeds 70
popular_artists = playlist_df.loc[playlist_df['popularity'] > 70, 'artists'].value_counts()
print("Artist with Songs Popularity > 70:", popular_artists, sep="\n")

Artist with Songs Popularity > 70:
Katy Perry       3
Taylor Swift     3
Rihanna          2
Adele            2
Neon Trees       1
Bruno Mars       1
Ne-Yo            1
Kesha            1
One Direction    1
PSY              1
Maroon 5         1
fun.             1
Flo Rida         1
Train            1
Justin Bieber    1
Jason Mraz       1
The Lumineers    1
Name: artists, dtype: int64


We see that Katy Perry and Taylor Swift have the most popular songs in this playlist, with three songs each above a popularity of 70.

In [33]:
# Pie chart: each artist's share of the songs with popularity above 70
pop_70 = playlist_df.loc[playlist_df["popularity"] > 70].sort_values(by=["popularity"], ascending=False)

# Derive the per-artist counts from pop_70 itself, so the chart is built from
# the data selected in this cell rather than from a variable left over from
# another cell.
artist_counts = pop_70["artists"].value_counts()

# names/values are passed by keyword as plain arrays; no data_frame is given
# because the counts are not columns of pop_70.
chart = px.pie(names = artist_counts.index, values = artist_counts.values,
             title = 'Artist whose song has high popularity greater than 70')
chart.update_traces(textposition = "inside", textinfo = "percent + label") 
chart.show()