In [None]:
from google.colab import drive
# Mount Google Drive inside the Colab VM so the project files can be read
# and written under this directory.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [None]:
pip install spotipy

Collecting spotipy
  Downloading spotipy-2.23.0-py3-none-any.whl (29 kB)
Collecting redis>=3.5.3 (from spotipy)
  Downloading redis-5.0.1-py3-none-any.whl (250 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m250.3/250.3 kB[0m [31m7.4 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: redis, spotipy
Successfully installed redis-5.0.1 spotipy-2.23.0


In [None]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import pandas as pd
import time
import numpy as np

In [None]:
# The Spotify client id and client secret are kept in a CSV on Drive so
# that no personal credentials appear in the notebook itself.
client_info_path = '/content/drive/MyDrive/MinorProject7/spotify_client_info.csv'
spotify_client_info = pd.read_csv(client_info_path)

In [None]:
# First row of the credentials table: column 0 is used as the client id,
# column 1 as the client secret.
client_id = spotify_client_info.iloc[0, 0]
client_secret = spotify_client_info.iloc[0, 1]

# Client-credentials flow: app-level authentication, no user login involved.
client_credentials_manager = SpotifyClientCredentials(
    client_id=client_id,
    client_secret=client_secret,
)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

In [None]:
# my spotify username and playlist ids
# on playlist page, click on "..." -> then on "Share" -> then "Copy Spotify URI"
def getTrackIDs(user, playlist_id):
    """Return the Spotify track ids of all tracks in a playlist.

    Parameters
    ----------
    user : str
        Spotify username passed to ``sp.user_playlist``.
    playlist_id : str
        Playlist id or Spotify URI.

    Returns
    -------
    list
        Track ids in playlist order. Entries that carry no usable id
        (a null ``track`` object or a null ``id``) are skipped, because
        they cannot be looked up later.
    """
    ids = []
    playlist = sp.user_playlist(user, playlist_id)

    # The playlist payload embeds only the first page of tracks; keep
    # following the paging object's `next` link until it runs out.
    page = playlist['tracks']
    while page:
        for item in page['items']:
            track = item.get('track')
            # Skip placeholders: without an id there is nothing to query.
            if track and track.get('id'):
                ids.append(track['id'])
        page = sp.next(page) if page.get('next') else None
    return ids

In [None]:
# Get spotify singular song data from these locations
# https://developer.spotify.com/documentation/web-api/reference/#/operations/get-track
# https://developer.spotify.com/documentation/web-api/reference/#/operations/get-audio-features
def getTrackFeatures(id):
    """Fetch metadata and audio features for one track as a flat row.

    Parameters
    ----------
    id : str
        Spotify track id (the name shadows the ``id`` builtin; it is kept
        so existing callers are unaffected).

    Returns
    -------
    list
        19 values in this order: name, album, artist, release_date,
        length (``duration_ms``), popularity, acousticness, danceability,
        energy, instrumentalness, liveness, loudness, speechiness, tempo,
        valence, time_signature, key, mode, uri.
        When no audio features are available for the track, the feature
        values are ``None`` and the uri falls back to the track metadata.
    """
    meta = sp.track(id)
    features = sp.audio_features(id)

    # audio_features returns a list; its single entry may be None when
    # there is no audio analysis for the track.
    feats = features[0] if features else None

    album = meta['album']
    track = [
        meta['name'],
        album['name'],
        # NOTE(review): this is the album's first artist, not the track's
        # own artist list - confirm that is intended.
        album['artists'][0]['name'],
        album['release_date'],
        meta['duration_ms'],
        meta['popularity'],
    ]

    feature_keys = (
        'acousticness', 'danceability', 'energy', 'instrumentalness',
        'liveness', 'loudness', 'speechiness', 'tempo', 'valence',
        'time_signature', 'key', 'mode',
    )
    if feats is not None:
        track.extend(feats[key] for key in feature_keys)
        track.append(feats['uri'])
    else:
        track.extend(None for _ in feature_keys)
        track.append(meta.get('uri'))
    return track

In [None]:
# loop over track ids to get all songs in playlist
def loop_playist(playlist_ids, delay=.2):
    """Fetch the feature row of every track id, one request at a time.

    Parameters
    ----------
    playlist_ids : iterable of str
        Track ids to look up; any iterable works, not only lists.
    delay : float, optional
        Seconds to sleep before each lookup. Defaults to 0.2, the pause
        this notebook has always used between requests.

    Returns
    -------
    list
        One row (as returned by ``getTrackFeatures``) per track id, in
        input order.
    """
    tracks = []
    for track_id in playlist_ids:
        time.sleep(delay)
        tracks.append(getTrackFeatures(track_id))
    return tracks

In [None]:
# turn data into dataframe
def get_spotify_df(tracks, year):
    """Build a DataFrame with one row per track record.

    ``tracks`` is a list of rows whose values follow the column order
    listed below. ``year`` is accepted but not used by this function.
    """
    column_names = [
        'name', 'album', 'artist', 'release_date', 'length', 'popularity',
        'acousticness', 'danceability', 'energy', 'instrumentalness',
        'liveness', 'loudness', 'speechiness', 'tempo', 'valence',
        'time_signature', 'key', 'mode', 'uri',
    ]
    return pd.DataFrame(data=tracks, columns=column_names)

In [None]:
def get_years(df):
    """Add a ``release_year`` column derived from ``release_date``.

    The year is the text before the first ``-`` of the release date
    (a date without ``-`` is taken as the year itself). Missing dates
    yield a missing year instead of raising, and numeric dates (e.g. a
    year-only column parsed as numbers after a CSV round trip) are
    converted to their string form first.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``release_date`` column. Modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with ``release_year`` added as strings.
    """
    years = []
    for date in df['release_date'].values:
        if pd.isna(date):
            years.append(None)
            continue
        # A numeric column containing NaN is stored as float; render
        # 1999.0 as '1999' rather than '1999.0'.
        if isinstance(date, float) and date.is_integer():
            date = int(date)
        years.append(str(date).split('-', 1)[0])
    df['release_year'] = years
    return df

In [None]:
# Lookup table of the people analysed in this notebook: the cells below
# read a username from column 0 and a playlist id from column 1.
users_and_playlists_path = '/content/drive/MyDrive/MinorProject7/spotify_users_and_playlists.csv'
spotify_users_and_playlists = pd.read_csv(users_and_playlists_path)

### Ananya

In [None]:
# Ananya's entries are read from three consecutive rows of the lookup
# table starting at this row (one playlist each for 2021, 2022, 2023).
ananya_first_row = 0
ananya_user = spotify_users_and_playlists.iloc[ananya_first_row, 0]

ananya_playlist_2021 = spotify_users_and_playlists.iloc[ananya_first_row, 1]
ananya_playlist_2022 = spotify_users_and_playlists.iloc[ananya_first_row + 1, 1]
ananya_playlist_2023 = spotify_users_and_playlists.iloc[ananya_first_row + 2, 1]

In [None]:
# Track ids of each of Ananya's yearly playlists.
ananya_ids_2021 = getTrackIDs(user=ananya_user, playlist_id=ananya_playlist_2021)
ananya_ids_2022 = getTrackIDs(user=ananya_user, playlist_id=ananya_playlist_2022)
ananya_ids_2023 = getTrackIDs(user=ananya_user, playlist_id=ananya_playlist_2023)

In [None]:
# Download the per-track rows for Ananya's three playlists and print the
# elapsed wall-clock time in minutes (author's note: 4-5 minutes on their
# laptop).
time_start = time.time()
ananya_ids_2021_playlist_loop = loop_playist(playlist_ids=ananya_ids_2021)
ananya_ids_2022_playlist_loop = loop_playist(playlist_ids=ananya_ids_2022)
ananya_ids_2023_playlist_loop = loop_playist(playlist_ids=ananya_ids_2023)
time_end = time.time()
elapsed_minutes = (time_end - time_start) / 60
print(elapsed_minutes)

0.9061876932779948


In [None]:
# Turn each list of track rows into a DataFrame.
ananya_df_2021 = get_spotify_df(tracks=ananya_ids_2021_playlist_loop, year=2021)
ananya_df_2022 = get_spotify_df(tracks=ananya_ids_2022_playlist_loop, year=2022)
ananya_df_2023 = get_spotify_df(tracks=ananya_ids_2023_playlist_loop, year=2023)

In [None]:
# Add the release_year column to each of Ananya's tables.
ananya_df_2021 = get_years(df=ananya_df_2021)
ananya_df_2022 = get_years(df=ananya_df_2022)
ananya_df_2023 = get_years(df=ananya_df_2023)

In [None]:
# Write one CSV per year for Ananya into the project folder on Drive.
dfs = [ananya_df_2021, ananya_df_2022, ananya_df_2023]
names = ['Ananya_2021', 'Ananya_2022', 'Ananya_2023']
for name, df in zip(names, dfs):
    out_path = f'/content/drive/MyDrive/MinorProject7/{name}_Top_Songs.csv'
    df.to_csv(out_path, index=False)

In [None]:
# Read Ananya's three per-year CSV files back into DataFrames.
ananya_csv_paths = (
    f'/content/drive/MyDrive/MinorProject7/Ananya_2021_Top_Songs.csv',
    f'/content/drive/MyDrive/MinorProject7/Ananya_2022_Top_Songs.csv',
    f'/content/drive/MyDrive/MinorProject7/Ananya_2023_Top_Songs.csv',
)
ananya_df_2021, ananya_df_2022, ananya_df_2023 = map(pd.read_csv, ananya_csv_paths)

### Ritika

In [None]:
# Ritika's entries are read from three consecutive rows of the lookup
# table starting at this row (one playlist each for 2021, 2022, 2023).
ritika_first_row = 3
ritika_user = spotify_users_and_playlists.iloc[ritika_first_row, 0]

ritika_playlist_2021 = spotify_users_and_playlists.iloc[ritika_first_row, 1]
ritika_playlist_2022 = spotify_users_and_playlists.iloc[ritika_first_row + 1, 1]
ritika_playlist_2023 = spotify_users_and_playlists.iloc[ritika_first_row + 2, 1]

In [None]:
# Track ids of each of Ritika's yearly playlists.
ritika_ids_2021 = getTrackIDs(user=ritika_user, playlist_id=ritika_playlist_2021)
ritika_ids_2022 = getTrackIDs(user=ritika_user, playlist_id=ritika_playlist_2022)
ritika_ids_2023 = getTrackIDs(user=ritika_user, playlist_id=ritika_playlist_2023)

In [None]:
# Download the per-track rows for Ritika's three playlists and print the
# elapsed wall-clock time in minutes (author's note: 4-5 minutes on their
# laptop).
time_start = time.time()
ritika_ids_2021_playlist_loop = loop_playist(playlist_ids=ritika_ids_2021)
ritika_ids_2022_playlist_loop = loop_playist(playlist_ids=ritika_ids_2022)
ritika_ids_2023_playlist_loop = loop_playist(playlist_ids=ritika_ids_2023)
time_end = time.time()
elapsed_minutes = (time_end - time_start) / 60
print(elapsed_minutes)

In [None]:
# Turn each list of track rows into a DataFrame.
ritika_df_2021 = get_spotify_df(tracks=ritika_ids_2021_playlist_loop, year=2021)
ritika_df_2022 = get_spotify_df(tracks=ritika_ids_2022_playlist_loop, year=2022)
ritika_df_2023 = get_spotify_df(tracks=ritika_ids_2023_playlist_loop, year=2023)

In [None]:
# Add the release_year column to each of Ritika's tables.
ritika_df_2021 = get_years(df=ritika_df_2021)
ritika_df_2022 = get_years(df=ritika_df_2022)
ritika_df_2023 = get_years(df=ritika_df_2023)

In [None]:
# Write one CSV per year for Ritika into the project folder on Drive.
dfs = [ritika_df_2021, ritika_df_2022, ritika_df_2023]
names = ['Ritika_2021', 'Ritika_2022', 'Ritika_2023']
for name, df in zip(names, dfs):
    out_path = f'/content/drive/MyDrive/MinorProject7/{name}_Top_Songs.csv'
    df.to_csv(out_path, index=False)

In [None]:
# Read Ritika's three per-year CSV files back into DataFrames.
ritika_csv_paths = (
    f'/content/drive/MyDrive/MinorProject7/Ritika_2021_Top_Songs.csv',
    f'/content/drive/MyDrive/MinorProject7/Ritika_2022_Top_Songs.csv',
    f'/content/drive/MyDrive/MinorProject7/Ritika_2023_Top_Songs.csv',
)
ritika_df_2021, ritika_df_2022, ritika_df_2023 = map(pd.read_csv, ritika_csv_paths)

### Shriya

In [None]:
# Shriya's entries are read from three consecutive rows of the lookup
# table starting at this row (one playlist each for 2021, 2022, 2023).
shriya_first_row = 6
shriya_user = spotify_users_and_playlists.iloc[shriya_first_row, 0]

shriya_playlist_2021 = spotify_users_and_playlists.iloc[shriya_first_row, 1]
shriya_playlist_2022 = spotify_users_and_playlists.iloc[shriya_first_row + 1, 1]
shriya_playlist_2023 = spotify_users_and_playlists.iloc[shriya_first_row + 2, 1]

In [None]:
# Track ids of each of Shriya's yearly playlists.
shriya_ids_2021 = getTrackIDs(user=shriya_user, playlist_id=shriya_playlist_2021)
shriya_ids_2022 = getTrackIDs(user=shriya_user, playlist_id=shriya_playlist_2022)
shriya_ids_2023 = getTrackIDs(user=shriya_user, playlist_id=shriya_playlist_2023)

In [None]:
# Download the per-track rows for Shriya's three playlists and print the
# elapsed wall-clock time in minutes (author's note: 4-5 minutes on their
# laptop).
time_start = time.time()
shriya_ids_2021_playlist_loop = loop_playist(playlist_ids=shriya_ids_2021)
shriya_ids_2022_playlist_loop = loop_playist(playlist_ids=shriya_ids_2022)
shriya_ids_2023_playlist_loop = loop_playist(playlist_ids=shriya_ids_2023)
time_end = time.time()
elapsed_minutes = (time_end - time_start) / 60
print(elapsed_minutes)

In [None]:
# Turn each list of track rows into a DataFrame.
shriya_df_2021 = get_spotify_df(tracks=shriya_ids_2021_playlist_loop, year=2021)
shriya_df_2022 = get_spotify_df(tracks=shriya_ids_2022_playlist_loop, year=2022)
shriya_df_2023 = get_spotify_df(tracks=shriya_ids_2023_playlist_loop, year=2023)

In [None]:
# Add the release_year column to each of Shriya's tables.
shriya_df_2021 = get_years(df=shriya_df_2021)
shriya_df_2022 = get_years(df=shriya_df_2022)
shriya_df_2023 = get_years(df=shriya_df_2023)

In [None]:
# Write one CSV per year for Shriya into the project folder on Drive.
dfs = [shriya_df_2021, shriya_df_2022, shriya_df_2023]
names = ['Shriya_2021', 'Shriya_2022', 'Shriya_2023']
for name, df in zip(names, dfs):
    out_path = f'/content/drive/MyDrive/MinorProject7/{name}_Top_Songs.csv'
    df.to_csv(out_path, index=False)

In [None]:
# Read Shriya's three per-year CSV files back into DataFrames.
shriya_csv_paths = (
    f'/content/drive/MyDrive/MinorProject7/Shriya_2021_Top_Songs.csv',
    f'/content/drive/MyDrive/MinorProject7/Shriya_2022_Top_Songs.csv',
    f'/content/drive/MyDrive/MinorProject7/Shriya_2023_Top_Songs.csv',
)
shriya_df_2021, shriya_df_2022, shriya_df_2023 = map(pd.read_csv, shriya_csv_paths)

### Arushi

In [None]:
# Arushi's entries are read from three consecutive rows of the lookup
# table starting at this row (one playlist each for 2021, 2022, 2023).
arushi_first_row = 9
arushi_user = spotify_users_and_playlists.iloc[arushi_first_row, 0]

arushi_playlist_2021 = spotify_users_and_playlists.iloc[arushi_first_row, 1]
arushi_playlist_2022 = spotify_users_and_playlists.iloc[arushi_first_row + 1, 1]
arushi_playlist_2023 = spotify_users_and_playlists.iloc[arushi_first_row + 2, 1]

In [None]:
# Track ids of each of Arushi's yearly playlists.
arushi_ids_2021 = getTrackIDs(user=arushi_user, playlist_id=arushi_playlist_2021)
arushi_ids_2022 = getTrackIDs(user=arushi_user, playlist_id=arushi_playlist_2022)
arushi_ids_2023 = getTrackIDs(user=arushi_user, playlist_id=arushi_playlist_2023)

In [None]:
# Download the per-track rows for Arushi's three playlists and print the
# elapsed wall-clock time in minutes (author's note: 4-5 minutes on their
# laptop).
time_start = time.time()
arushi_ids_2021_playlist_loop = loop_playist(playlist_ids=arushi_ids_2021)
arushi_ids_2022_playlist_loop = loop_playist(playlist_ids=arushi_ids_2022)
arushi_ids_2023_playlist_loop = loop_playist(playlist_ids=arushi_ids_2023)
time_end = time.time()
elapsed_minutes = (time_end - time_start) / 60
print(elapsed_minutes)

1.0041272481282553


In [None]:
# Turn each list of track rows into a DataFrame.
arushi_df_2021 = get_spotify_df(tracks=arushi_ids_2021_playlist_loop, year=2021)
arushi_df_2022 = get_spotify_df(tracks=arushi_ids_2022_playlist_loop, year=2022)
arushi_df_2023 = get_spotify_df(tracks=arushi_ids_2023_playlist_loop, year=2023)

In [None]:
# Add the release_year column to each of Arushi's tables.
arushi_df_2021 = get_years(df=arushi_df_2021)
arushi_df_2022 = get_years(df=arushi_df_2022)
arushi_df_2023 = get_years(df=arushi_df_2023)

In [None]:
# Write one CSV per year for Arushi into the project folder on Drive.
dfs = [arushi_df_2021, arushi_df_2022, arushi_df_2023]
names = ['Arushi_2021', 'Arushi_2022', 'Arushi_2023']
for name, df in zip(names, dfs):
    out_path = f'/content/drive/MyDrive/MinorProject7/{name}_Top_Songs.csv'
    df.to_csv(out_path, index=False)

In [None]:
# Read Arushi's three per-year CSV files back into DataFrames.
arushi_csv_paths = (
    f'/content/drive/MyDrive/MinorProject7/Arushi_2021_Top_Songs.csv',
    f'/content/drive/MyDrive/MinorProject7/Arushi_2022_Top_Songs.csv',
    f'/content/drive/MyDrive/MinorProject7/Arushi_2023_Top_Songs.csv',
)
arushi_df_2021, arushi_df_2022, arushi_df_2023 = map(pd.read_csv, arushi_csv_paths)