In [1]:
import pandas as pd
import numpy as np
import os

import time

import spotipy
from spotipy.oauth2 import SpotifyOAuth
from spotipy.exceptions import SpotifyException

from scipy.spatial import distance

In [2]:
from dotenv import load_dotenv

In [3]:
from sklearn.neighbors import NearestNeighbors

In [4]:
load_dotenv()

True

In [5]:
SPOTIPY_CLIENT_ID = os.getenv('SPOTIPY_CLIENT_ID')
SPOTIPY_CLIENT_SECRET = os.getenv('SPOTIPY_CLIENT_SECRET')
SPOTIPY_REDIRECT_URI = os.getenv('SPOTIPY_REDIRECT_URI')

In [6]:
scope = "user-library-read user-top-read playlist-modify-public"

In [7]:
sp = spotipy.Spotify(auth_manager=SpotifyOAuth(scope=scope))

In [8]:
me = sp.current_user()

In [9]:
me

{'display_name': 'Sam Goodson',
 'external_urls': {'spotify': 'https://open.spotify.com/user/stg58661521'},
 'href': 'https://api.spotify.com/v1/users/stg58661521',
 'id': 'stg58661521',
 'images': [],
 'type': 'user',
 'uri': 'spotify:user:stg58661521',
 'followers': {'href': None, 'total': 1}}

Get Bob Dylan's artist profile and albums

In [10]:
sp.artist('74ASZWbe4lXaubB36ztrGX')

{'external_urls': {'spotify': 'https://open.spotify.com/artist/74ASZWbe4lXaubB36ztrGX'},
 'followers': {'href': None, 'total': 6302011.0},
 'genres': ['classic rock',
  'country rock',
  'folk',
  'folk rock',
  'rock',
  'roots rock',
  'singer-songwriter'],
 'href': 'https://api.spotify.com/v1/artists/74ASZWbe4lXaubB36ztrGX',
 'id': '74ASZWbe4lXaubB36ztrGX',
 'images': [{'url': 'https://i.scdn.co/image/ab6772690000c46cd7064356b04a156664a37c4f',
   'height': 1000.0,
   'width': 1000.0},
  {'url': 'https://i.scdn.co/image/ab6772690000dd22d7064356b04a156664a37c4f',
   'height': 640.0,
   'width': 640.0},
  {'url': 'https://i.scdn.co/image/ab6772690000bac3d7064356b04a156664a37c4f',
   'height': 200.0,
   'width': 200.0},
  {'url': 'https://i.scdn.co/image/ab67726900008f74d7064356b04a156664a37c4f',
   'height': 64.0,
   'width': 64.0}],
 'name': 'Bob Dylan',
 'popularity': 70.0,
 'type': 'artist',
 'uri': 'spotify:artist:74ASZWbe4lXaubB36ztrGX'}

In [50]:
results = sp.artist_albums('74ASZWbe4lXaubB36ztrGX', album_type='album', limit=20)
albums = results['items']

In [51]:
results_two = sp.artist_albums('74ASZWbe4lXaubB36ztrGX', album_type='album', limit=20, offset=20)
albums_two = results_two['items']

In [52]:
results_three = sp.artist_albums('74ASZWbe4lXaubB36ztrGX', album_type='album', limit=20, offset=40)
albums_three = results_three['items']

In [53]:
results_four = sp.artist_albums('74ASZWbe4lXaubB36ztrGX', album_type='album', limit=20, offset=60)
albums_four = results_four['items']

In [54]:
albums_df = pd.DataFrame(albums)
albums_df_two = pd.DataFrame(albums_two)
albums_df_three = pd.DataFrame(albums_three)
albums_df_four = pd.DataFrame(albums_four)


In [29]:
albums_df_four['name'] 

0    The Bootleg Volume 6: Bob Dylan Live 1964 - Co...
1                 Bob Dylan Live At Carnegie Hall 1963
2                           The Freewheelin' Bob Dylan
3    Bob Dylan In Concert: Brandeis University 1963...
4    The Witmark Demos: 1962-1964 (The Bootleg Seri...
5                                            Bob Dylan
Name: name, dtype: object

In [108]:
# Combine every page of album results fetched above (four pages of up to 20
# albums each), not just the first two, and renumber the rows so the index is
# unique across pages.
all_albums = pd.concat(
    [albums_df, albums_df_two, albums_df_three, albums_df_four],
    ignore_index=True,
)

In [15]:
def safe_spotify_request(call, *args, **kwargs):
    """Call a Spotify API function, retrying when the API reports rate limiting.

    Parameters
    ----------
    call : callable
        The function to invoke (for example a bound spotipy client method).
    *args, **kwargs
        Forwarded unchanged to ``call``.

    Returns
    -------
    Whatever ``call(*args, **kwargs)`` returns on the first successful attempt.

    Raises
    ------
    SpotifyException
        Re-raised immediately for any HTTP status other than 429.
    Exception
        ``"Maximum retry attempts reached."`` once every attempt hit a 429.
    """
    max_attempts = 5
    default_wait = 30  # seconds; used when no usable Retry-After header is present

    for attempt in range(max_attempts):
        try:
            return call(*args, **kwargs)
        except SpotifyException as e:
            if e.http_status != 429:
                # Not a rate-limit problem: let the caller see the real error.
                raise

            if attempt == max_attempts - 1:
                # No retry will follow, so sleeping again would only waste time.
                break

            headers = getattr(e, 'headers', None) or {}
            try:
                # Prefer the delay the server asked for.
                wait_time = max(0, int(headers.get('Retry-After')))
            except (TypeError, ValueError):
                # Header missing or malformed: back off exponentially instead
                # (30, 60, 120, ... seconds).
                wait_time = default_wait * 2 ** attempt

            print(f"Rate limit exceeded. Retrying after {wait_time} seconds.")
            time.sleep(wait_time)

    raise Exception("Maximum retry attempts reached.")


In [13]:
def get_top_features(sp, top_df):
    """Fetch Spotify audio features for every track id in ``top_df``.

    Ids are sent in batches rather than one request per track, which keeps the
    number of API calls (and therefore rate-limit errors) low.

    Parameters
    ----------
    sp : spotipy.Spotify
        Authenticated client; its ``audio_features`` method is called.
    top_df : pandas.DataFrame
        Must contain an ``'id'`` column of track ids. Null ids are skipped.

    Returns
    -------
    pandas.DataFrame
        One row per track that has audio features, in request order, without
        the ``type``, ``uri``, ``track_href``, ``analysis_url`` and
        ``duration_ms`` columns. If no features are available the result is an
        empty frame that still has an ``'id'`` column.
    """
    batch_size = 100  # the audio-features endpoint accepts at most 100 ids per call
    track_ids = top_df['id'].dropna().tolist()

    feature_rows = []
    for start in range(0, len(track_ids), batch_size):
        batch = track_ids[start:start + batch_size]
        features = safe_spotify_request(sp.audio_features, batch)
        # The response holds None for ids without features (and may itself be
        # None); keep only the real feature dicts.
        feature_rows.extend(item for item in (features or []) if item)

    if not feature_rows:
        # Keep the merge key so callers can still join on 'id'.
        return pd.DataFrame(columns=['id'])

    features_df = pd.DataFrame(feature_rows)
    features_df = features_df.drop(
        columns=['type', 'uri', 'track_href', 'analysis_url', 'duration_ms'],
        errors='ignore',
    )

    return features_df


In [19]:
def get_tracks_df(sp, albums_df):
    """Collect every track of the given albums plus their audio features.

    Parameters
    ----------
    sp : spotipy.Spotify
        Authenticated client; ``album_tracks`` and ``next`` are called on it.
    albums_df : pandas.DataFrame
        Must contain an ``'id'`` column of album ids.

    Returns
    -------
    (features, track_df) : tuple of pandas.DataFrame
        ``track_df`` holds the raw track objects returned by the API, one row
        per track; ``features`` is the result of ``get_top_features`` for them.
    """
    track_list = []
    for album_id in albums_df['id']:
        page = safe_spotify_request(sp.album_tracks, album_id)
        # album_tracks returns a single page of results; follow the 'next'
        # links so albums longer than one page are not silently truncated.
        while page:
            track_list.extend(page['items'])
            page = safe_spotify_request(sp.next, page) if page.get('next') else None

    track_df = pd.DataFrame(track_list)
    features = get_top_features(sp, track_df)
    return features, track_df
    

In [47]:
def get_tracks_df_mod(sp, albums_df):
    """Build a slim track table (id, name, uri, album name) for the given albums.

    Parameters
    ----------
    sp : spotipy.Spotify
        Authenticated client; ``album_tracks`` and ``next`` are called on it.
    albums_df : pandas.DataFrame
        Must contain ``'id'`` (album id) and ``'name'`` (album name) columns.

    Returns
    -------
    pandas.DataFrame
        Columns ``id``, ``track_name``, ``track_uri``, ``album_name``; one row
        per track. The columns are present even when no tracks were found.
    """
    columns = ['id', 'track_name', 'track_uri', 'album_name']
    track_list = []

    for album_id, album_name in zip(albums_df['id'], albums_df['name']):
        page = safe_spotify_request(sp.album_tracks, album_id)
        # album_tracks returns a single page of results; follow the 'next'
        # links so albums longer than one page are not silently truncated.
        while page:
            for track in page['items']:
                track_list.append({
                    'id': track['id'],
                    'track_name': track['name'],
                    'track_uri': track['uri'],
                    'album_name': album_name,
                })
            page = safe_spotify_request(sp.next, page) if page.get('next') else None

    return pd.DataFrame(track_list, columns=columns)


In [48]:
def merge_track_features(sp, albums_df):
    """Return one row per album track with its audio features attached.

    Parameters
    ----------
    sp : spotipy.Spotify
        Authenticated client, passed through to the helper functions.
    albums_df : pandas.DataFrame
        Album table accepted by ``get_tracks_df_mod``.

    Returns
    -------
    pandas.DataFrame
        The track table from ``get_tracks_df_mod`` inner-joined on ``'id'``
        with the features from ``get_top_features``; tracks without features
        are dropped by the inner join.
    """
    track_df = get_tracks_df_mod(sp, albums_df)
    features_df = get_top_features(sp, track_df)

    # The same track id can occur more than once in track_df, in which case the
    # feature table repeats it too. Keep a single feature row per id so the
    # join cannot multiply rows.
    features_df = features_df.drop_duplicates(subset='id')
    final_df = pd.merge(track_df, features_df, on='id', validate='many_to_one')

    return final_df


In [22]:
track_df = get_tracks_df_mod(sp, albums_df)

In [55]:
bob_features_one = merge_track_features(sp, albums_df)

In [57]:
bob_features_two = merge_track_features(sp, albums_df_two)

In [58]:
bob_features_three = merge_track_features(sp, albums_df_three)

Max Retries reached


Rate limit exceeded. Retrying after 30 seconds.


Max Retries reached


Rate limit exceeded. Retrying after 30 seconds.


Max Retries reached


Rate limit exceeded. Retrying after 30 seconds.


Max Retries reached


Rate limit exceeded. Retrying after 30 seconds.


Max Retries reached


Rate limit exceeded. Retrying after 30 seconds.


Exception: Maximum retry attempts reached.

In [30]:
bob_features_four = merge_track_features(sp, albums_df_four)

In [31]:
bob_features_concat = pd.concat([bob_features_one, bob_features_two, bob_features_three, bob_features_four])

In [33]:
bob_features_concat.to_csv('data/bob_features.csv', index=False)

In [72]:
bob_features_concat = pd.read_csv('data/bob_features.csv')

In [73]:
bob_scratch = bob_features_concat.copy()

In [71]:
cat_col = ['mode', 'key', 'time_signature']
con_col = ['danceability', 'energy', 'loudness', 'speechiness', 'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo']

In [69]:
def process_bob(bob_df, cat_cols, con_cols):
    """Aggregate track-level features into one row per album.

    Continuous columns are averaged per ``album_name`` and stored as
    ``<col>_mean``; categorical columns are reduced to their most frequent
    value per album and stored as ``<col>_mode`` (``None`` when a group has no
    mode). Requested columns missing from ``bob_df`` are ignored. The result
    has ``album_name`` as a regular column.
    """
    def _most_common(values):
        # First mode of the group, or None when the group has no values.
        modes = values.mode()
        return None if modes.empty else modes[0]

    by_album = bob_df.groupby('album_name')
    aggregated = {}

    # Means first, then modes, so the output column order follows
    # con_cols followed by cat_cols.
    for name in con_cols:
        if name in bob_df.columns:
            aggregated[name + '_mean'] = by_album[name].mean()

    for name in cat_cols:
        if name in bob_df.columns:
            aggregated[name + '_mode'] = by_album[name].apply(_most_common)

    return pd.DataFrame(aggregated).reset_index()


In [74]:
bob_album_ag = process_bob(bob_scratch, cat_col, con_col)

In [82]:
bob_album_ag.to_csv('data/bob_album_ag.csv', index=False)

In [10]:
top = sp.current_user_top_tracks()

In [11]:
top_df = pd.DataFrame(top['items'])

In [27]:
# NOTE(review): no visible cell assigns a top-level `features` variable (it only
# exists as a local inside get_top_features), so this line presumably relies on
# leftover kernel state and would raise NameError after Restart & Run All.
# `features_df` is not read by any later visible cell — confirm and remove.
features_df = pd.DataFrame(features)

In [16]:
top_features = get_top_features(sp, top_df)

In [17]:
manual_catagorical_cols = ['mode', 'key', 'time_signature']

In [18]:
def process_dataframe(df, manual_categorical_cols=None, unique_value_threshold=10):
    """Summarise each column of ``df`` into a single value.

    A column is treated as categorical when it is listed in
    ``manual_categorical_cols`` or has at most ``unique_value_threshold``
    distinct values; its most frequent value is stored under ``<col>_mode``.
    Every other column is averaged and stored under ``<col>_mean``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to summarise.
    manual_categorical_cols : list of str, optional
        Columns to always treat as categorical.
    unique_value_threshold : int, default 10
        Maximum number of distinct values for a column to be auto-detected as
        categorical.

    Returns
    -------
    dict
        Maps ``<col>_mode`` / ``<col>_mean`` to the summary value. A column
        with no non-null values yields ``None`` for its mode; a column whose
        mean cannot be computed (e.g. free text) falls back to its mode.
    """
    processed_data = {}

    if manual_categorical_cols is None:
        manual_categorical_cols = []

    def _first_mode(series):
        # mode() is empty for all-null or empty columns; report None instead of
        # raising on the [0] lookup.
        modes = series.mode()
        return None if modes.empty else modes.iloc[0]

    for col in df.columns:
        series = df[col]
        if series.nunique() <= unique_value_threshold or col in manual_categorical_cols:
            processed_data[col + '_mode'] = _first_mode(series)
            continue

        try:
            processed_data[col + '_mean'] = series.mean()
        except (TypeError, ValueError):
            # Non-numeric column with many distinct values: a mean is
            # meaningless, so summarise it by its mode instead of crashing.
            processed_data[col + '_mode'] = _first_mode(series)

    return processed_data


In [19]:
#drop id col from top_features
top_features = top_features.drop(columns=['id'])

In [59]:
top_features.to_csv('data/example_user_features.csv', index=False)

In [20]:
averages = process_dataframe(top_features, manual_catagorical_cols)

Work on recommendations

In [48]:
bob_album_ag['album_name']

0     Another Self Portrait (1969-1971): The Bootleg...
1                             Another Side Of Bob Dylan
2                               Before The Flood (Live)
3                                      Blonde On Blonde
4                                   Blood On The Tracks
                            ...                        
61    Travelin' Thru, 1967 - 1969: The Bootleg Serie...
62                                           Triplicate
63    Trouble No More: The Bootleg Series, Vol. 13 /...
64                                    Under The Red Sky
65                                     World Gone Wrong
Name: album_name, Length: 66, dtype: object

In [49]:
# Find the album in bob_album_ag whose aggregated feature vector has the highest
# cosine similarity to the user's summary values in `averages`.
# Start at -1 so the first larger similarity replaces it.
max_similarity = -1  
closest_album = None

# Keys looked up in `averages` and columns read from each album row; using the
# same list for both keeps the two vectors in the same order.
feature_columns = ['danceability_mean', 'energy_mean', 'loudness_mean','speechiness_mean',
                    'acousticness_mean', 'instrumentalness_mean','liveness_mean', 'valence_mean',
                      'tempo_mean', 'mode_mode', 'key_mode','time_signature_mode'] 

averages_features = [averages[col] for col in feature_columns]
averages_flat = np.array(averages_features).flatten()

# NOTE(review): the features are compared on their raw scales, so the few
# large-magnitude columns (tempo, loudness) presumably dominate the cosine
# similarity — consider standardising before comparing.
for index, row in bob_album_ag.iterrows():
    album_features = row[feature_columns].tolist()  
    # distance.cosine returns a distance; 1 - distance gives the similarity.
    similarity = 1 - distance.cosine(averages_flat, album_features)  
    if similarity > max_similarity:
        max_similarity = similarity
        closest_album = row['album_name']  

print(f"The closest album to the user's preferences is: {closest_album}")

The closest album to the user's preferences is: The Basement Tapes Complete: The Bootleg Series, Vol. 11 (Deluxe Edition)


In [None]:
def find_closest_album(user_features, album_features, feature_columns):
    """Print and return the album most similar to the user's feature vector.

    Parameters
    ----------
    user_features : array-like
        The user's feature values, ordered like ``feature_columns``.
    album_features : pandas.DataFrame
        One row per album with an ``'album_name'`` column plus every column
        named in ``feature_columns``.
    feature_columns : list of str
        Album columns to compare against ``user_features``.

    Returns
    -------
    The ``album_name`` with the highest cosine similarity, or ``None`` when no
    album produced a similarity greater than -1 (e.g. an empty table).
    """
    max_similarity = -1
    closest_album = None

    user_vector = np.asarray(user_features, dtype=float).flatten()
    album_names = album_features['album_name'].tolist()
    # Pull the numeric block out once; using separate local names also avoids
    # overwriting the `album_features` argument inside the loop.
    album_matrix = album_features[feature_columns].to_numpy(dtype=float)

    for album_name, album_vector in zip(album_names, album_matrix):
        # distance.cosine returns a distance; 1 - distance is the similarity.
        similarity = 1 - distance.cosine(user_vector, album_vector)
        if similarity > max_similarity:
            max_similarity = similarity
            closest_album = album_name

    print(f"The closest album to the user's preferences is: {closest_album}")
    return closest_album

Let's make a playlist

In [22]:
bob_scratch = pd.read_csv('data/bob_features.csv')

In [23]:
bob_scratch.columns

Index(['id', 'track_name', 'album_name', 'danceability', 'energy', 'key',
       'loudness', 'mode', 'speechiness', 'acousticness', 'instrumentalness',
       'liveness', 'valence', 'tempo', 'time_signature'],
      dtype='object')

In [24]:
# drop id, track_name, and album_name from bob_scratch
X = bob_scratch.drop(columns=['id', 'track_name', 'album_name'])

In [25]:
knn = NearestNeighbors(n_neighbors=30)  
knn.fit(X)

In [26]:
averages

{'danceability_mean': 0.5064,
 'energy_mean': 0.43419850000000004,
 'key_mode': 3,
 'loudness_mean': -12.637500000000001,
 'mode_mode': 1,
 'speechiness_mean': 0.037834999999999994,
 'acousticness_mean': 0.52983,
 'instrumentalness_mean': 0.1080965205,
 'liveness_mean': 0.21473500000000004,
 'valence_mean': 0.48275499999999993,
 'tempo_mean': 119.59775000000002,
 'time_signature_mode': 4}

In [27]:
feature_columns = [
    'danceability_mean',
    'energy_mean',
    'key_mode',
    'loudness_mean',
    'mode_mode',
    'speechiness_mean',
    'acousticness_mean',
    'instrumentalness_mean',
    'liveness_mean',
    'valence_mean',
    'tempo_mean',
    'time_signature_mode'
]


In [28]:
user_profile = np.array([averages[feature] for feature in feature_columns]).reshape(1, -1)

In [29]:
distances, indices = knn.kneighbors(user_profile)



In [None]:
def get_recommended_songs(songs, user_info, n_neighbors=30, feature_columns=None):
    """Return the songs whose audio features are nearest to the user's profile.

    Parameters
    ----------
    songs : pandas.DataFrame
        Track table with ``id``, ``track_name`` and ``album_name`` columns plus
        one numeric column per audio feature.
    user_info : dict
        The user's summary values keyed as ``<feature>_mean`` or
        ``<feature>_mode``.
    n_neighbors : int, default 30
        Number of songs to return; capped at the number of rows in ``songs``.
    feature_columns : list of str, optional
        ``user_info`` keys to use. When omitted, every feature column of
        ``songs`` that has a matching ``_mean``/``_mode`` key in ``user_info``
        is used.

    Returns
    -------
    pandas.DataFrame
        ``track_name`` and ``id`` of the nearest songs, closest first.

    Raises
    ------
    ValueError
        If ``songs`` and ``user_info`` share no features.
    """
    candidates = songs.drop(columns=['id', 'track_name', 'album_name'])

    # Pair each song column with the user_info key describing the same feature,
    # so both vectors are guaranteed to be in the same order.
    if feature_columns is None:
        pairs = []
        for col in candidates.columns:
            for key in (col + '_mean', col + '_mode'):
                if key in user_info:
                    pairs.append((col, key))
                    break
    else:
        # 'time_signature_mode' -> 'time_signature', 'tempo_mean' -> 'tempo'
        pairs = [(key.rsplit('_', 1)[0], key) for key in feature_columns]

    if not pairs:
        raise ValueError("songs and user_info have no features in common.")

    X = candidates[[col for col, _ in pairs]].to_numpy(dtype=float)

    # Asking for more neighbours than there are songs makes kneighbors fail.
    k = min(n_neighbors, len(X))
    if k == 0:
        return songs.iloc[0:0][['track_name', 'id']]

    knn = NearestNeighbors(n_neighbors=k)
    knn.fit(X)
    user_profile = np.array([user_info[key] for _, key in pairs], dtype=float).reshape(1, -1)
    distances, indices = knn.kneighbors(user_profile)
    recommended_songs_df = songs.iloc[indices[0]][['track_name', 'id']]
    return recommended_songs_df


In [30]:
recommended_songs_df = bob_scratch.iloc[indices[0]][['track_name', 'id']]

print("Recommended Songs:")
for song in recommended_songs_df['track_name']:
    print(song)

Recommended Songs:
Honey, Just Allow Me One More Chance
Desolation Row - Live at Royal Albert Hall, London, UK - May 26, 1966
Jesus Is the One - Live July 17, 1981
Jack-a-Roe
Song for Canada
There's a Flaw in My Flue
Rambling, Gambling Willie - Witmark Demo - 1962
You Don't Have to Do That - Take 1, Solo Acoustic
What Can I Do for You? - Live at Golden Hall, San Diego, CA - November 27, 1979
Going, Going, Gone
I Dreamed I Saw St. Augustine - Live at Montreal Forum, Montreal, Quebec - December 1975
Long Ago, Far Away - Witmark Demo - 1962
With God on Our Side - Live at Carnegie Hall, New York, NY - October 1963
The Girl On the Greenbriar Shore - Live at Cote d'Opale, Dunkerque, France - June 1992
I Threw It All Away - Live at Nippon Budokan Hall, Tokyo, Japan - March 1, 1978
I Threw It All Away - Live with The Band, Isle Of Wight - Remixed and Remastered 2013
It's All Over Now, Baby Blue
Big River - Take 2
I Am a Lonesome Hobo
Sitting On a Barbed Wire Fence - Take 2
Annie's Going to Sin

In [31]:
recommended_songs_df

Unnamed: 0,track_name,id
1099,"Honey, Just Allow Me One More Chance",71XTdtW3bvmIrotG8yYVMS
947,"Desolation Row - Live at Royal Albert Hall, Lo...",4hIOKB9UN6Cj4lOkn1CAxC
249,"Jesus Is the One - Live July 17, 1981",2CwVWO0y729HN867N0QiwS
485,Jack-a-Roe,43A4KdBB6wlzRqaISolzqV
891,Song for Canada,07CPKnNDAOWjHPgxVkhrUE
276,There's a Flaw in My Flue,6OwmxcxlraKhUXofqQwgNK
1112,"Rambling, Gambling Willie - Witmark Demo - 1962",2TMJrLYSGo9ys1rD7sM9Bc
1014,"You Don't Have to Do That - Take 1, Solo Acoustic",5HgpySxSUjT7wP326hXcXP
218,"What Can I Do for You? - Live at Golden Hall, ...",1jzDM6sfisy1hVuiwB4Vr2
778,"Going, Going, Gone",5jZLCVKppwm7dGxvb68fTW


In [35]:
track_ids = recommended_songs_df['id'].tolist()

In [37]:
track_info = sp.tracks(track_ids)

In [40]:
track_info_df = pd.DataFrame(track_info['tracks'])

In [32]:
user_id = me['id']

In [33]:
sp.user_playlist_create('stg58661521',"FindYOurBob", public=True, collaborative=False, description='Discovering your personal slice of Bob')

{'collaborative': False,
 'description': 'Discovering your personal slice of Bob',
 'external_urls': {'spotify': 'https://open.spotify.com/playlist/6OdVHNL4mf8lcWVhmUPUDo'},
 'followers': {'href': None, 'total': 0},
 'href': 'https://api.spotify.com/v1/playlists/6OdVHNL4mf8lcWVhmUPUDo',
 'id': '6OdVHNL4mf8lcWVhmUPUDo',
 'images': [],
 'name': 'FindYOurBob',
 'owner': {'display_name': 'Sam Goodson',
  'external_urls': {'spotify': 'https://open.spotify.com/user/stg58661521'},
  'href': 'https://api.spotify.com/v1/users/stg58661521',
  'id': 'stg58661521',
  'type': 'user',
  'uri': 'spotify:user:stg58661521'},
 'primary_color': None,
 'public': True,
 'snapshot_id': 'MSwwMzJlZjE2MDNmNTg2NWRlOWUyNjY3OGFlN2MyODczZWQ3N2JhZjBj',
 'tracks': {'href': 'https://api.spotify.com/v1/playlists/6OdVHNL4mf8lcWVhmUPUDo/tracks',
  'items': [],
  'limit': 100,
  'next': None,
  'offset': 0,
  'previous': None,
  'total': 0},
 'type': 'playlist',
 'uri': 'spotify:playlist:6OdVHNL4mf8lcWVhmUPUDo'}

In [45]:
tracks = ["71XTdtW3bvmIrotG8yYVMS", "2gkFEceXTqBsBwvK18TgqL"]

In [60]:
sp.user_playlist_add_tracks(user_id, '6OdVHNL4mf8lcWVhmUPUDo', track_ids, position=None)

{'snapshot_id': 'NCw0MGU0NzA1Y2NkZGE4MzQ1YTZkZTk2NGEyNzE4YTdhYmZiZTAxZGYw'}

In [None]:
def create_and_fill_playlist(recommended_songs_df,user):
    """Create the "FindYourBob" playlist for ``user`` and add the recommended tracks.

    Uses the notebook-level ``sp`` client.

    Parameters
    ----------
    recommended_songs_df : pandas.DataFrame
        Must contain an ``'id'`` column of track ids.
    user : dict
        Spotify user object; only ``user['id']`` is read.

    Returns
    -------
    dict
        The playlist object returned by ``sp.user_playlist_create``.
    """
    user_id = user['id']
    # Create the playlist for the user that was passed in, not a fixed account.
    playlist = sp.user_playlist_create(user_id,"FindYourBob", public=True, collaborative=False, description='Discovering your personal slice of Bob')
    track_ids = recommended_songs_df['id'].tolist()

    # Spotify accepts at most 100 items per add request; an empty list skips
    # the request entirely.
    batch_size = 100
    for start in range(0, len(track_ids), batch_size):
        sp.user_playlist_add_tracks(user_id, playlist['id'], track_ids[start:start + batch_size], position=None)

    print("Playlist created and filled with recommended songs.")
    return playlist

app scratch

In [62]:
def find_closest_album(user_raw,album_features, feature_columns):
    """Return the album whose features are most similar to the user's profile.

    Parameters
    ----------
    user_raw : dict
        The user's summary values; must contain every key in
        ``feature_columns``.
    album_features : pandas.DataFrame
        One row per album with an ``'album_name'`` column plus every column
        named in ``feature_columns``.
    feature_columns : list of str
        Names used both as ``user_raw`` keys and as album columns.

    Returns
    -------
    The ``album_name`` with the highest cosine similarity, or ``None`` when no
    album produced a similarity greater than -1 (e.g. an empty table).
    """
    max_similarity = -1
    closest_album = None

    user_features = np.array([user_raw[col] for col in feature_columns], dtype=float).flatten()

    album_names = album_features['album_name'].tolist()
    # Pull the numeric block out once; using separate local names also avoids
    # overwriting the `album_features` argument inside the loop.
    album_matrix = album_features[feature_columns].to_numpy(dtype=float)

    for album_name, album_vector in zip(album_names, album_matrix):
        # distance.cosine returns a distance; 1 - distance is the similarity.
        similarity = 1 - distance.cosine(user_features, album_vector)
        if similarity > max_similarity:
            max_similarity = similarity
            closest_album = album_name

    return closest_album

In [63]:
def create_and_fill_playlist(recommended_songs_df,user):
    """Create the "FindYourBob" playlist for ``user`` and add the recommended tracks.

    Uses the notebook-level ``sp`` client.

    Parameters
    ----------
    recommended_songs_df : pandas.DataFrame
        Must contain an ``'id'`` column of track ids.
    user : dict
        Spotify user object; only ``user['id']`` is read.

    Returns
    -------
    dict
        The playlist object returned by ``sp.user_playlist_create``.
    """
    user_id = user['id']
    # Create the playlist for the user that was passed in, not a fixed account.
    playlist = sp.user_playlist_create(user_id,"FindYourBob", public=True, collaborative=False, description='Discovering your personal slice of Bob')
    track_ids = recommended_songs_df['id'].tolist()

    # Spotify accepts at most 100 items per add request; an empty list skips
    # the request entirely.
    batch_size = 100
    for start in range(0, len(track_ids), batch_size):
        sp.user_playlist_add_tracks(user_id, playlist['id'], track_ids[start:start + batch_size], position=None)

    print("Playlist created and filled with recommended songs.")
    return playlist

In [61]:
from dash import Dash, html, dcc, callback, Output, Input,State

In [76]:
app = Dash(__name__)

In [77]:
# Page layout: a centred title block, then two button/output pairs. The ids set
# here ('album-button', 'album-recommendation', 'playlist-button',
# 'playlist-creation') are the ones the callbacks below refer to.
app.layout = html.Div([
    # Title and tagline.
    html.Div([
    html.H1('Find Your Bob'),
    html.P('Discover your personal slice of Bob'),
    ],style={'font-family': 'Georgia','padding': '10px','textAlign': 'center'}),
    # Album recommendation: button plus the Div that shows the result text.
    html.Div([
        html.Button('Find your bob Album', id='album-button', n_clicks=0)
    ], style={'textAlign': 'center'}),
    html.Div(id='album-recommendation'),
    # Playlist creation: button plus the Div reserved for its output.
    html.Div([
        html.Button('Create Playlist', id='playlist-button', n_clicks=0)
    ], style={'textAlign': 'center'}),
    html.Div(id='playlist-creation')
])

@app.callback(
    Output('album-recommendation', 'children'),
    [Input('album-button', 'n_clicks')]
)
def recommend_album(n_clicks):
    """Show the closest album once the album button has been clicked.

    Reads the notebook-level ``averages``, ``bob_album_ag`` and
    ``feature_columns``. Returns the text for the 'album-recommendation' Div,
    or ``None`` while the button has not been clicked.
    """
    # The callback also runs on page load with n_clicks=0; skip the similarity
    # search until the user actually clicks.
    if not n_clicks:
        return None

    closest_album = find_closest_album(averages, bob_album_ag, feature_columns)
    return f"The closest album to the user's preferences is: {closest_album}"
    
@app.callback(
    Output('playlist-creation', 'children'),
    [Input('playlist-button', 'n_clicks')]
)
def create_playlist(n_clicks):
    """Create the playlist when the playlist button is clicked and report the outcome.

    Reads the notebook-level ``recommended_songs_df`` and ``me``. Returns the
    status text for the 'playlist-creation' Div, or ``None`` while the button
    has not been clicked.
    """
    # The callback also runs on page load with n_clicks=0; do nothing then.
    if not n_clicks:
        return None

    try:
        create_and_fill_playlist(recommended_songs_df, me)
    except SpotifyException as exc:
        # Surface API failures in the page instead of leaving the output blank.
        return f"Could not create the playlist: {exc}"

    # Returning text gives the user visible confirmation in the page.
    return "Playlist created and filled with recommended songs."

In [78]:
app.run_server(debug=True)