In [1]:
!pip install spotipy
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import streamlit as st
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
import numpy as np
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
import plotly.express as px
from collections import defaultdict
from scipy.spatial.distance import cdist
import time
import os  # needed only here, to read the credentials from the environment

# Spotify API credentials. Values are taken from the SPOTIPY_CLIENT_ID /
# SPOTIPY_CLIENT_SECRET environment variables when they are set, so real
# secrets never have to be written into (or committed with) the notebook.
# The original placeholders are kept as the fallback, so the cell behaves
# exactly as before when the variables are unset.
cid = os.environ.get('SPOTIPY_CLIENT_ID', 'YOUR CLIENT ID')
secret = os.environ.get('SPOTIPY_CLIENT_SECRET', 'YOUR CLIENT SECRET')

# Client-credentials flow: app-level access, no user login involved.
client_credentials_manager = SpotifyClientCredentials(client_id=cid, client_secret=secret)

# Shared Spotipy client used by the lookup functions and cells below.
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)



In [2]:
# Track dataset used as the recommendation pool. The functions below read the
# 'track_name' and 'artist_name' columns plus the numeric audio-feature
# columns from this frame.
DATASET_CSV = 'fourtet.csv'
df = pd.read_csv(DATASET_CSV)

In [3]:
def find_song(name, artist):
    """
    Look up one track on Spotify and return its data as a one-row DataFrame.

    The module-level Spotipy client ``sp`` is used twice: once to search for
    the track (first hit only) and once to fetch that track's audio features.

    Parameters
    ----------
    name : str
        Track title to search for.
    artist : str
        Artist name to search for.

    Returns
    -------
    pandas.DataFrame or None
        A single row holding ``name`` and ``artist`` (as passed in),
        ``explicit`` (0/1), ``duration_ms``, ``popularity`` and every key of
        the audio-features payload. ``None`` is returned when the search has
        no hit or when Spotify has no audio features for the hit.
    """
    # NOTE(review): Spotify's documented field-filter syntax appears to be
    # 'track:NAME artist:NAME' (no space after the colon) -- confirm whether
    # the spaces here are intentional. The query is left unchanged.
    results = sp.search(q= 'track: {} artist: {}'.format(name,
                                                       artist), limit=1)
    items = results['tracks']['items']
    if not items:
        return None

    track = items[0]

    # audio_features() answers with a list; the entry (or the whole answer)
    # can be None when no analysis exists for the track. Treat that like
    # "not found" instead of crashing on None.items() below.
    features_list = sp.audio_features(track['id'])
    audio_features = features_list[0] if features_list else None
    if audio_features is None:
        return None

    song_data = {
        'name': [name],
        'artist': [artist],
        'explicit': [int(track['explicit'])],
        'duration_ms': [track['duration_ms']],
        'popularity': [track['popularity']],
    }

    # Scalars are broadcast against the one-element lists above, so the frame
    # still has exactly one row. Keys that also appear above (e.g.
    # 'duration_ms') are overwritten by the audio-features value.
    for key, value in audio_features.items():
        song_data[key] = value

    return pd.DataFrame(song_data)

In [4]:
def get_song_data(song, spotify_data):
    """
    Fetch the data for one song, preferring the local dataset over the API.

    ``song`` is a dictionary with ``'name'`` and ``'artist'`` keys. The first
    row of ``spotify_data`` whose ``track_name`` and ``artist_name`` both
    match is returned as a Series. When no row matches, the lookup is
    delegated to ``find_song`` and whatever it returns is passed through.
    """
    name_matches = spotify_data['track_name'] == song['name']
    artist_matches = spotify_data['artist_name'] == song['artist']
    matching_rows = spotify_data[name_matches & artist_matches]

    # No local match: fall back to a live Spotify lookup.
    if len(matching_rows) == 0:
        return find_song(song['name'], song['artist'])

    return matching_rows.iloc[0]

In [5]:
def get_mean_vector(song_list, spotify_data):
    """
    Compute the mean audio-feature vector of a list of songs.

    Parameters
    ----------
    song_list : list of dict
        Each dict is passed to ``get_song_data`` and must carry at least a
        ``'name'`` key (used in the warning message).
    spotify_data : pandas.DataFrame
        Dataset handed through to ``get_song_data``.

    Returns
    -------
    numpy.ndarray
        1-D float array with one entry per feature in ``number_cols``
        (element-wise mean over all songs that could be resolved).

    Raises
    ------
    ValueError
        If none of the songs could be resolved, since there is nothing to
        average.
    """
    number_cols = ['valence', 'acousticness', 'danceability', 'duration_ms', 'energy', 'instrumentalness', 'liveness', 'loudness', 'speechiness', 'tempo']

    song_vectors = []
    for song in song_list:
        song_data = get_song_data(song, spotify_data)
        if song_data is None:
            print('Warning: {} does not exist in Spotify or in database'.format(song['name']))
            continue
        # get_song_data gives a Series for dataset hits but a one-row
        # DataFrame for API lookups; flatten both to the same 1-D float shape
        # so they can be stacked and averaged together.
        song_vector = np.asarray(song_data[number_cols].values, dtype=float).ravel()
        song_vectors.append(song_vector)

    if not song_vectors:
        # Averaging an empty list would only yield NaN plus a NumPy warning.
        raise ValueError('None of the given songs could be found in Spotify or in database')

    song_matrix = np.array(song_vectors)
    return np.mean(song_matrix, axis=0)

In [6]:
def flatten_dict_list(dict_list):
    """
    Flatten a list of dictionaries into one dictionary of lists.

    Every key that occurs in any input dictionary maps to the list of values
    found under that key, in input order, e.g.
    ``[{'name': 'a'}, {'name': 'b'}]`` -> ``{'name': ['a', 'b']}``.

    Parameters
    ----------
    dict_list : list of dict
        Dictionaries to merge. May be empty, and the dictionaries do not need
        to share the same keys.

    Returns
    -------
    collections.defaultdict
        Mapping from key to list of values. Looking up a key that was never
        seen raises ``KeyError``.
    """
    flattened_dict = defaultdict(list)

    for dictionary in dict_list:
        for key, value in dictionary.items():
            flattened_dict[key].append(value)

    # The list factory is only a convenience while collecting. Switch it off
    # so callers get a KeyError for unknown keys instead of a silently
    # created empty list.
    flattened_dict.default_factory = None
    return flattened_dict

In [7]:
def recommend_songs(song_list, spotify_data, n_songs=10):
    """
    Recommend the dataset tracks closest to a list of songs the user likes.

    The mean audio-feature vector of ``song_list`` is compared, after
    standardisation, with every track in ``spotify_data`` using cosine
    distance; the nearest tracks are returned.

    Parameters
    ----------
    song_list : list of dict
        Songs to base the recommendation on, each with ``'name'`` and
        ``'artist'`` keys.
    spotify_data : pandas.DataFrame
        Candidate tracks. Must contain ``track_name``, ``artist_name`` and the
        feature columns listed in ``number_cols``. The frame is not modified.
    n_songs : int, default 10
        Maximum number of recommendations.

    Returns
    -------
    pandas.DataFrame
        ``track_name`` and ``artist_name`` of the recommended tracks, nearest
        first, keeping the row labels of ``spotify_data``. Tracks whose name
        appears in ``song_list`` are never recommended. Fewer than ``n_songs``
        rows are returned only when the dataset has fewer eligible tracks.
    """
    metadata_cols = ['track_name', 'artist_name']
    number_cols = ['valence', 'acousticness', 'danceability', 'duration_ms', 'energy', 'instrumentalness', 'liveness', 'loudness', 'speechiness', 'tempo']
    song_dict = flatten_dict_list(song_list)

    song_center = get_mean_vector(song_list, spotify_data)

    # Exclude the input songs BEFORE ranking; filtering after taking the top
    # n would shrink the result below n_songs whenever an input song ranks
    # among its own nearest neighbours.
    is_input_song = spotify_data['track_name'].isin(song_dict['name'])
    candidates = spotify_data[~is_input_song]
    if candidates.empty:
        return pd.DataFrame(candidates[metadata_cols])

    # Only standardisation is needed for the ranking, so the scaler is fitted
    # directly on number_cols of the full dataset. Fitting and transforming
    # the same explicit column list keeps feature count and order aligned
    # even when the dataset carries additional numeric columns. Plain arrays
    # are used so the name-less song vector can be transformed consistently.
    scaler = StandardScaler()
    scaler.fit(spotify_data[number_cols].to_numpy(dtype=float))
    scaled_candidates = scaler.transform(candidates[number_cols].to_numpy(dtype=float))
    scaled_song_center = scaler.transform(np.asarray(song_center, dtype=float).reshape(1, -1))

    # cdist returns shape (1, n_candidates); row 0 holds the distances.
    distances = cdist(scaled_song_center, scaled_candidates, 'cosine')
    nearest = np.argsort(distances[0])[:n_songs]

    rec_songs = candidates.iloc[nearest]
    return pd.DataFrame(rec_songs[metadata_cols])

In [74]:
# Seed the recommender with a single track and ask for up to ten suggestions
# drawn from the loaded dataset.
song_list = [dict(name='Stairway to Heaven', artist='Led Zeppelin')]
rec_df = recommend_songs(song_list=song_list, spotify_data=df, n_songs=10)
rec_df

Unnamed: 0,track_name,artist_name
1149,Deep Summer - Burial Remix,Mønic
613,Circular Time,Don't DJ
1552,A Love Supreme,Alice Coltrane
1367,Taqsīm Maqām Ājam,Rahim AlHaj
206,Now Jazz Ramwong,Albert Mangelsdorff Quintet
1366,Peace for Earth,Four Tet
94,Movement 6,Floating Points
1313,And the Darkest Hour is Just Before Dawn,Daniel Schmidt
887,Autumn Signal,Joan La Barbara
854,Never Saw Him Again,Mary Lattimore


In [35]:
# NOTE(review): this cell consists solely of a triple-quoted string, so the
# search code inside it is not executed; as written, the cell evaluates to the
# string itself. The list shown as its recorded output therefore presumably
# comes from an earlier run, made before the code was wrapped in quotes.
'''results_list = []
results = sp.search(q='track: {} artist: {}'.format('So Sick', 'Ne-Yo'), limit=1)
results_list.append(results['tracks']['items'][0]['external_urls']['spotify'])
results_list'''

['https://open.spotify.com/track/6brl7bwOHmGFkNw3MBqssT']

In [92]:
# Look up a single track and collect its Spotify URL; the list stays empty
# when the search yields no hit.
results_list = []
results = sp.search(q="track: {} artist: {}".format('Peace For Earth', 'Four Tet'), limit=1)
found_tracks = results['tracks']['items']
if len(found_tracks) > 0:
    results_list.append(found_tracks[0]['external_urls']['spotify'])
results_list

['https://open.spotify.com/track/78QUWHmZOv0lv5OKSoqAPN']

In [93]:
# Collect the Spotify URL of every recommended track.
results_list = []

# Select the two columns explicitly so the unpacking does not depend on
# rec_df having exactly two columns in this order.
recommended = rec_df[['track_name', 'artist_name']].itertuples(index=False, name=None)
for track_name, artist_name in recommended:
    results = sp.search(q='track: {} artist: {}'.format(track_name, artist_name), limit=1)
    found_tracks = results['tracks']['items']
    if not found_tracks:
        # Still skipped (best effort), but reported: a silent skip makes the
        # URL list shorter than rec_df with no hint about which track is missing.
        print('Warning: no Spotify result for {} by {}'.format(track_name, artist_name))
        continue
    results_list.append(found_tracks[0]['external_urls']['spotify'])
results_list

['https://open.spotify.com/track/1NgPFhN5fvx7Z3EgWckRQ8',
 'https://open.spotify.com/track/1VT4hCdbHrn87tfXXGyOOp',
 'https://open.spotify.com/track/4PQcpuXtjA1OGO9tcnbDSH',
 'https://open.spotify.com/track/09PnZqSJcxRh71NBZMLftN',
 'https://open.spotify.com/track/78QUWHmZOv0lv5OKSoqAPN',
 'https://open.spotify.com/track/5i0EqAX50KcKNgMDMHZndM',
 'https://open.spotify.com/track/4jIsolrxzAUdBu5x5ExGPJ',
 'https://open.spotify.com/track/08ExsbBLAWHCxhhWofC98J',
 'https://open.spotify.com/track/45dAAA2dnaEe8HZj2lVf6D']