# Music Recommendation System Using Spotify API

## Importing the libraries and datasets

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import spotipy
import os
import plotly.express as px
%matplotlib inline

In [None]:
# Directory that holds the three CSV files. It defaults to the original
# author's local folder; set the MUSIC_REC_DATA_DIR environment variable to
# run the notebook on another machine without editing this cell.
DATA_DIR = os.environ.get(
    'MUSIC_REC_DATA_DIR',
    'C:/Users/Devanshi/Desktop/All recent required folders/Music Recommendation System using Spotify API',
)

spotify_data = pd.read_csv(DATA_DIR + '/data.csv')
genre_data = pd.read_csv(DATA_DIR + '/data_by_genres.csv')
data_by_year = pd.read_csv(DATA_DIR + '/data_by_year.csv')

In [None]:
# Preview the first rows of the per-genre table loaded from data_by_genres.csv.
genre_data.head()

In [None]:
# Preview the first rows of the per-year table loaded from data_by_year.csv.
data_by_year.head()

In [None]:
# Preview the first rows of the main table loaded from data.csv.
spotify_data.head()

## Exploratory Data Analysis 

In [None]:
# Draw one line per audio feature with the 'year' column on the x axis.
yearly_features = ['acousticness', 'danceability', 'energy', 'instrumentalness', 'liveness', 'valence']
data_by_year.plot.line(x='year', y=yearly_features, figsize=(10, 10))

In [None]:
# Grouped bar chart comparing four audio features across the most popular genres.
# NOTE(review): the variable is called top10_genres but 15 rows are selected;
# confirm which of the two is intended.
compared_features = ['valence', 'energy', 'danceability', 'acousticness']
top10_genres = genre_data.nlargest(n=15, columns='popularity')
fig = px.bar(
    top10_genres,
    x='genres',
    y=compared_features,
    barmode='group',
)
fig.show()

In [None]:
import plotly.figure_factory as ff
# Distribution plot of four genre-level audio features; hist_data holds one
# list of values per entry of group_labels, in the same order.
group_labels = ['valence', 'energy', 'danceability', 'acousticness']
hist_data = [genre_data[label].tolist() for label in group_labels]
fig = ff.create_distplot(hist_data, group_labels, bin_size=.2, show_rug=False)
fig.show()

## K Means Algorithm Model 

In [None]:
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

# Standardise the numeric columns, then group the songs into 4 K-Means clusters.
# random_state is fixed so that re-running the cell reproduces the same labels
# as the pipeline that gets saved to disk.
song_cluster_pipeline = Pipeline(
    [
        ('scaler', StandardScaler()),
        ('kmeans', KMeans(n_clusters=4, verbose=2, random_state=42)),
    ],
    verbose=True,
)

# Use every numeric column as a feature. 'cluster_label' is excluded because
# this cell writes it into spotify_data below; without the drop, re-running
# the cell would feed the previous labels back in as an extra feature.
X = spotify_data.select_dtypes(np.number).drop(columns=['cluster_label'], errors='ignore')
number_cols = list(X.columns)
print(number_cols)

song_cluster_pipeline.fit(X)
song_cluster_labels = song_cluster_pipeline.predict(X)
spotify_data['cluster_label'] = song_cluster_labels

In [None]:
import joblib
# Serialise the fitted clustering pipeline to disk with joblib.
model_path = 'kmeans.pkl'
joblib.dump(song_cluster_pipeline, model_path)

In [None]:
from sklearn.decomposition import PCA
# Reduce the standardised feature matrix X to two PCA components so the
# clusters can be drawn on a 2-D scatter plot.
pca_steps = [
    ('scaler', StandardScaler()),
    ('PCA', PCA(n_components=2)),
]
pca_pipeline = Pipeline(pca_steps)
song_embedding = pca_pipeline.fit_transform(X)

# One row per song: the two components plus the title and cluster for display.
projection = pd.DataFrame(song_embedding, columns=['x', 'y'])
projection['title'] = spotify_data['name']
projection['cluster'] = spotify_data['cluster_label']

In [None]:
import plotly.express as px
# Scatter plot of the 2-D projection, coloured by cluster label.
hover_columns = ['x', 'y', 'title']
fig = px.scatter(
    projection,
    x='x',
    y='y',
    color='cluster',
    hover_data=hover_columns,
)
fig.show()

## Suggestion based recommendation 

In [None]:
# Feature columns used by the recommendation functions below. This rebinds
# the number_cols list that the clustering cell derived from the DataFrame.
number_cols = [
    'acousticness',
    'danceability',
    'duration_ms',
    'energy',
    'explicit',
    'instrumentalness',
    'key',
    'liveness',
    'loudness',
    'mode',
    'popularity',
    'speechiness',
    'tempo',
    'valence',
    'year',
]

In [None]:
from spotipy.oauth2 import SpotifyClientCredentials
from collections import defaultdict

# Spotify client authenticated with the client-credentials flow; the id and
# secret are read from the SPOTIFY_CLIENT_ID / SPOTIFY_CLIENT_SECRET
# environment variables (a missing variable raises KeyError).
spotify_credentials = SpotifyClientCredentials(
    client_id=os.environ["SPOTIFY_CLIENT_ID"],
    client_secret=os.environ["SPOTIFY_CLIENT_SECRET"],
)
sp = spotipy.Spotify(auth_manager=spotify_credentials)


def find_song(name, year=None):
    """
    Look a song up on Spotify and return its features as a one-row DataFrame.

    The first search hit is used. Its audio features are fetched with Spotipy,
    combined with the track metadata, reduced to the columns in ``number_cols``
    and passed to the K-Means pipeline saved in 'kmeans.pkl' to obtain a
    cluster label.

    Parameters
    ----------
    name : str
        Track title to search for.
    year : int or str, optional
        Release year. When omitted, the search is done by title only and the
        year is taken from the release date of the returned track's album.

    Returns
    -------
    pandas.DataFrame or None
        One row with the ``number_cols`` columns plus 'cluster_label', or None
        when the search has no hit, Spotify has no audio features for the
        track, or no release year can be determined.
    """
    # NOTE(review): Spotify's documented field-filter syntax has no space after
    # the colon ('track:... year:...'). The original spacing is kept so search
    # results do not change; confirm which form is intended.
    if year is None:
        query = 'track: {}'.format(name)
    else:
        query = 'track: {} year: {}'.format(name, year)

    results = sp.search(q=query, limit=1)
    items = results['tracks']['items']
    if not items:
        return None

    track = items[0]
    # audio_features returns a list with None for tracks that have no analysis.
    audio_features = sp.audio_features(track['id'])[0]
    if audio_features is None:
        return None

    if year is None:
        release_date = (track.get('album') or {}).get('release_date') or ''
        if not release_date[:4].isdigit():
            return None
        year = int(release_date[:4])

    song_data = {
        'name': [name],
        'year': [year],
        'explicit': [int(track['explicit'])],
        'popularity': [track['popularity']],
    }
    song_data.update(audio_features)

    # .copy() so that adding 'cluster_label' below does not write into a slice.
    features = pd.DataFrame(song_data)[number_cols].copy()

    # Load the model from the file. joblib files are pickles: only load files
    # produced by this notebook.
    kmeans_from_joblib = joblib.load('kmeans.pkl')

    # The pipeline must receive the columns in the order it was fitted on,
    # which is not necessarily the order of number_cols.
    fitted_order = getattr(kmeans_from_joblib, 'feature_names_in_', None)
    if fitted_order is not None and set(fitted_order) == set(features.columns):
        model_input = features[list(fitted_order)]
    else:
        model_input = features

    # Use the loaded model to make predictions
    features['cluster_label'] = kmeans_from_joblib.predict(model_input)
    return features

In [None]:
from collections import defaultdict
from scipy.spatial.distance import cdist
import difflib



def get_song_data(song):
    """
    Return the data row for a song, looked up by its name.

    The first row of ``spotify_data`` whose 'name' equals ``song`` is returned.
    When the dataset has no such row, the lookup falls back to
    ``find_song(song)``.

    Parameters
    ----------
    song : str
        Song title.

    Returns
    -------
    pandas.Series or None
        The matching row, or None when the fallback finds nothing. Both paths
        return a Series so callers can index the result the same way.
    """
    matches = spotify_data[spotify_data['name'] == song]
    if not matches.empty:
        return matches.iloc[0]

    fetched = find_song(song)
    if fetched is None or fetched.empty:
        return None
    # find_song returns a one-row DataFrame; hand back that row as a Series.
    return fetched.iloc[0]

def get_mean_vector(song):
    """
    Return the feature vector (the ``number_cols`` values) of a single song.

    Despite the name, no averaging takes place: the function looks the song up
    with ``get_song_data`` and selects the feature columns.

    Parameters
    ----------
    song : str
        Song title.

    Returns
    -------
    The ``number_cols`` selection of the song's data, or None (after printing
    a warning) when the song cannot be found.
    """
    song_data = get_song_data(song)
    if song_data is None:
        print('Warning: {} does not exist in Spotify or in database'.format(song))
        # Indexing None below would raise TypeError, so stop here.
        return None
    return song_data[number_cols]


In [None]:
def recommend_song(song, n_songs):
    """
    Recommend songs from the same cluster as ``song``, closest first.

    The seed song is resolved with ``get_song_data``. All rows of
    ``spotify_data`` that share its 'cluster_label' are ranked by cosine
    distance between their ``number_cols`` values and the seed's. The seed
    title itself and repeated titles are removed before the first ``n_songs``
    rows are returned.

    Parameters
    ----------
    song : str
        Title of the seed song.
    n_songs : int
        Maximum number of recommendations.

    Returns
    -------
    list of dict
        One dict per recommendation with the keys 'name', 'year' and
        'artists'. Empty when the seed cannot be resolved or ``n_songs`` is
        not positive.
    """
    metadata_cols = ['name', 'year', 'artists']
    if n_songs <= 0:
        return []

    seed = get_song_data(song)
    if seed is None:
        print('Warning: {} does not exist in Spotify or in database'.format(song))
        return []
    if isinstance(seed, pd.DataFrame):
        # Accept a one-row frame as well as a Series.
        if seed.empty:
            return []
        seed = seed.iloc[0]

    # Read the cluster by column name rather than by a fixed column position.
    seed_cluster = seed['cluster_label']
    seed_vector = seed[number_cols].to_numpy(dtype=float).reshape(1, -1)

    cluster_rows = spotify_data.loc[spotify_data['cluster_label'] == seed_cluster]
    if cluster_rows.empty:
        return []

    # One vectorised distance computation over the whole cluster, using each
    # row's own features instead of a per-row lookup by title.
    # NOTE(review): the features are not standardised here, so columns with
    # large magnitudes presumably dominate the cosine distance; confirm whether
    # scaled features should be used.
    cluster_matrix = cluster_rows[number_cols].to_numpy(dtype=float)
    distances = cdist(cluster_matrix, seed_vector, 'cosine').ravel()
    nearest_first = np.argsort(distances, kind='stable')

    # argsort yields positions within cluster_rows, so index that frame
    # positionally.
    ranked = cluster_rows.iloc[nearest_first]
    ranked = ranked[ranked['name'] != song].drop_duplicates(['name'])
    rec_songs_final = ranked.head(n_songs)

    return rec_songs_final[metadata_cols].to_dict(orient='records')

    
    
    

    
    
        

In [None]:
def get_song_suggest(n, v):
    """Return the result of recommend_song for song name n and count v."""
    return recommend_song(n, v)

## Mood based recommendation

In [None]:
from spotipy.oauth2 import SpotifyClientCredentials
# One Spotify playlist per mood; playlistsid and mood are in the same order.
playlistsid = ['7GhawGpb43Ctkq3PRP1fOL', '37i9dQZF1DX7qK8ma5wgG1', '71Xpaq3Hbpxz6w9yDmIsaH', '1ofJSOJDpcRRBW6tMOyfdv']
mood = ['happy', 'sad', 'angry', 'calm']

# Credentials are read from the environment instead of being written into the
# notebook. Any secret that has ever been committed should be rotated.
sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(
    client_id=os.environ["SPOTIFY_CLIENT_ID"],
    client_secret=os.environ["SPOTIFY_CLIENT_SECRET"]))

# Non-measurement fields that come back with the audio features.
_NON_FEATURE_COLS = ['type', 'id', 'uri', 'track_href', 'analysis_url', 'time_signature']
# Columns kept in Moods, in the order the mood-matching code reads them.
_MOOD_FEATURES = ['danceability', 'energy', 'loudness', 'valence', 'tempo']
# Batch size for audio-feature requests (Spotify's documented limit per call).
_AUDIO_FEATURES_BATCH = 100


def _fetch_playlist_tracks(playlist_id):
    """Return all track objects of a playlist, following every result page."""
    tracks = []
    page = sp.playlist_tracks(playlist_id, fields=None, limit=None, offset=0, market=None)
    while page is not None:
        for item in page['items']:
            track = item.get('track')
            # Entries without a track or id cannot be queried for features.
            if track and track.get('id'):
                tracks.append(track)
        # sp.next returns the following page, or None after the last one.
        page = sp.next(page)
    return tracks


def _playlist_mean_features(playlist_id):
    """Return a one-row DataFrame with the mean of each feature of a playlist."""
    tracks = _fetch_playlist_tracks(playlist_id)
    rows = []
    names = []
    for start in range(0, len(tracks), _AUDIO_FEATURES_BATCH):
        batch = tracks[start:start + _AUDIO_FEATURES_BATCH]
        batch_features = sp.audio_features([track['id'] for track in batch]) or []
        for track, features in zip(batch, batch_features):
            if features is None:
                # No analysis available for this track.
                continue
            row = dict(features)
            row['explicit'] = int(track['explicit'])
            row['popularity'] = track['popularity']
            rows.append(row)
            names.append(track['name'])

    if not rows:
        raise ValueError('No audio features available for playlist {}'.format(playlist_id))

    playlist_df = pd.DataFrame(rows, index=names).drop(columns=_NON_FEATURE_COLS)
    return pd.DataFrame(playlist_df.mean(axis=0)).transpose()


mood_profiles = []
for i, playlist_id in enumerate(playlistsid):
    print(i)
    mood_profiles.append(_playlist_mean_features(playlist_id))

# DataFrame.append no longer exists in pandas 2, so the rows are concatenated.
Moods = pd.concat(mood_profiles, ignore_index=True)
Moods.index = mood
# Keep only the five mood features, in a fixed order.
Moods = Moods[_MOOD_FEATURES]
display(Moods)


In [None]:
# Working copy of the columns used for mood matching. get_mood reads these
# columns by position (0-4 are features, 5-7 are name/artists/id), so the
# order of this list matters. Selecting the columns directly replaces the
# earlier drop of all other columns, which failed if one of them was absent.
dc = spotify_data[['danceability', 'energy', 'loudness', 'valence', 'tempo', 'name', 'artists', 'id']].copy()
dc.head()

In [None]:
def get_mood(m):
    """
    Return the songs in ``dc`` whose features fall inside the window of a mood.

    Each mood has one row in ``Moods`` (happy=0, sad=1, angry=2, calm=3). The
    five reference values of that row are turned into inclusive lower/upper
    bounds, and a song is kept when all five of its feature values lie inside
    them. Both tables are read by column position: positions 0-4 are assumed
    to hold danceability, energy, loudness, valence and tempo (the order that
    ``dc`` is built with); positions 5-7 of ``dc`` are name, artists and id.

    Parameters
    ----------
    m : str
        One of 'happy', 'sad', 'angry', 'calm'.

    Returns
    -------
    pandas.DataFrame
        Columns 'name', 'artists', 'id' for the matching songs, in dataset
        order. Empty for any other value of ``m``.
    """
    mood_rows = {'happy': 0, 'sad': 1, 'angry': 2, 'calm': 3}
    if m not in mood_rows:
        return pd.DataFrame({'name': [], 'artists': [], 'id': []})

    row = mood_rows[m]
    dance, energy, loud, valence, tempo = (Moods.iloc[row, k] for k in range(5))
    inf = np.inf

    # (low, high) per feature position; +/-inf marks a side with no bound.
    windows = {
        'happy': [(dance, 1), (energy, energy + 0.2), (loud - 5, loud + 2),
                  (valence, 1), (tempo - 5, tempo + 5)],
        'sad': [(dance - 0.2, dance + 0.15), (energy, energy + 0.1), (-inf, loud + 1),
                (valence, valence + 0.12), (tempo - 20, tempo)],
        'angry': [(dance, dance + 0.05), (energy, 1), (loud, 0),
                  (valence - 0.05, valence + 0.05), (tempo, inf)],
        'calm': [(0, dance), (energy, energy + 0.15), (loud - 7, loud + 3),
                 (valence, 1), (-inf, tempo)],
    }[m]

    # Evaluate every bound on whole columns instead of one row at a time.
    keep = np.ones(len(dc), dtype=bool)
    for position, (low, high) in enumerate(windows):
        keep &= dc.iloc[:, position].between(low, high).to_numpy()

    chosen = dc.iloc[keep]
    suitable = pd.DataFrame({
        'name': chosen.iloc[:, 5].tolist(),
        'artists': chosen.iloc[:, 6].tolist(),
        'id': chosen.iloc[:, 7].tolist(),
    })
    return suitable

In [None]:
# Candidate songs per mood, keeping one row per (name, artists) pair.
dedupe_keys = ['name', 'artists']
h = get_mood('happy').drop_duplicates(dedupe_keys)
s = get_mood('sad').drop_duplicates(dedupe_keys)
a = get_mood('angry').drop_duplicates(dedupe_keys)
c = get_mood('calm').drop_duplicates(dedupe_keys)

In [None]:
def get_mood_song(m):
    """
    Pick a random song for a mood and show recommendations based on it.

    One row is sampled from the candidate frame of the mood (``h``, ``s``,
    ``a`` or ``c``); its first column is passed to ``recommend_song`` to get
    10 recommendations, which are added to ``frame6`` as labels.

    Parameters
    ----------
    m : str
        One of 'happy', 'sad', 'angry', 'calm'.

    Raises
    ------
    ValueError
        If ``m`` is not one of the four moods.
    """
    candidates_by_mood = {'happy': h, 'sad': s, 'angry': a, 'calm': c}
    if m not in candidates_by_mood:
        raise ValueError('Unknown mood: {!r}'.format(m))

    candidates = candidates_by_mood[m]
    if candidates.empty:
        # sample() raises on an empty frame; tell the user instead.
        notice = Label(frame6, text='No songs found for this mood.', font=('Century', 12))
        notice.pack(pady=5)
        return

    m1 = candidates.sample()
    ans2 = recommend_song(m1.iloc[0, 0], 10)
    text8 = Label(frame6, text='Recommended Songs are:', font=('Century', 12))
    text8.pack(pady=5)
    for x in ans2:
        label = Label(frame6, text=x, font=('Century', 10))
        label.pack(pady=5, anchor='w')

## Tkinter  

In [None]:
from tkinter import *
from threading import *

In [None]:
#Create an instance of Tkinter frame or window
# Main application window: size, title and icon.
win = Tk()
win.geometry("750x750")
win.title('Music Recommendation System')
win.iconbitmap('C:/Users/Devanshi/Desktop/All recent required folders/Music Recommendation System using Spotify API/disc.ico')

# frame1 is the landing area with the prompt and the two mode buttons.
frame1 = Frame(win)
frame1.pack()

text1 = Label(frame1, text="Recommend Music based on:", font=('Century', 15))
text1.pack(ipadx=20, ipady=20)

# frame2-frame4 are filled by suggestion_based (inputs, count, results).
frame2, frame3, frame4 = Frame(win), Frame(win), Frame(win)
for container in (frame2, frame3, frame4):
    container.pack()

    
def suggestion_based():
    """
    Show the suggestion-based screen.

    Hides the landing frame, adds entry fields for a song name and the number
    of recommendations, and adds an 'Ok' button that computes the
    recommendations on a worker thread and lists them in ``frame4``.
    """
    frame1.forget()
    text2 = Label(frame2, text='Suggestion Based Recommendation', font=('Century', 15))
    text2.pack(pady=15)
    text3 = Label(frame2, text='Enter the Song:', font=('Century', 12))
    text3.pack(pady=15, side=LEFT)
    nameEntered = Entry(frame2, width=50)
    nameEntered.pack(pady=15, side=LEFT)
    text4 = Label(frame3, text='Enter number of songs to recommend:', font=('Century', 12))
    text4.pack(pady=5, side=LEFT)
    valEntered = Entry(frame3, width=23)
    valEntered.pack(pady=5, side=LEFT)

    def callback():
        # Runs on the worker thread started by threading1.
        global ans
        name = nameEntered.get()
        try:
            val = int(valEntered.get())
        except ValueError:
            val = 0
        if val <= 0:
            # Without this check a non-numeric entry raised ValueError inside
            # the thread and nothing was shown to the user.
            warning = Label(frame4, text='Please enter a positive whole number of songs.',
                            font=('Century', 12))
            warning.pack(pady=5)
            return

        ans = get_song_suggest(name, val)
        text7 = Label(frame4, text='Recommended Songs are:', font=('Century', 12))
        text7.pack(pady=5)
        for x in ans:
            label = Label(frame4, text=x, font=('Century', 10))
            label.pack(pady=5, anchor='w')

    def threading1():
        # Compute off the main thread so the window stays responsive.
        t2 = Thread(target=callback)
        t2.start()

    btn9 = Button(frame4, text='Ok', width=20, font=('Century', 12), command=threading1)
    btn9.pack()
    btn7 = Button(frame4, text='Exit', width=20, font=('Century', 12), command=win.destroy)
    btn7.pack(side=BOTTOM, pady=10)
    



            
# Landing-page button that opens the suggestion-based screen.
btn1 = Button(
    frame1,
    text='Suggestion',
    width=20,
    font=('Century', 12),
    command=suggestion_based,
)
btn1.pack(pady=5)

# frame5 holds the mood buttons; frame6 holds the mood results and Exit button.
frame5 = Frame(win)
frame6 = Frame(win)
frame5.pack()
frame6.pack()

def threading(m):
    """Start a new thread that runs get_mood_song(m)."""
    worker = Thread(target=get_mood_song, args=(m,))
    worker.start()
    
def mood_based():
    """
    Show the mood-based screen.

    Hides the landing frame and fills ``frame5`` with a heading, a prompt and
    one button per mood; each button passes its mood to ``threading``. An
    'Exit' button is added to ``frame6``.
    """
    frame1.forget()

    heading = Label(frame5, text='Mood Based Recommendation', font=('Century', 15))
    heading.pack(pady=15)
    prompt = Label(frame5, text='How are you feeling today?', font=('Century', 12))
    prompt.pack(pady=5)

    mood_buttons = (
        ('Happy', 'happy'),
        ('Sad', 'sad'),
        ('Angry', 'angry'),
        ('Calm', 'calm'),
    )
    for caption, mood_key in mood_buttons:
        # Bind mood_key as a default so each button keeps its own mood.
        mood_button = Button(frame5, text=caption, width=20, font=('Century', 11),
                             command=lambda mood_key=mood_key: threading(mood_key))
        mood_button.pack(pady=5, side=LEFT)

    exit_button = Button(frame6, text='Exit', width=20, font=('Century', 12), command=win.destroy)
    exit_button.pack(side=BOTTOM, pady=10)

    
# Landing-page button that opens the mood-based screen.
btn2 = Button(
    frame1,
    text='Mood',
    width=20,
    font=('Century', 12),
    command=mood_based,
)
btn2.pack(pady=5)

# Start Tk's event loop; this call blocks until the window is closed.
win.mainloop()