# Music Recommendation System

In [None]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import spotipy.util as util
import configparser

In [None]:
# Load the Spotify credentials and account details from the SPOTIFY
# section of config.cfg.
config = configparser.ConfigParser()
config.read('config.cfg')
cid, secret, username, url, userid = (
    config.get('SPOTIFY', option)
    for option in ('CLIENT_ID', 'CLIENT_SECRET', 'USERNAME', 'URL', 'USERID')
)

In [None]:
# Credentials authentication: request a user token covering the library and
# playlist scopes used by the rest of the notebook.
scope = 'user-library-read playlist-read-private playlist-modify-public playlist-modify-private user-library-modify'
token = util.prompt_for_user_token(
    username,
    scope,
    client_id=cid,
    client_secret=secret,
    redirect_uri=url,
)
if not token:
    # No client is created in this case, so `sp` stays undefined.
    print("Can't get token for", username)
else:
    sp = spotipy.Spotify(auth=token)

In [None]:
# NOTE: before running the cells below, create a playlist containing songs you
# disliked and add at least 1000 songs to it.
def create_data(userid, playlists, client=None):
    """Collect the track entries of every given playlist.

    Each playlist is paged through completely. Previously only the paging
    link of the last playlist survived, so every earlier playlist was cut
    off after its first page.

    Args:
        userid: Spotify user id, forwarded to ``user_playlist``.
        playlists: Iterable of playlist ids. Blank ids (for example from an
            empty input line) are skipped.
        client: Object providing ``user_playlist(userid, playlist_id)`` and
            ``next(page)``. Defaults to the notebook-level ``sp`` client.

    Returns:
        A two-element list ``[tracks, items]``. ``items`` holds the entries
        of all pages of all playlists, in order. ``tracks`` is a copy of the
        last page that was read; its ``'next'`` is always falsy, so callers
        that keep following ``tracks['next']`` simply find nothing left.
    """
    if client is None:
        client = sp

    # Placeholder so that callers can read tracks['next'] even when no
    # playlist was processed.
    last_page = {"items": [], "next": None}
    items = []
    for playlist in playlists:
        if not playlist:
            continue
        page = client.user_playlist(userid, playlist)["tracks"]
        items.extend(page["items"])
        # Follow the paging links of *this* playlist before moving on.
        while page["next"]:
            page = client.next(page)
            items.extend(page["items"])
        last_page = page
    return [dict(last_page), items]

In [None]:
# Ask for the playlists that contain only liked songs.
print("Enter the ids of playlist which only contains the song you liked seprate each id with space")
# split() without an argument drops empty fields, so repeated spaces or an
# empty line no longer produce '' playlist ids.
liked = input().split()
print(liked)

In [None]:
#Get Good songs ids
[good_tracks, good_songs] = create_data(userid, liked)

# Keep following the paging link of the returned page object until no
# further page is available.
while good_tracks['next']:
    good_tracks = sp.next(good_tracks)
    good_songs.extend(good_tracks["items"])

# Playlist entries may carry no track object or a track without an id
# (e.g. removed tracks or local files); such entries cannot be looked up
# later, so they are left out instead of raising or adding None ids.
good_ids = [
    item['track']['id']
    for item in good_songs
    if item.get('track') and item['track'].get('id')
]

In [None]:
# Ask for the playlists that contain only disliked songs.
print("Enter the ids of playlist which only contains the song you disliked seprate each id with space")
# split() without an argument drops empty fields, so repeated spaces or an
# empty line no longer produce '' playlist ids.
disliked = input().split()

In [None]:
#Get bad songs ids
[bad_tracks, bad_songs] = create_data(userid, disliked)

# Keep following the paging link of the returned page object until no
# further page is available.
while bad_tracks['next']:
    bad_tracks = sp.next(bad_tracks)
    bad_songs.extend(bad_tracks["items"])

# Playlist entries may carry no track object or a track without an id
# (e.g. removed tracks or local files); such entries cannot be looked up
# later, so they are left out instead of raising or adding None ids.
bad_ids = [
    item['track']['id']
    for item in bad_songs
    if item.get('track') and item['track'].get('id')
]

In [None]:
# Fetch the audio features of the liked songs, 50 ids per request, and label
# every returned feature record with target = 1.
features = []
print(len(good_ids))
c = 0
for start in range(0, len(good_ids), 50):
    batch = sp.audio_features(good_ids[start:start + 50])
    for track in batch:
        if track is None:
            # No feature record came back for this id.
            continue
        track['target'] = 1
        features.append(track)
        c += 1
print(c)

In [None]:
# Fetch the audio features of the disliked songs, 50 ids per request, and
# append them to the existing `features` list with target = 0.
print(len(bad_ids))
c = 0
for start in range(0, len(bad_ids), 50):
    batch = sp.audio_features(bad_ids[start:start + 50])
    for track in batch:
        if track is None:
            # No feature record came back for this id.
            continue
        track['target'] = 0
        features.append(track)
        c += 1
print(c)

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split

# One row per labelled feature record; 10% of the shuffled rows are held out
# for testing.
trainingData = pd.DataFrame(features)
train, test = train_test_split(trainingData, shuffle=True, test_size=0.10)

# Graphs

In [None]:
import matplotlib.pyplot as plt

# Audio features whose liked / disliked distributions are plotted.
feature = ["danceability", "loudness", "valence", "energy", "instrumentalness", "acousticness", "key", "speechiness", "duration_ms"]

# Per-feature columns of the liked (target == 1) and disliked (target == 0) rows.
liked_rows = trainingData[trainingData['target'] == 1]
disliked_rows = trainingData[trainingData['target'] == 0]
goods = {name: liked_rows[name] for name in feature}
bads = {name: disliked_rows[name] for name in feature}

# One figure per feature with both histograms overlaid.
for name in feature:
    fig = plt.figure(figsize=(8, 8))
    plt.title("Song " + name + " Like / Dislike Distribution")
    plt.xlabel(name)
    plt.ylabel("No. of songs")
    goods[name].hist(alpha=0.7, bins=30, label='positive')
    bads[name].hist(alpha=0.7, bins=30, label='negative')
    plt.legend(loc='upper right')

# Training and Testing

In [None]:
# Columns used as model inputs.
feature = ["danceability", "loudness", "valence", "energy", "instrumentalness", "acousticness", "key", "speechiness", "duration_ms"]
# Inputs (x) and labels (y) of the train and test splits, ready to be fed to
# the classifiers.
x_train, y_train = train[feature], train["target"]
x_test, y_test = test[feature], test["target"]

In [None]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier

# Test accuracy (in percent) of every classifier, keyed by a short name.
scores = {}


def _fit_and_score(key, label, model):
    """Fit ``model`` on the training split and report its test accuracy.

    The accuracy (in percent) is stored in ``scores[key]`` and printed after
    ``label``. Routing every classifier through this helper guarantees that
    the predictions always come from the model that was just fitted; the
    previous copy-pasted version evaluated ``knn`` for four other models.

    Returns:
        The predictions of ``model`` for ``x_test``.
    """
    model.fit(x_train, y_train)
    predictions = model.predict(x_test)
    score = accuracy_score(y_test, predictions) * 100
    scores[key] = score
    print(label, round(score, 1), "%")
    return predictions


dtc = DecisionTreeClassifier(min_samples_split=100)
y_pred = _fit_and_score('dtc', "Accuracy using Decision Tree: ", dtc)

knn = KNeighborsClassifier(9)
knn_pred = _fit_and_score('knn', "Accuracy using Knn: ", knn)

rfc = RandomForestClassifier(n_estimators=31, random_state=111)
rfc_pred = _fit_and_score('rfc', "Accuracy using Random Forest classifier: ", rfc)

# The AdaBoost score is now recorded as well, so it shows up next to the
# other classifiers in `scores`.
abc = AdaBoostClassifier(n_estimators=62, random_state=111)
abc_pred = _fit_and_score('abc', "Accuracy using AdaBoost Classifier: ", abc)

bc = BaggingClassifier(n_estimators=9, random_state=111)
bc_pred = _fit_and_score('bc', "Accuracy using Bagging Classifier: ", bc)

# Label corrected: this result was previously printed as "Bagging Classifier".
etc = ExtraTreesClassifier(n_estimators=9, random_state=111)
etc_pred = _fit_and_score('etc', "Accuracy using Extra Trees Classifier: ", etc)

gbc = GradientBoostingClassifier(n_estimators=100, learning_rate=.1, max_depth=1, random_state=0)
predicted = _fit_and_score('gbc', "Accuracy using Gbc: ", gbc)

In [None]:
import matplotlib.pyplot as plt

# Bar chart with one bar per entry of `scores`, labelled with its key.
positions = range(len(scores))
plt.xlabel('Classifier')
plt.ylabel('Accuracy')
plt.title('Recommendation Accuracy using different classifier')
plt.bar(positions, list(scores.values()), align='center')
plt.xticks(positions, list(scores))
plt.show()

# Adding Recommended songs to playlist

In [None]:
print("Enter the ids of playlist from which you want recommended songs")
# split() without an argument drops empty fields, so repeated spaces or an
# empty line do not produce '' playlist ids.
like = input().split()
title = input("Title of Playlist that will contain songs you liked:: ")

# Collect the track ids of every page of every candidate playlist.
songs = []
for playlist in like:
    sourcePlaylist = sp.user_playlist(userid, playlist)
    tracks = sourcePlaylist["tracks"]
    song = list(tracks["items"])
    while tracks['next']:
        tracks = sp.next(tracks)
        song.extend(tracks["items"])
    print(len(song))
    # Entries without a track object or without a track id (e.g. removed
    # tracks or local files) cannot be scored and are skipped.
    playlist_ids = [
        item["track"]["id"]
        for item in song
        if item.get("track") and item["track"].get("id")
    ]
    if playlist_ids:
        # Guarded so that an empty playlist no longer raises IndexError.
        print(playlist_ids[0])
    songs.extend(playlist_ids)

print(len(songs))

# Fetch audio features 50 ids at a time. `scored_ids` is kept parallel to
# `features`: ids whose feature record is None are dropped from both, so row
# i of the prediction input always belongs to scored_ids[i]. Indexing
# `songs` with the prediction index (as before) picked the wrong song as
# soon as a single record was missing.
features = []
scored_ids = []
for start in range(0, len(songs), 50):
    batch_ids = songs[start:start + 50]
    audio_features = sp.audio_features(batch_ids)
    for song_id, track in zip(batch_ids, audio_features):
        if track is not None:
            features.append(track)
            scored_ids.append(song_id)
c = len(features)
print(c)

dataset = pd.DataFrame(features)
rec_songs = []
if features:
    # NOTE(review): this rebinds `test`, which earlier held the held-out
    # split; rerun the train/test split cell before re-evaluating models.
    test = dataset[feature]
    predicted = gbc.predict(test)
    rec_songs = [
        song_id
        for song_id, label in zip(scored_ids, predicted)
        if label == 1
    ]

In [None]:
# Look for an existing playlist named `title` across *all* pages of the
# user's playlists; checking only the first page could miss it and create a
# duplicate.
liked_list = None
playlists = []
page = sp.user_playlists(userid)
while page and liked_list is None:
    playlists.extend(page['items'])
    for playlist in page['items']:
        if playlist['name'] == title:
            liked_list = playlist
            break
    # Stop paging once the playlist is found or no further page exists.
    page = sp.next(page) if liked_list is None and page['next'] else None

exist = liked_list is not None
if not exist:
    liked_list = sp.user_playlist_create(userid, title, True)

# Add the recommended songs in chunks of up to 100 ids per request instead
# of issuing one request per song.
for start in range(0, len(rec_songs), 100):
    sp.user_playlist_add_tracks(userid, liked_list['id'], rec_songs[start:start + 100])