# Song Recommender

## Connecting to the Spotify API

In [1]:
import config
import pandas as pd
import time
import numpy as np
import pandas as pd
import pickle
from sklearn import datasets # sklearn comes with some toy datasets to practise
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from matplotlib import pyplot
from sklearn.metrics import silhouette_score
import spotipy
import json
from spotipy.oauth2 import SpotifyClientCredentials
from kneed import KneeLocator
from IPython.display import IFrame
from helpers import play_song
from helpers import get_playlist_tracks
from helpers import load




In [2]:
# Spotify Web API client authenticated with the client-credentials flow;
# the id/secret pair is read from the local `config` module.
sp = spotipy.Spotify(
    auth_manager=SpotifyClientCredentials(
        client_id=config.client_id,
        client_secret=config.client_secret,
    )
)

In [3]:
# NOTE(review): a notebook echoes the value of a cell's last expression, so
# this cell displays config.client_secret in the saved output. Clear that
# output before sharing the notebook, and rotate the credential if it has
# already been published.
config.client_id
config.client_secret

'7600aaf3d2c548358653f125b5caecbf'

## Defining the functions that make up the data pipeline, which produces a dataframe with the audio features of diverse tracks

In [4]:
def get_playlist_tracks(username, playlist_id):
    """Return all track items of a playlist, following result pagination.

    Uses the module-level ``sp`` client. The first page is requested with
    ``sp.user_playlist_tracks(..., market="GB")``; further pages are fetched
    with ``sp.next`` for as long as the current page has a ``'next'`` entry.

    Args:
        username: Passed through as the first argument of
            ``sp.user_playlist_tracks``.
        playlist_id: Id of the playlist whose tracks are collected.

    Returns:
        list: The concatenated ``'items'`` of every page that could be
        fetched. This is best-effort: if a request fails, the items
        gathered so far are returned (an empty list when the very first
        request fails).
    """
    # Bound before the try so the function can always return a list.
    tracks = []
    try:
        results = sp.user_playlist_tracks(username, playlist_id, market="GB")
        tracks.extend(results['items'])
        while results['next']:
            results = sp.next(results)
            tracks.extend(results['items'])
    except Exception as err:
        # Stop paging on the first failure: retrying the same page with the
        # error silently ignored would loop forever on a persistent error.
        print('could not fetch all tracks of playlist {}: {}'.format(playlist_id, err))
    return tracks

In [5]:
def pl_collector(genres):
    """Search for playlists matching each keyword and pool the results.

    For every entry of ``genres`` one ``sp.search`` request is made
    (``type='playlist'``, ``limit=50``, ``market='GB'``) using the
    module-level ``sp`` client. The position of the keyword is printed
    before each request as a progress indicator.

    Args:
        genres: Iterable of search keywords.

    Returns:
        list: The ``['playlists']['items']`` entries of every successful
        search, in keyword order. A keyword whose search fails is reported
        and skipped.
    """
    playlists_all = []
    for idx, genre in enumerate(genres):
        print(idx)
        try:
            pl_result = sp.search(q=genre, limit=50, type='playlist', market='GB')
            playlists_all.extend(pl_result['playlists']['items'])
        except Exception as err:
            # Best-effort: keep going with the remaining keywords, but say so.
            # KeyboardInterrupt is not an Exception and still stops the loop.
            print('search for {!r} failed: {}'.format(genre, err))
    return playlists_all

In [6]:
def pl_filter(playlists_all, min_tracks=200):
    """Return the ids of the playlists that hold more than ``min_tracks`` tracks.

    Args:
        playlists_all: Iterable of playlist dicts; each is read through
            ``playlist['tracks']['total']`` and ``playlist['id']``.
        min_tracks: Exclusive lower bound on the track count. Defaults to
            200, the threshold that was previously hard-coded.

    Returns:
        list: Ids of the qualifying playlists, in input order.
    """
    return [
        playlist['id']
        for playlist in playlists_all
        # Defensive: a missing (None) entry is skipped rather than raising.
        if playlist and playlist['tracks']['total'] > min_tracks
    ]

In [7]:
def track_collector(playlists_200):
    """Gather the track items of every playlist id into one flat list.

    Each id is looked up with ``get_playlist_tracks("spotify", <id>)`` and
    the returned items are concatenated in playlist order.
    """
    return [
        track
        for playlist_id in playlists_200
        for track in get_playlist_tracks("spotify", playlist_id)
    ]
    


In [8]:
def track_ids(all_tracks):
    """Collect the track id of every playlist item into one list.

    Args:
        all_tracks: Iterable of playlist items; the id is read from
            ``item['track']['id']``.

    Returns:
        list: The ids in input order. Items without a usable track entry
        (missing key, ``None`` item or ``None`` track) and items whose id
        is ``None`` are skipped.
    """
    ids = []
    for item in all_tracks:
        try:
            id_no = item['track']['id']
        except (KeyError, TypeError):
            # No track object on this item; nothing to collect.
            continue
        # A None id cannot be used for the audio-features lookup downstream.
        if id_no is not None:
            ids.append(id_no)
    return ids

In [9]:
def audio_feature_collector(track_ids):
    """Return the audio features of every track id, one request per id.

    Uses the module-level ``sp`` client. After every 1000th position
    (including position 0) whose request succeeded, the loop pauses for
    3 seconds and prints a progress line.

    Args:
        track_ids: Iterable of track ids.

    Returns:
        list: The first element of each ``sp.audio_features(id)`` response,
        in input order. Ids whose request fails are skipped and counted;
        the count is printed once at the end.
    """
    # NOTE(review): spotipy's audio_features reportedly accepts a list of up
    # to 100 ids per call; batching would cut the number of requests a lot.
    # Confirm against the spotipy documentation before changing this.
    list_of_audio_features = []
    failed = 0

    for it, id_no in enumerate(track_ids):
        try:
            list_of_audio_features.append(sp.audio_features(id_no)[0])
        except Exception:
            # Best-effort: skip this id. KeyboardInterrupt is not an
            # Exception, so the long-running loop can still be interrupted.
            failed += 1
            continue
        if it % 1000 == 0:
            time.sleep(3)
            print('{} iterations complete'.format(it))

    if failed:
        print('{} tracks skipped: audio features could not be fetched'.format(failed))
    return list_of_audio_features

In [10]:
def pipeline(lst_of_genre):
    """Run the whole collection chain for a list of search keywords.

    Steps: search playlists (``pl_collector``), keep the large ones
    (``pl_filter``), download their track items (``track_collector``),
    extract the track ids (``track_ids``) and fetch the audio features
    (``audio_feature_collector``). A status line is printed after each stage.

    Returns:
        list: ``[audio_features, all_tracks]``, i.e. the collected audio
        features followed by the raw track items.
    """
    found_playlists = pl_collector(lst_of_genre)
    print('playlists are collected')

    big_playlist_ids = pl_filter(found_playlists)
    print('playlists are filtered, {} playlists were found'.format(len(big_playlist_ids)))

    track_items = track_collector(big_playlist_ids)
    print('track ids are collected')
    print('In total {} tracks were obtained'.format(len(track_items)))

    features = audio_feature_collector(track_ids(track_items))
    print('audio features are collected')

    return [features, track_items]
    
    
    

## Running the pipeline function to get song data 

In [None]:
# Search keywords: decades first, then genres.
genres = [
    "60s", "70s", "80s", "90s", "00s", "10s", "20s",
    "Country", "Electronic", "Funk", "Hip-hop", "Jazz", "Latin",
    "Pop", "Punk", "Reggae", "Rock", "Metal", "Soul", "R&B",
    "Classic Music",
]

# Despite its name, `df` holds the two-element list returned by pipeline().
df = pipeline(genres)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
playlists are collected
playlists are filtered, 204 playlists were found
track ids are collected
In total 119235 tracks were obtained
0 iterations complete
1000 iterations complete


## Concatenating the data frames to have all tracks in one data frame

## Adding song and artist name to the features dataframe

In [None]:
# Split the pipeline result into its two parts: features first, raw items second.
audio_features, all_tracks = df[0], df[1]


In [None]:
# Drop the entries for which no audio features were returned.
# (Comparing type(i) with None is never true, and `del i` only unbinds the
# loop variable, so a filtering rebuild is needed to actually remove them.)
audio_features = [feat for feat in audio_features if feat is not None]


    


In [None]:


# Audio-feature table. None entries (tracks without features) are skipped so
# the frame is built from feature dicts only.
df = pd.DataFrame([feat for feat in audio_features if feat is not None])
df = df[["danceability", "energy", "loudness", "speechiness", "acousticness",
         "instrumentalness", "liveness", "valence", "tempo", "id", "duration_ms"]]
df = df.sort_index(axis=1)

artist_list = []
song_list = []
id_list = []

for item in all_tracks:
    # Some playlist items carry no usable track object (track_ids() wraps the
    # same lookup in a try/except); skip them instead of crashing here.
    track = item.get('track') if item else None
    if not track or not track.get('artists'):
        continue
    artist_list.append(track['artists'][0]['name'])  # first listed artist only
    song_list.append(track['name'])
    id_list.append(track['id'])

song_info = {'artist': artist_list, 'song': song_list, 'id': id_list}
df_song_info = pd.DataFrame(song_info)

# A track that sits in several playlists appears several times in both
# frames; merging on "id" without de-duplicating would emit one row for every
# pair of duplicates.
df_last = pd.merge(
    df.drop_duplicates(subset="id"),
    df_song_info.drop_duplicates(subset="id"),
    on="id",
)
df_last

In [None]:
import pickle

# Persist the collected audio features to the file 'outfile' (binary pickle)
# so they can be reloaded without running the pipeline again.
with open('outfile', 'wb') as fp:
    pickle.dump(audio_features, fp)

In [None]:
# Reload the pickled audio features into `x`.
# NOTE: pickle.load can execute arbitrary code; only load an 'outfile' that
# this notebook wrote itself.
with open('outfile', 'rb') as f:
    x = pickle.load(f)

In [None]:
# Display the reloaded audio-feature list.
x

In [None]:
# Keep only the truthy entries, which discards None placeholders.
list_of_audio_features = [feat for feat in x if feat]

In [None]:
# One row per audio-feature entry (rebinds `df`).
df = pd.DataFrame(list_of_audio_features)

In [None]:
# Display the audio-feature dataframe.
df

In [None]:
# Export the audio features to 'listofaudio.csv'.
# NOTE(review): the row index is written as an extra first column by default;
# pass index=False if no reader of this file needs it. Confirm before changing.
df.to_csv('listofaudio.csv')