# Song Recommender

## Connecting to API 

In [1]:
import config
import pandas as pd
import time
import numpy as np
import pandas as pd
import pickle
from sklearn import datasets # sklearn comes with some toy datasets to practise
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from matplotlib import pyplot
from sklearn.metrics import silhouette_score
import spotipy
import json
from spotipy.oauth2 import SpotifyClientCredentials
from kneed import KneeLocator
from IPython.display import IFrame
from helpers import play_song
from helpers import get_playlist_tracks
from helpers import load




In [2]:
sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id= config.client_id,
                                                           client_secret= config.client_secret))

In [3]:
config.client_id
config.client_secret

'7600aaf3d2c548358653f125b5caecbf'

## Defining necessary functions to create a data pipeline and obtain a dataframe with audio features of diverse tracks

In [3]:
def get_playlist_tracks(username, playlist_id):
    try:
        
        results = sp.user_playlist_tracks(username,playlist_id,market="GB")
        tracks = results['items']
        while results['next']:
            try:
                results = sp.next(results)
                tracks.extend(results['items'])
            except:
                pass
    except:
        pass
    return tracks

In [4]:
def pl_collector(genres):
    playlists_all = []                     # searching for playlists which contain keywords in genres list



    for id, genre in enumerate(genres):
    
        print(id)
        try:
            pl_result = sp.search(q=genre, limit=50,type='playlist',market='GB')
            playlists = pl_result['playlists']['items']
            playlists_all.extend(playlists)
        except:
            pass
    return playlists_all

In [5]:
def pl_filter(playlists_all):                #filtering tracklist ids that have >100 songs 

    
    playlists_200 = []

    for i in range(len(playlists_all)):
    
        no_tracks = playlists_all[i]['tracks']['total']
        if no_tracks > 200:
            playlists_200.append(playlists_all[i]['id'])

    no_of_tracks = 0
    for item in playlists_all:
            if item['id'] in playlists_200:
                no = item['tracks']['total']
                no_of_tracks += no
    return playlists_200

In [6]:
def track_collector(playlists_200):
        
    
    all_tracks = []
    for id in playlists_200:
        tracks = get_playlist_tracks("spotify", id)
        all_tracks.extend(tracks)
    return all_tracks
    


In [7]:
def track_ids(all_tracks):    
    track_ids = []
    

    for item in all_tracks: 
        try:
                                                                #Collecting ids of all tracks in one list
            id_no = item['track']['id']
            track_ids.append(id_no)
        except:
            pass
    return track_ids

In [8]:
def audio_feature_collector(track_ids):             #returns a list of audio features of all tracks 
    
    list_of_audio_features=[]

    

    for it, id_no in enumerate(track_ids):
        

        
        try:
            list_of_audio_features.append(sp.audio_features(id_no)[0])
            moment = time.time()
            if it%1000==0:
                time.sleep(3)
                print('{} iterations complete'.format(it))
        except:

            pass
           
    return list_of_audio_features

In [9]:
def pipeline(lst_of_genre):
    pls = pl_collector(lst_of_genre)
    print('playlists are collected')
    pl_filtered = pl_filter(pls)
    print('playlists are filtered, {} playlists were found'.format(len(pl_filtered)))
    all_tracks = track_collector(pl_filtered)
    print('track ids are collected')
    print('In total {} tracks were obtained'.format(len(all_tracks)))
    t_ids = track_ids(all_tracks)

    audio_features = audio_feature_collector(t_ids)
    print('audio features are collected')
    
    return [audio_features, all_tracks]
    
    
    

## Running the pipeline function to get song data 

In [12]:
genres = ["60s","70s","80s","90s","00s","10s", "20s","Country","Electronic","Funk","Hip-hop","Jazz","Latin", "Pop","Punk","Reggae","Rock","Metal","Soul", "R&B","Classic Music"]


df = pipeline(genres)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
playlists are collected
playlists are filtered, 142 playlists were found
track ids are collected
In total 64535 tracks were obtained
0 iterations complete
1000 iterations complete
2000 iterations complete
3000 iterations complete
4000 iterations complete
5000 iterations complete
6000 iterations complete
7000 iterations complete
8000 iterations complete
9000 iterations complete
10000 iterations complete
11000 iterations complete
12000 iterations complete
13000 iterations complete
14000 iterations complete
15000 iterations complete
16000 iterations complete
17000 iterations complete
18000 iterations complete
19000 iterations complete
20000 iterations complete
21000 iterations complete
22000 iterations complete
23000 iterations complete
24000 iterations complete
25000 iterations complete
26000 iterations complete
27000 iterations complete
28000 iterations complete
29000 iterations complete
30000 iterations complete
31000 iterations comp

## Concatenating the data frames to have all tracks in one data frame

## Adding song and artist name to the features dataframe

In [15]:
audio_features = df[0]
all_tracks = df[1]


In [17]:
len(audio_features)

64118

In [28]:
for i in audio_features:
    if type(i) == None:
        del i


    


In [29]:
len(audio_features)

64118

In [1]:


df=pd.DataFrame(audio_features)    
df=df[["danceability","energy","loudness","speechiness","acousticness",
"instrumentalness","liveness","valence","tempo","id","duration_ms"]]
df=df.sort_index(axis=1)

artist_list = []
song_list = []
id_list = []

for i in range(len(all_tracks)):
    artist = all_tracks[i]['track']['artists'][0]['name']
    song_name= all_tracks[i]['track']['name']
    song_id =  all_tracks[i]['track']['id']
    artist_list.append(artist)
    song_list.append(song_name)
    id_list.append(song_id)



song_info = {'artist':artist_list, 'song':song_list, 'id':id_list}    
df_song_info = pd.DataFrame(song_info)
df_last = pd.merge(df, df_song_info, on="id")
df_last

NameError: name 'pd' is not defined

In [31]:
import pickle

with open('outfile', 'wb') as fp:
    pickle.dump(audio_features, fp)

In [10]:
with open('outfile', 'rb') as f:
    x = pickle.load(f)

In [11]:
x

[{'danceability': 0.619,
  'energy': 0.603,
  'key': 9,
  'loudness': -9.481,
  'mode': 1,
  'speechiness': 0.0342,
  'acousticness': 0.712,
  'instrumentalness': 0,
  'liveness': 0.0721,
  'valence': 0.958,
  'tempo': 127.433,
  'type': 'audio_features',
  'id': '48i055G1OT5KxGGftwFxWy',
  'uri': 'spotify:track:48i055G1OT5KxGGftwFxWy',
  'track_href': 'https://api.spotify.com/v1/tracks/48i055G1OT5KxGGftwFxWy',
  'analysis_url': 'https://api.spotify.com/v1/audio-analysis/48i055G1OT5KxGGftwFxWy',
  'duration_ms': 178933,
  'time_signature': 4},
 {'danceability': 0.396,
  'energy': 0.293,
  'key': 2,
  'loudness': -14.062,
  'mode': 1,
  'speechiness': 0.0275,
  'acousticness': 0.941,
  'instrumentalness': 0.000196,
  'liveness': 0.105,
  'valence': 0.343,
  'tempo': 100.307,
  'type': 'audio_features',
  'id': '44AyOl4qVkzS48vBsbNXaC',
  'uri': 'spotify:track:44AyOl4qVkzS48vBsbNXaC',
  'track_href': 'https://api.spotify.com/v1/tracks/44AyOl4qVkzS48vBsbNXaC',
  'analysis_url': 'https://a

In [12]:
list_of_audio_features = list(filter(None, x))

In [13]:
df = pd.DataFrame(list_of_audio_features)

In [14]:
df

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,0.619,0.60300,9,-9.481,1,0.0342,0.712,0.000000,0.0721,0.958,127.433,audio_features,48i055G1OT5KxGGftwFxWy,spotify:track:48i055G1OT5KxGGftwFxWy,https://api.spotify.com/v1/tracks/48i055G1OT5K...,https://api.spotify.com/v1/audio-analysis/48i0...,178933,4
1,0.396,0.29300,2,-14.062,1,0.0275,0.941,0.000196,0.1050,0.343,100.307,audio_features,44AyOl4qVkzS48vBsbNXaC,spotify:track:44AyOl4qVkzS48vBsbNXaC,https://api.spotify.com/v1/tracks/44AyOl4qVkzS...,https://api.spotify.com/v1/audio-analysis/44Ay...,182360,3
2,0.491,0.58300,7,-10.964,1,0.0376,0.185,0.000000,0.4060,0.908,150.566,audio_features,3yrSvpt2l1xhsV9Em88Pul,spotify:track:3yrSvpt2l1xhsV9Em88Pul,https://api.spotify.com/v1/tracks/3yrSvpt2l1xh...,https://api.spotify.com/v1/audio-analysis/3yrS...,183307,4
3,0.390,0.31000,5,-14.545,1,0.0676,0.794,0.000000,0.3790,0.584,177.515,audio_features,3RkQ3UwOyPqpIiIvGVewuU,spotify:track:3RkQ3UwOyPqpIiIvGVewuU,https://api.spotify.com/v1/tracks/3RkQ3UwOyPqp...,https://api.spotify.com/v1/audio-analysis/3RkQ...,330533,4
4,0.663,0.60000,7,-10.870,1,0.0320,0.430,0.000000,0.1840,0.800,129.991,audio_features,7tqhbajSfrz2F7E1Z75ASX,spotify:track:7tqhbajSfrz2F7E1Z75ASX,https://api.spotify.com/v1/tracks/7tqhbajSfrz2...,https://api.spotify.com/v1/audio-analysis/7tqh...,151667,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
64095,0.477,0.00669,5,-36.430,1,0.0518,0.995,0.908000,0.0591,0.258,128.611,audio_features,4UjMKBpI6ukfH31WCfeh4O,spotify:track:4UjMKBpI6ukfH31WCfeh4O,https://api.spotify.com/v1/tracks/4UjMKBpI6ukf...,https://api.spotify.com/v1/audio-analysis/4UjM...,137707,4
64096,0.310,0.15600,9,-22.460,0,0.0411,0.994,0.879000,0.0894,0.378,87.922,audio_features,1UEDMKbWoJqaD91bRC8iZx,spotify:track:1UEDMKbWoJqaD91bRC8iZx,https://api.spotify.com/v1/tracks/1UEDMKbWoJqa...,https://api.spotify.com/v1/audio-analysis/1UED...,244093,3
64097,0.394,0.01300,3,-25.399,1,0.0538,0.977,0.970000,0.1310,0.268,78.927,audio_features,7yD1pjo3OfslfWikXPGcAR,spotify:track:7yD1pjo3OfslfWikXPGcAR,https://api.spotify.com/v1/tracks/7yD1pjo3Ofsl...,https://api.spotify.com/v1/audio-analysis/7yD1...,42867,4
64098,0.355,0.04430,0,-24.512,1,0.0355,0.994,0.882000,0.1090,0.149,76.587,audio_features,2hNbUtLFQxp0uQ3fKju16M,spotify:track:2hNbUtLFQxp0uQ3fKju16M,https://api.spotify.com/v1/tracks/2hNbUtLFQxp0...,https://api.spotify.com/v1/audio-analysis/2hNb...,198427,3


In [16]:
df.to_csv('listofaudio.csv')