In [1]:
import spotipy
import spotipy.util as util
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import MinMaxScaler, Normalizer
import pandas as pd
from pandas.io.json import json_normalize
from flask import jsonify
from joblib import load
import pickle
import numpy as np
from flask import request
from joblib import dump
from joblib import load
import pandas as pd

from env_vars import * 

In [122]:
USERNAME = USERNAME #your spotify username
CLIENT_ID = CLIENT_ID #set at your developer account
CLIENT_SECRET = CLIENT_SECRET #set at your developer account
REDIRECT_URI = 'http://google.com/' #set at your developer account, usually "http://localhost:8000"
SCOPE = 'user-library-read' # or else
# ps. REDIRECT_URI is crucial here. if http://localhost:8000 is not set, or with a single '/' misplaced, it will generate a connection error.

# then pass them:

token = util.prompt_for_user_token(username = USERNAME, 
                                   scope = SCOPE, 
                                   client_id = CLIENT_ID, 
                                   client_secret = CLIENT_SECRET, 
                                   redirect_uri = REDIRECT_URI)

if token:
   sp = spotipy.Spotify(auth=token)

# From predict.py - Production

In [3]:
def instantiate_sp(token):
    sp = spotipy.Spotify(auth=token)
    return sp

In [4]:
instantiate_sp(token)

<spotipy.client.Spotify at 0x124937ed0>

In [5]:
def get_user_song_id(sp):
    results = sp.current_user_saved_tracks()
    genre = []
    counter = 0 
    for song_number in range(0,19):
        counter += 1 
        song_id = results['items'][song_number]['track']['id']
        artist_id = get_artist_id(song_id)
        genre = get_genres(artist_id)
        if genre != []:
            break
    return song_id

In [6]:
def get_acoustical_features(song_id,sp):
    return sp.audio_features(song_id)[0]

In [7]:
def get_popularity(trackID):
    return sp.track(trackID)['popularity']

In [8]:
def get_artist_id(trackID):
    return sp.track(trackID)['artists'][0]['id']

In [9]:
def get_genres(artistID):
    return sp.artist(artistID)['genres']

In [10]:
song_id = get_user_song_id(instantiate_sp(token))

In [11]:
song_id

'1WbwlYiiMZGQlrFGirRnHv'

# Production Flask Predict function

In [12]:
def get_features(song_id,sp):
    results_dict = sp.audio_features(song_id)[0]
    audio_features = {
        "audio_features": {
            key: results_dict[key] for key in results_dict.keys() & {
                'danceability',
                'energy',
                'key',
                'loudness',
                'mode',
                'speechiness',
                'acousticness',
                'instrumentalness',
                'liveness',
                'valence',
                'tempo',
                'time_signature'}}}

    return audio_features

In [13]:
def predict(content):
    similar_songs = []
    print('Loading dataframe...')
    dataframe = pd.DataFrame.from_dict(
        json_normalize(content['audio_features']),
                                orient='columns')
    print(dataframe)
    print('Dataframe Object Created')
    print('Loading pickled scaler...')
    scalar = load('./models/scalar2.joblib')
    print('Pickled scaler loaded')
    print('Scaling dataframe object...')
    df_array = np.array(dataframe)
    print(df_array)
    dataframe_scaled = scalar.transform(df_array)
    print(dataframe_scaled)
    print('Dataframe scaled')
    print('Loading pickled model...')
    model = load('./models/model2.joblib')
    print('Model loaded')
    results = model.kneighbors([dataframe_scaled][0])[1]
    print('Prediction executed')
    print('song_id_list loading...')
    song_id_list = load('./data/song_id_list2.joblib')
    print('song_id_list loaded')
    
    print('beginning for loop...')
    for song_row in results[0][1:]:
        song_id = song_id_list[song_row]
        similar_songs.append({'similarity': [.99], 'values': song_id})
    json_dict = {"songs": similar_songs}
    return json_dict

In [14]:
predict(get_features(song_id,sp))

Loading dataframe...
   danceability  energy    tempo  speechiness  key  liveness  valence  \
0          0.62   0.231  104.853       0.0486   11    0.0741    0.053   

   instrumentalness  loudness  time_signature  acousticness  mode  
0             0.911   -23.873               4         0.961     1  
Dataframe Object Created
Loading pickled scaler...
Pickled scaler loaded
Scaling dataframe object...
[[ 6.20000e-01  2.31000e-01  1.04853e+02  4.86000e-02  1.10000e+01
   7.41000e-02  5.30000e-02  9.11000e-01 -2.38730e+01  4.00000e+00
   9.61000e-01  1.00000e+00]]
[[ 6.22489960e-01  2.31695085e-01  1.04853000e+02  4.86000000e-02
   1.00000000e+00  7.41000000e-02  9.05285215e-01  9.11000000e-01
  -2.46621901e+01  1.60010881e-02  1.92200000e-01  1.00000000e+00]]
Dataframe scaled
Loading pickled model...
Model loaded
Prediction executed
song_id_list loading...
song_id_list loaded
beginning for loop...


{'songs': [{'similarity': [0.99], 'values': '5JCOty8yyvF9TViLuErnGs'},
  {'similarity': [0.99], 'values': '6TlIKA2WbzqMe5XYSfZlAN'},
  {'similarity': [0.99], 'values': '55GUHzqAzcnLQxxXUzUXBG'},
  {'similarity': [0.99], 'values': '25KWesixaxoMc9pw0yKTJ5'},
  {'similarity': [0.99], 'values': '3ga81Ra1zTzxrdl1QEi1Vq'},
  {'similarity': [0.99], 'values': '0YsxUUcK9nAbDBMKyAQrxK'},
  {'similarity': [0.99], 'values': '41hQCtZslX1BMRB8bsswlm'},
  {'similarity': [0.99], 'values': '3iM84eZQ6JLFfWLcqF9mCd'},
  {'similarity': [0.99], 'values': '2BW7QcTMnChZR8XGTVZzf9'},
  {'similarity': [0.99], 'values': '53E0iLvaSlApDIcIdL7Q3G'},
  {'similarity': [0.99], 'values': '1jb95UllYPKzg0tZaEhj78'},
  {'similarity': [0.99], 'values': '6wzvPnGC7pnB2Yms834o1l'},
  {'similarity': [0.99], 'values': '4PWlZhmZSuSvfne0jXMNSg'},
  {'similarity': [0.99], 'values': '4WnqkG5DAC8GWM6d4Hyqdf'},
  {'similarity': [0.99], 'values': '3LCA1ZrMYxeQ9j9ZeIE9uN'},
  {'similarity': [0.99], 'values': '4Tr0syfaxqpeq3MlVvmJdb'},

In [15]:
song_id

'1WbwlYiiMZGQlrFGirRnHv'

In [172]:
class Sound_Drip:
    
    def __init__(self, token):
        self.token = token
    
    def instantiate_sp(self,token):
        sp = spotipy.Spotify(auth=token)
        return sp

    def get_user_song_id(self,sp):
        results = sp.current_user_saved_tracks()
        genre = []
        counter = 0 
        for song_number in range(0,19):
            counter += 1 
            song_id = results['items'][song_number]['track']['id']
            artist_id = get_artist_id(song_id)
            genre = get_genres(artist_id)
            if genre != []:
                break
        return song_id

    def get_acoustical_features(self,song_id,sp):
        acoustical_features = sp.audio_features(song_id)[0]
        return acoustical_features

    def get_popularity(self, song_id):
        popularity =  sp.track(song_id)['popularity']
        return popularity

    def get_artist_id(self, song_id):
        artist = sp.track(song_id)['artists'][0]['id']
        return artist

    def get_genres(self, artist):
        genre = sp.artist(artist)['genres']
        return genre
    
    def create_feature_object(self,popularity, acoustical_features):
        popularity_dict = {'popularity': popularity}
        song_features = acoustical_features
        song_features.update(popularity_dict)
        song_features = {
    "audio_features": {
        key: song_features[key] for key in song_features.keys() & {
            'popularity',
            'acousticness',
            'danceability',
            'energy',
            'instrumentalness',
            'key',
            'liveness',
            'loudness',
            'mode',
            'speechiness',
            'tempo',
            'time_signature',
            'valence'}}}

        df = pd.DataFrame.from_dict(json_normalize(song_features["audio_features"]),orient='columns')   
        df = df.reindex(sorted(df.columns), axis=1)
        return df
    
    def filter_model(model_results,source_genre_list): 
        #loop takes KNN results and filters by source track genres
        genre_array = pickle.load(open("./data/genres_array_2.pkl","rb"))
        filtered_list = []
        song_list_length = 20
        for output_song_index in model_results[0][1:]:
            output_genre_list = genre_array[output_song_index]
            for output_genre in output_genre_list:
                output_genre = output_genre.strip(" ")
                for source_genre in source_genre_list:
                    source_genre = "'" + source_genre + "'"
                    if source_genre == output_genre:
                        filtered_list.append(output_song_index)
                    else:
                        continue
        if len(set(filtered_list)) > song_list_length:
            filtered_list = set(filtered_list)
            filtered_list = list(filtered_list)[0:20]
        else:
            counter = song_list_length - len(set(filtered_list))
            print(len(set(filtered_list)))
            print(counter)
            print(f'need to add {counter} items to final song output')
            for output_song_index in model_results[1:]:
                if output_song_index not in filtered_list:
                    if counter > 0:
                        filtered_list.append(output_song_index)
                        counter -= 1
                    else:
                        break
        return filtered_list

In [17]:
#         audio_features = {
#             "audio_features": {
#                 key: raw_results[key] for key in results_dict.keys() & {
#                     'danceability',
#                     'energy',
#                     'key',
#                     'loudness',
#                     'mode',
#                     'speechiness',
#                     'acousticness',
#                     'instrumentalness',
#                     'liveness',
#                     'valence',
#                     'tempo',
#                     'time_signature'}}}

In [123]:
sd = Sound_Drip(token)

In [124]:
sp = sd.instantiate_sp(token)

In [125]:
song_id = sd.get_user_song_id(sp)

In [121]:
song_id

'1WbwlYiiMZGQlrFGirRnHv'

In [22]:
acoustical_features = sd.get_acoustical_features(song_id, sp)

In [23]:
song_id

'1WbwlYiiMZGQlrFGirRnHv'

In [24]:
popularity = sd.get_popularity(song_id)

In [25]:
popularity

31

In [26]:
artist = sd.get_artist_id(song_id)

In [27]:
artist

'5mNY0NPszdalbrb4ITO3M8'

In [28]:
genre = sd.get_genres(artist)

In [29]:
genre

['ambient',
 'art pop',
 'art rock',
 'berlin school',
 'canterbury scene',
 'drone',
 'experimental',
 'experimental pop',
 'experimental rock',
 'fourth world',
 'free improvisation',
 'industrial',
 'krautrock',
 'no wave',
 'post-punk']

In [30]:
song_features = sd.create_feature_object(popularity, acoustical_features)

In [31]:
song_features

Unnamed: 0,acousticness,danceability,energy,instrumentalness,key,liveness,loudness,mode,popularity,speechiness,tempo,time_signature,valence
0,0.961,0.62,0.231,0.911,11,0.0741,-23.873,1,31,0.0486,104.853,4,0.053


In [32]:
# array([[ 2.0800e-02,  4.4900e-01,  7.0400e-01,  0.0000e+00,  5.0000e+00,
#          3.2200e-01, -2.8300e+00,  0.0000e+00,  4.6000e+01,  3.3500e-02,
#          7.4985e+01,  4.0000e+00,  2.3400e-01]])

In [143]:
## Updated function - Not in Flask - 

def get_results(song_features_df):
    scaler = load("./data/scalar3.joblib")
#     print('Scaling data...')
    data_scaled = scaler.transform(song_features_df)
#     print(data_scaled)
#     print('Data scaled')
    normalizer = Normalizer()
#     print('Normalizing data...')
    data_normalized = normalizer.fit_transform(data_scaled)
#     print(data_normalized)
#     print('Data normalized')
#     print('Loading pickled model...')
    model = load('./models/model5.joblib')
    results = model.kneighbors([data_normalized][0])[1:]
    print('Prediction executed')
    return results[0]

In [137]:
get_results(song_features)

array([[222673, 188930, 389656, ..., 113323, 198794,  52503]])

In [138]:
results = get_results(song_features)

In [139]:
results[0][1:10]

array([188930, 389656, 546188, 492763, 448984, 570380, 526192, 569447,
       490760])

In [148]:
song_id_array = pickle.load(open("./data/song_id_array3.pkl","rb"))

In [154]:
song_id_array[1]

'41RpZW2lxAdnqDd2nMBzLQ'

In [157]:
def song_id_prediction_output(filtered_list): 
    similar_songs = []
    print('song_id_list loading...')
    song_id_array = pickle.load(open('./data/song_id_array3.pkl', 'rb'))
    print('song_id_list loaded')
    print('beginning for loop...')
    for song_row in filtered_list:
        song_id = song_id_array[song_row]
        similar_songs.append({'similarity': [.99], 'values': song_id})
    json_dict = {"songs": similar_songs}
    print("Results returned")
    return json_dict

In [171]:
song_id_prediction_output(filtered_list)

song_id_list loading...
song_id_list loaded
beginning for loop...
Results returned


{'songs': [{'similarity': [0.99], 'values': '2C1FzUUtHp31ZdQsTGDM2f'},
  {'similarity': [0.99], 'values': '1NVsdk9Z0xNG2sUhZmkbD0'},
  {'similarity': [0.99], 'values': '5SwPRje7oQV5RP0saxoZp8'},
  {'similarity': [0.99], 'values': '5RrWu7ddslbnrRImeW7LOs'},
  {'similarity': [0.99], 'values': '0FQjfJB4cdkVfxZHz8RV4M'},
  {'similarity': [0.99], 'values': '6PF0WYSCH6OhGKHRHOQSxG'},
  {'similarity': [0.99], 'values': '6vEAyRqugkWBfQpccdiWFU'},
  {'similarity': [0.99], 'values': '7g7aNOC904ne2PUc14Xevb'},
  {'similarity': [0.99], 'values': '4jJwm24UagUP51wTp657Xp'},
  {'similarity': [0.99], 'values': '6roY6Z2b0Q7DH3ZM4SRoTz'},
  {'similarity': [0.99], 'values': '5wmn1zWhSSDLUmLLwYO9Rj'},
  {'similarity': [0.99], 'values': '34tFidHrETMAjsiK87WVVW'},
  {'similarity': [0.99], 'values': '0rUyA7P4nPKCRVvKhFo7XL'},
  {'similarity': [0.99], 'values': '5i2859ZJXGPOzDwdZSOMiH'},
  {'similarity': [0.99], 'values': '3QOQEObqJvCi4M2SNeko7V'},
  {'similarity': [0.99], 'values': '2raNLL2FWxWNA4veLZIuAj'},

# For prediction.py - New Functions

In [168]:
def filter_model(model_results,source_genre_list): 
    #loop takes KNN results and filters by source track genres
    genre_array = pickle.load(open("./data/genres_array_2.pkl","rb"))
    filtered_list = []
    song_list_length = 20
    for output_song_index in model_results[0][1:]:
        output_genre_list = genre_array[output_song_index]
        for output_genre in output_genre_list:
            output_genre = output_genre.strip(" ")
            for source_genre in source_genre_list:
                source_genre = "'" + source_genre + "'"
                if source_genre == output_genre:
                    filtered_list.append(output_song_index)
                else:
                    continue
    if len(set(filtered_list)) > song_list_length:
        filtered_list = set(filtered_list)
        filtered_list = list(filtered_list)[0:20]
    else:
        counter = song_list_length - len(set(filtered_list))
        print(len(set(filtered_list)))
        print(counter)
        print(f'need to add {counter} items to final song output')
        for output_song_index in model_results[1:]:
            if output_song_index not in filtered_list:
                if counter > 0:
                    filtered_list.append(output_song_index)
                    counter -= 1
                else:
                    break
    return filtered_list    

In [169]:
# results = predictfunc(song_features)
source_genre_list = (get_genres(get_artist_id(song_id)))
# source_genre_list =['techno']
# print(source_genre_list)
filtered_list = filter_model(results,source_genre_list)

In [170]:
len(filtered_list)

20

In [261]:
len(results[1:])

4999

# Sandbox - Play Dirty!

In [53]:
# import numpy as np

# old_song_list = load('data/song_id_list2(1).joblib')

# old_song_array = np.array(old_song_list)

# pickle.dump(old_song_array, open('data/old_song_array.pkl', 'wb'))

In [74]:
get_user_song_id(sp)

ReadTimeout: HTTPSConnectionPool(host='api.spotify.com', port=443): Read timed out. (read timeout=None)

# Older Genre Filtering Code for Reference - Attemping another approach using dataframe

In [306]:
# #loop takes KNN results and filters by source track genres
# # old
# filtered_list = []
# # filtered_lists = []
# source_song_index = knn_results1[0][0]
# source_genre_list = df.loc[source_song_index]["genres_stripped"].split(",")
# # print(source_genre_list)
# for output_song_index in knn_results1[0][1:]:
# #     print(output_song_index)
#     output_genre_list = df.loc[output_song_index]["genres_stripped"].split(",")
# #     print(output_genre_list)
#     for output_genre in output_genre_list:
#         output_genre = output_genre.strip(' ')
# #         print(output_genre)
#         for source_genre in source_genre_list:
#             source_genre = source_genre.strip(' ')
# #             print(source_genre)
#             if source_genre == output_genre:
# #                 print(output_song_index,source_genre,output_genre)
#                 filtered_list.append(output_song_index)
# #             print(filtered_list)
#             else:
# # #             print("source genre does not match output genre!")
#                 continue
# #     filtered_lists.append(filtered_list)
# print(len(set(filtered_list)),"!!!!!")

807 !!!!!


# For Reference - Working code in KNN Model 3 Notebook - 
# Ready for Production - Load in Genre Array and Utilize in updated function for genre filtering

In [None]:
def filter_model(): 
    #loop takes KNN results and filters by source track genres
    filtered_list = []
    song_list_length = 364
    source_song_index = knn_results1[0][0]
    source_genre_list = genre_array[source_song_index]
    for output_song_index in knn_results1[0][1:]:
        output_genre_list = genre_array[output_song_index]
        for output_genre in output_genre_list:
            for source_genre in source_genre_list:
                if source_genre == output_genre:
                    filtered_list.append(output_song_index)
                else:
                    continue
    if len(set(filtered_list)) > song_list_length:
        print("list large enough")
    else:
        counter = song_list_length - len(set(filtered_list))
        print(len(set(filtered_list)))
        print(counter)
        print(f'need to add {counter} items to final song output')
        for output_song_index in knn_results1[0][1:]:
            if output_song_index not in filtered_list:
                if counter > 0:
                    filtered_list.append(output_song_index)
                    counter -= 1
                else:
                    break
            
#     print(len(output_song_index_list),"!!!!!")    

In [None]:
filter_model()

# Test Multiple Rows - Functional!

In [272]:
def output_len_of_lists(list_of_lists):
    for list_item in list_of_lists:
        print(len(list_item))

In [None]:
#loop takes range of KNN results per defined track range and filter matches by source track genres
filtered_lists = []
for source_song_index in range(0,1000):
    knn_results1 = knn1.kneighbors([data_normalized[source_song_index]])[1]
    knn_result_ouputs = knn_results1[0][1:]
    filtered_list = []
    source_genre_list = genre_array[source_song_index]
    for output_song_index in knn_result_ouputs:
        output_genre_list = genre_array[output_song_index]
        for output_genre in output_genre_list:
            for source_genre in source_genre_list:
                if source_genre == output_genre:
                    filtered_list.append(output_song_index)
                else:
                    continue
    filtered_lists.append(set(filtered_list))
    
output_len_of_lists(filtered_lists)

# Definining New Class for Use in Predict.py