In [1]:
from env_vars import *
from flask import jsonify, request
from functools import reduce
from joblib import load, dump
import json
from more_itertools import unique_everseen
import numpy as np
import pandas as pd
from pandas import json_normalize
import pickle
import psycopg2 as ps
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import MinMaxScaler, Normalizer
import spotipy
import spotipy.util as util
import sys
import time

#### TODO: investigate this warning printed in the CLI
`FutureWarning: Method cleanup(connection_file=True) is deprecated, use cleanup_resources(restart=False). warnings.warn("Method cleanup(connection_file=True) is deprecated, use cleanup_resources(restart=False).")`

#### Token generation

In [38]:
# Obtain a Spotify OAuth token through spotipy's prompt_for_user_token helper,
# using the credentials star-imported from env_vars.
token = util.prompt_for_user_token(username=USERNAME,
                                   scope=SCOPE,
                                   client_id=CLIENT_ID,
                                   client_secret=CLIENT_SECRET,
                                   redirect_uri=REDIRECT_URI)

# Stop here when no token came back: otherwise `sp` stays undefined and the
# benchmark cells below fail later with an unrelated-looking error.
if not token:
    raise RuntimeError(f"Could not obtain a Spotify token for user {USERNAME!r}")

sp = spotipy.Spotify(auth=token)

### Application code from elastic beanstalk environment:
##### >Version label: app-200609_131842-8
##### >Source:20201624wO-sd_061020_v2.zip

#### Flow Reference

START TIME FOR get_user_ids<br/>
TOTAL get_user_ids TIME: 0.18714261054992676 seconds<br/>
START TIME FOR get_stale_seed<br/>
START TIME FOR db_connect<br/>
 TOTAL db_connect TIME: 0.8159277439117432 seconds<br/>
 TOTAL get_stale_seed TIME: 1.0477402210235596 seconds<br/>
START TIME FOR get_stale_results<br/>
START TIME FOR db_connect<br/>
 TOTAL db_connect TIME: 0.7740335464477539 seconds<br/>
 TOTAL get_stale_results TIME: 1.0157630443572998 seconds<br/>
START TIME FOR get_user_song_id_source_genre<br/>
START TIME FOR get_artist_id<br/>
 TOTAL get_artist_id TIME: 0.10947275161743164 seconds<br/>
START TIME FOR get_genres<br/>
 TOTAL get_genres TIME: 0.09751224517822266 seconds<br/>
 TOTAL get_user_song_id_source_genre TIME: 0.34342408180236816 seconds<br/>
START TIME FOR get_acoustical_features<br/>
 TOTAL get_acoustical_features TIME: 0.09440803527832031 seconds<br/>
START TIME FOR get_popularity<br/>
 TOTAL get_popularity TIME: 0.08349156379699707 seconds<br/>
START TIME FOR create_feature_object<br/>
 TOTAL create_feature_object TIME: 0.00498652458190918 seconds<br/>
START TIME FOR get_results<br/>
 TOTAL get_results TIME: 0.7330377101898193 seconds<br/>
START TIME FOR filter_model<br/>
 TOTAL filter_model TIME: 0.9584643840789795 seconds<br/>
START TIME FOR song_id_prediction_output<br/>
 TOTAL song_id_prediction_output TIME: 0.06779003143310547 seconds<br/>
START TIME FOR insert_user_predictions<br/>
START TIME FOR db_connect<br/>
 TOTAL db_connect TIME: 0.832514762878418 seconds<br/>
 TOTAL insert_user_predictions TIME: 3.6547586917877197 seconds<br/>

#### Baseline: timing statements added to the `Sound_Drip` methods for benchmarking

In [None]:
# Timing log for the benchmark: every Sound_Drip method appends its wall-clock
# duration (seconds) to the list stored under its own name.
# Re-run this cell to reset the log before a new measurement.
time_table = {'get_user_ids': [],
              'get_user_song_id_source_genre': [],
              'get_acoustical_features': [],  # was listed twice in the literal
              'get_popularity': [],
              'get_artist_id': [],
              'get_genres': [],
              'create_feature_object': [],
              'get_results': [],
              'filter_model': [],
              'song_id_prediction_output': [],
              'db_connect': [],
              'insert_user_predictions': [],
              'get_stale_results': [],
              'get_stale_seed': []
             }

# Pick the recommendations table that matches the Flask environment.
if FLASK_ENV == 'production':
    db_table = 'recommendations'
elif FLASK_ENV == 'development':
    db_table = 'recommendations_dev'
else:
    # Fail here rather than with a NameError inside the first query that
    # interpolates db_table.
    raise ValueError(
        f"Unsupported FLASK_ENV {FLASK_ENV!r}; expected 'production' or 'development'")

def _timed(label):
    """Return a decorator that logs a method's wall-clock duration under ``label``.

    The duration (seconds) is appended to ``time_table[label]`` after the method
    returns; nothing is logged when it raises. ``time_table`` is resolved at call
    time, so re-running the cell that resets the table needs no class redefinition.
    """
    from functools import wraps  # local import: the notebook only imports ``reduce``

    def decorator(method):
        @wraps(method)
        def wrapper(*args, **kwargs):
            start_time = time.time()
            result = method(*args, **kwargs)
            time_table.setdefault(label, []).append(time.time() - start_time)
            return result
        return wrapper
    return decorator


class Sound_Drip:
    """Benchmark build of the Sound Drip recommender (row-by-row insert baseline).

    Constructing an instance runs the complete pipeline (see ``__init__``) and
    keeps every intermediate result as an attribute. Each pipeline method logs
    its duration in the module-level ``time_table`` through ``_timed``.
    """

    def __init__(self, token):
        """Run the recommendation pipeline for the user that owns ``token``.

        Args:
            token: Spotify OAuth access token handed to ``spotipy.Spotify``.

        Side effects: calls the Spotify API, reads the model/data files under
        ``./models`` and ``./data``, and inserts the new recommendations into
        the ``db_table`` Postgres table.
        """
        self.token = token
        self.sp = spotipy.Spotify(auth=self.token)
        self.user_id, self.display_name = self.get_user_ids()
        self.stale_seed_list = self.get_stale_seed()
        self.stale_results_list = self.get_stale_results()
        self.song_id, self.source_genre = self.get_user_song_id_source_genre()
        self.acoustical_features = self.get_acoustical_features(self.song_id)
        self.popularity = self.get_popularity(self.song_id)
        self.song_features_df = self.create_feature_object(
            self.popularity, self.acoustical_features)
        self.results = self.get_results(self.song_features_df)
        self.filtered_list = self.filter_model(self.results, self.source_genre)
        self.song_id_predictions = self.song_id_prediction_output(
            self.filtered_list)
        self.insert_user_predictions()

    @_timed('get_user_ids')
    def get_user_ids(self):
        """Return ``(user_id, display_name)`` of the current Spotify user."""
        current_user_dict = self.sp.current_user()
        display_name = current_user_dict['display_name']
        user_id = current_user_dict['id']
        return user_id, display_name

    @_timed('get_user_song_id_source_genre')
    def get_user_song_id_source_genre(self):
        """Choose the seed track and return ``(song_id, genres)``.

        Saved tracks (up to 50) that were not used as a seed before are tried
        first, in the order Spotify returns them; the first one whose artist has
        at least one genre wins. If none qualifies, the previously used seeds in
        ``self.stale_seed_list`` are tried the same way. If no candidate has any
        genre, the last candidate examined is returned with its empty genre list.

        Raises:
            ValueError: there is neither a saved track nor a previous seed.
        """
        stale_songs = self.stale_seed_list
        saved_tracks = self.sp.current_user_saved_tracks(limit=50)
        saved_ids = [item['track']['id'] for item in saved_tracks['items']]
        fresh_ids = [saved_id for saved_id in saved_ids
                     if saved_id not in stale_songs]

        song_id, genre = None, []
        # Fresh seeds first; previously used seeds are only the fallback.
        for candidate_ids in (fresh_ids, stale_songs):
            for song_id in candidate_ids:
                genre = self.get_genres(self.get_artist_id(song_id))
                if genre:
                    return song_id, genre
        if song_id is None:
            raise ValueError(
                'no saved or previously used tracks available to seed recommendations')
        return song_id, genre

    @_timed('get_acoustical_features')
    def get_acoustical_features(self, song_id):
        """Return the first entry of the audio-features response for ``song_id``."""
        return self.sp.audio_features(song_id)[0]

    @_timed('get_popularity')
    def get_popularity(self, song_id):
        """Return the ``popularity`` field of the track ``song_id``."""
        return self.sp.track(song_id)['popularity']

    @_timed('get_artist_id')
    def get_artist_id(self, song_id):
        """Return the id of the first artist listed on the track ``song_id``."""
        return self.sp.track(song_id)['artists'][0]['id']

    @_timed('get_genres')
    def get_genres(self, artist):
        """Return the ``genres`` list of the artist with id ``artist``."""
        return self.sp.artist(artist)['genres']

    @_timed('create_feature_object')
    def create_feature_object(self, popularity, acoustical_features):
        """Build the one-row feature frame that is fed to the scaler.

        Args:
            popularity: popularity value of the seed track.
            acoustical_features: audio-features mapping of the seed track.

        Returns:
            DataFrame with one row and the model features that are present in
            the input, columns sorted alphabetically.
        """
        model_features = {
            'popularity',
            'acousticness',
            'danceability',
            'energy',
            'instrumentalness',
            'key',
            'liveness',
            'loudness',
            'mode',
            'speechiness',
            'tempo',
            'time_signature',
            'valence'}
        # Merge into a new dict so the caller's mapping is left untouched.
        song_features = {**acoustical_features, 'popularity': popularity}
        selected = {key: song_features[key]
                    for key in song_features.keys() & model_features}
        df = json_normalize(selected)
        return df.reindex(sorted(df.columns), axis=1)

    @_timed('get_results')
    def get_results(self, song_features_df):
        """Scale and normalize the features, then query the stored neighbour model.

        Returns:
            The second element of ``model.kneighbors(...)`` (the neighbour
            indices when distances are returned as well, which is the
            scikit-learn default).
        """
        scaler = load("./models/scalar3.joblib")
        data_scaled = scaler.transform(song_features_df)
        data_normalized = Normalizer().fit_transform(data_scaled)
        model = load('./models/model5.joblib')
        return model.kneighbors(data_normalized)[1]

    @_timed('filter_model')
    def filter_model(self, model_results, source_genre_list, song_list_length=20):
        """Select up to ``song_list_length`` neighbour indices, genre matches first.

        The first entry of ``model_results[0]`` is skipped and indices already
        recommended to the user (``self.stale_results_list``) are dropped. The
        remaining indices that share a genre with the seed track come first, in
        neighbour order; if there are fewer than ``song_list_length`` of them,
        the list is topped up with the other remaining neighbours.

        Args:
            model_results: neighbour indices as returned by ``get_results``.
            source_genre_list: genres of the seed track.
            song_list_length: maximum number of indices to return.
        """
        # NOTE: pickle.load can execute arbitrary code; only load trusted files.
        with open("./data/genres_array_2.pkl", "rb") as genre_file:
            genre_array = pickle.load(genre_file)

        stale_results = set(self.stale_results_list)
        fresh_indices = [index for index in model_results[0][1:]
                         if index not in stale_results]
        # Order-preserving de-duplication of the candidate indices.
        candidates = list(dict.fromkeys(fresh_indices))

        # Stored genres are compared after stripping spaces and are wrapped in
        # single quotes, so the seed genres are quoted the same way.
        quoted_source_genres = {"'" + source_genre + "'"
                                for source_genre in source_genre_list}
        filtered_list = [
            output_song_index for output_song_index in candidates
            if any(output_genre.strip(" ") in quoted_source_genres
                   for output_genre in genre_array[output_song_index])
        ][:song_list_length]

        if len(filtered_list) < song_list_length:
            chosen = set(filtered_list)
            for output_song_index in candidates:
                if len(filtered_list) >= song_list_length:
                    break
                if output_song_index not in chosen:
                    filtered_list.append(output_song_index)
                    chosen.add(output_song_index)
        return filtered_list

    @_timed('song_id_prediction_output')
    def song_id_prediction_output(self, filtered_list):
        """Translate model row indices into song ids.

        Returns:
            Tuple ``(song_result_output_dict, song_id_and_index_dict)``: the
            first is ``{"songs": [{'similarity': [.99], 'values': song_id}, ...]}``,
            the second maps each song id to its row index.
        """
        # NOTE: pickle.load can execute arbitrary code; only load trusted files.
        with open('./data/song_id_array3.pkl', 'rb') as song_id_file:
            song_id_array = pickle.load(song_id_file)
        song_id_list = [song_id_array[song_row] for song_row in filtered_list]
        similar_songs = [{'similarity': [.99], 'values': song_id}
                         for song_id in song_id_list]
        song_result_output_dict = {"songs": similar_songs}
        song_id_and_index_dict = dict(zip(song_id_list, filtered_list))
        return song_result_output_dict, song_id_and_index_dict

    @_timed('db_connect')
    def db_connect(self):
        """Open a Postgres connection and return ``(connection, cursor)``."""
        conn = ps.connect(host=POSTGRES_ADDRESS,
                          database=POSTGRES_DBNAME,
                          user=POSTGRES_USERNAME,
                          password=POSTGRES_PASSWORD,
                          port=POSTGRES_PORT)
        cur = conn.cursor()
        return conn, cur

    def _fetch_first_column(self, query, params):
        """Run ``query`` with ``params`` and return the first column of every row.

        Prints the error and exits with status 1 on ``ps.DatabaseError``; the
        connection is closed in every case where one was opened.
        """
        conn = None  # stays None when db_connect itself fails
        try:
            conn, cur = self.db_connect()
            cur.execute(query, params)
            return [row[0] for row in cur.fetchall()]
        except ps.DatabaseError as e:
            print(f'Error {e}')
            sys.exit(1)
        finally:
            if conn is not None:
                conn.close()

    @_timed('insert_user_predictions')
    def insert_user_predictions(self):
        """Insert one row per predicted song into ``db_table``, one statement each.

        Values are passed as query parameters (as strings, matching the quoted
        literals used before) instead of being formatted into the SQL text.
        Prints the error and exits with status 1 on ``ps.DatabaseError``.
        """
        insert_query = (
            f'INSERT INTO {db_table}'
            '(userid,songid,songlistindex,seedsongid,recdate)'
            ' VALUES (%s,%s,%s,%s,current_timestamp);')
        rows = [(str(self.user_id), str(song_id), str(song_index), str(self.song_id))
                for song_id, song_index in self.song_id_predictions[1].items()]
        conn = None  # stays None when db_connect itself fails
        try:
            conn, cur = self.db_connect()
            # Deliberately one execute per row: this is the benchmark baseline.
            for row in rows:
                cur.execute(insert_query, row)
            conn.commit()
        except ps.DatabaseError as e:
            print(f'Error {e}')
            sys.exit(1)
        finally:
            if conn is not None:
                conn.close()

    @_timed('get_stale_results')
    def get_stale_results(self):
        """Return the distinct ``songlistindex`` values already stored for the user."""
        query = f'SELECT DISTINCT (songlistindex) FROM {db_table} WHERE userid = %s;'
        return self._fetch_first_column(query, (str(self.user_id),))

    @_timed('get_stale_seed')
    def get_stale_seed(self):
        """Return the distinct non-null ``seedsongid`` values stored for the user."""
        query = (f'SELECT DISTINCT (seedsongid) FROM {db_table} '
                 'WHERE userid = %s AND seedsongid is not null;')
        return self._fetch_first_column(query, (str(self.user_id),))


In [None]:
# Run the whole pipeline once; each method appends its duration to time_table.
sd = Sound_Drip(token)
# db_connect, get_artist_id and get_genres only run inside other timed methods,
# so adding their samples again would count that time twice.
nested_timers = {'db_connect', 'get_artist_id', 'get_genres'}
total_seconds = sum(duration
                    for name, durations in time_table.items()
                    if name not in nested_timers
                    for duration in durations)
print(f'TOTAL_TIME: {round(total_seconds, 3)}')
time_table 

### TEST insert bulk ~ 2 sec decrease

In [None]:
# Timing log for the bulk-insert run: every Sound_Drip method appends its
# wall-clock duration (seconds) to the list stored under its own name.
time_table = {'get_user_ids': [],
              'get_user_song_id_source_genre': [],
              'get_acoustical_features': [],  # was listed twice in the literal
              'get_popularity': [],
              'get_artist_id': [],
              'get_genres': [],
              'create_feature_object': [],
              'get_results': [],
              'filter_model': [],
              'song_id_prediction_output': [],
              'db_connect': [],
              'insert_user_predictions': [],
              'get_stale_results': [],
              'get_stale_seed': []}

# Pick the recommendations table that matches the Flask environment.
if FLASK_ENV == 'production':
    db_table = 'recommendations'
elif FLASK_ENV == 'development':
    db_table = 'recommendations_dev'
else:
    # Fail here rather than with a NameError inside the first query that
    # interpolates db_table.
    raise ValueError(
        f"Unsupported FLASK_ENV {FLASK_ENV!r}; expected 'production' or 'development'")

def _timed(label):
    """Return a decorator that logs a method's wall-clock duration under ``label``.

    The duration (seconds) is appended to ``time_table[label]`` after the method
    returns; nothing is logged when it raises. ``time_table`` is resolved at call
    time, so re-running the cell that resets the table needs no class redefinition.
    """
    from functools import wraps  # local import: the notebook only imports ``reduce``

    def decorator(method):
        @wraps(method)
        def wrapper(*args, **kwargs):
            start_time = time.time()
            result = method(*args, **kwargs)
            time_table.setdefault(label, []).append(time.time() - start_time)
            return result
        return wrapper
    return decorator


class Sound_Drip:
    """Benchmark build of the Sound Drip recommender (single bulk INSERT variant).

    Constructing an instance runs the complete pipeline (see ``__init__``) and
    keeps every intermediate result as an attribute. Each pipeline method logs
    its duration in the module-level ``time_table`` through ``_timed``.
    """

    def __init__(self, token):
        """Run the recommendation pipeline for the user that owns ``token``.

        Args:
            token: Spotify OAuth access token handed to ``spotipy.Spotify``.

        Side effects: calls the Spotify API, reads the model/data files under
        ``./models`` and ``./data``, and inserts the new recommendations into
        the ``db_table`` Postgres table.
        """
        self.token = token
        self.sp = spotipy.Spotify(auth=self.token)
        self.user_id, self.display_name = self.get_user_ids()
        self.stale_seed_list = self.get_stale_seed()
        self.stale_results_list = self.get_stale_results()
        self.song_id, self.source_genre = self.get_user_song_id_source_genre()
        self.acoustical_features = self.get_acoustical_features(self.song_id)
        self.popularity = self.get_popularity(self.song_id)
        self.song_features_df = self.create_feature_object(
            self.popularity, self.acoustical_features)
        self.results = self.get_results(self.song_features_df)
        self.filtered_list = self.filter_model(self.results, self.source_genre)
        self.song_id_predictions = self.song_id_prediction_output(
            self.filtered_list)
        self.insert_user_predictions()

    @_timed('get_user_ids')
    def get_user_ids(self):
        """Return ``(user_id, display_name)`` of the current Spotify user."""
        current_user_dict = self.sp.current_user()
        display_name = current_user_dict['display_name']
        user_id = current_user_dict['id']
        return user_id, display_name

    @_timed('get_user_song_id_source_genre')
    def get_user_song_id_source_genre(self):
        """Choose the seed track and return ``(song_id, genres)``.

        Saved tracks (up to 50) that were not used as a seed before are tried
        first, in the order Spotify returns them; the first one whose artist has
        at least one genre wins. If none qualifies, the previously used seeds in
        ``self.stale_seed_list`` are tried the same way. If no candidate has any
        genre, the last candidate examined is returned with its empty genre list.

        Raises:
            ValueError: there is neither a saved track nor a previous seed.
        """
        stale_songs = self.stale_seed_list
        saved_tracks = self.sp.current_user_saved_tracks(limit=50)
        saved_ids = [item['track']['id'] for item in saved_tracks['items']]
        fresh_ids = [saved_id for saved_id in saved_ids
                     if saved_id not in stale_songs]

        song_id, genre = None, []
        # Fresh seeds first; previously used seeds are only the fallback.
        for candidate_ids in (fresh_ids, stale_songs):
            for song_id in candidate_ids:
                genre = self.get_genres(self.get_artist_id(song_id))
                if genre:
                    return song_id, genre
        if song_id is None:
            raise ValueError(
                'no saved or previously used tracks available to seed recommendations')
        return song_id, genre

    @_timed('get_acoustical_features')
    def get_acoustical_features(self, song_id):
        """Return the first entry of the audio-features response for ``song_id``."""
        return self.sp.audio_features(song_id)[0]

    @_timed('get_popularity')
    def get_popularity(self, song_id):
        """Return the ``popularity`` field of the track ``song_id``."""
        return self.sp.track(song_id)['popularity']

    @_timed('get_artist_id')
    def get_artist_id(self, song_id):
        """Return the id of the first artist listed on the track ``song_id``."""
        return self.sp.track(song_id)['artists'][0]['id']

    @_timed('get_genres')
    def get_genres(self, artist):
        """Return the ``genres`` list of the artist with id ``artist``."""
        return self.sp.artist(artist)['genres']

    @_timed('create_feature_object')
    def create_feature_object(self, popularity, acoustical_features):
        """Build the one-row feature frame that is fed to the scaler.

        Args:
            popularity: popularity value of the seed track.
            acoustical_features: audio-features mapping of the seed track.

        Returns:
            DataFrame with one row and the model features that are present in
            the input, columns sorted alphabetically.
        """
        model_features = {
            'popularity',
            'acousticness',
            'danceability',
            'energy',
            'instrumentalness',
            'key',
            'liveness',
            'loudness',
            'mode',
            'speechiness',
            'tempo',
            'time_signature',
            'valence'}
        # Merge into a new dict so the caller's mapping is left untouched.
        song_features = {**acoustical_features, 'popularity': popularity}
        selected = {key: song_features[key]
                    for key in song_features.keys() & model_features}
        df = json_normalize(selected)
        return df.reindex(sorted(df.columns), axis=1)

    @_timed('get_results')
    def get_results(self, song_features_df):
        """Scale and normalize the features, then query the stored neighbour model.

        Returns:
            The second element of ``model.kneighbors(...)`` (the neighbour
            indices when distances are returned as well, which is the
            scikit-learn default).
        """
        scaler = load("./models/scalar3.joblib")
        data_scaled = scaler.transform(song_features_df)
        data_normalized = Normalizer().fit_transform(data_scaled)
        model = load('./models/model5.joblib')
        return model.kneighbors(data_normalized)[1]

    @_timed('filter_model')
    def filter_model(self, model_results, source_genre_list, song_list_length=20):
        """Select up to ``song_list_length`` neighbour indices, genre matches first.

        The first entry of ``model_results[0]`` is skipped and indices already
        recommended to the user (``self.stale_results_list``) are dropped. The
        remaining indices that share a genre with the seed track come first, in
        neighbour order; if there are fewer than ``song_list_length`` of them,
        the list is topped up with the other remaining neighbours.

        Args:
            model_results: neighbour indices as returned by ``get_results``.
            source_genre_list: genres of the seed track.
            song_list_length: maximum number of indices to return.
        """
        # NOTE: pickle.load can execute arbitrary code; only load trusted files.
        with open("./data/genres_array_2.pkl", "rb") as genre_file:
            genre_array = pickle.load(genre_file)

        stale_results = set(self.stale_results_list)
        fresh_indices = [index for index in model_results[0][1:]
                         if index not in stale_results]
        # Order-preserving de-duplication of the candidate indices.
        candidates = list(dict.fromkeys(fresh_indices))

        # Stored genres are compared after stripping spaces and are wrapped in
        # single quotes, so the seed genres are quoted the same way.
        quoted_source_genres = {"'" + source_genre + "'"
                                for source_genre in source_genre_list}
        filtered_list = [
            output_song_index for output_song_index in candidates
            if any(output_genre.strip(" ") in quoted_source_genres
                   for output_genre in genre_array[output_song_index])
        ][:song_list_length]

        if len(filtered_list) < song_list_length:
            chosen = set(filtered_list)
            for output_song_index in candidates:
                if len(filtered_list) >= song_list_length:
                    break
                if output_song_index not in chosen:
                    filtered_list.append(output_song_index)
                    chosen.add(output_song_index)
        return filtered_list

    @_timed('song_id_prediction_output')
    def song_id_prediction_output(self, filtered_list):
        """Translate model row indices into song ids.

        Returns:
            Tuple ``(song_result_output_dict, song_id_and_index_dict)``: the
            first is ``{"songs": [{'similarity': [.99], 'values': song_id}, ...]}``,
            the second maps each song id to its row index.
        """
        # NOTE: pickle.load can execute arbitrary code; only load trusted files.
        with open('./data/song_id_array3.pkl', 'rb') as song_id_file:
            song_id_array = pickle.load(song_id_file)
        song_id_list = [song_id_array[song_row] for song_row in filtered_list]
        similar_songs = [{'similarity': [.99], 'values': song_id}
                         for song_id in song_id_list]
        song_result_output_dict = {"songs": similar_songs}
        song_id_and_index_dict = dict(zip(song_id_list, filtered_list))
        return song_result_output_dict, song_id_and_index_dict

    @_timed('db_connect')
    def db_connect(self):
        """Open a Postgres connection and return ``(connection, cursor)``."""
        conn = ps.connect(host=POSTGRES_ADDRESS,
                          database=POSTGRES_DBNAME,
                          user=POSTGRES_USERNAME,
                          password=POSTGRES_PASSWORD,
                          port=POSTGRES_PORT)
        cur = conn.cursor()
        return conn, cur

    def _fetch_first_column(self, query, params):
        """Run ``query`` with ``params`` and return the first column of every row.

        Prints the error and exits with status 1 on ``ps.DatabaseError``; the
        connection is closed in every case where one was opened.
        """
        conn = None  # stays None when db_connect itself fails
        try:
            conn, cur = self.db_connect()
            cur.execute(query, params)
            return [row[0] for row in cur.fetchall()]
        except ps.DatabaseError as e:
            print(f'Error {e}')
            sys.exit(1)
        finally:
            if conn is not None:
                conn.close()

    @_timed('insert_user_predictions')
    def insert_user_predictions(self):
        """Insert all predicted songs into ``db_table`` with one multi-row INSERT.

        Values are passed as query parameters (as strings, matching the quoted
        literals used before) instead of being formatted into the SQL text.
        Nothing is sent when there are no predictions. Prints the error and
        exits with status 1 on ``ps.DatabaseError``.
        """
        rows = [(str(self.user_id), str(song_id), str(song_index), str(self.song_id))
                for song_id, song_index in self.song_id_predictions[1].items()]
        if not rows:
            # "VALUES" followed by no tuples is not valid SQL.
            return
        row_placeholder = '(%s,%s,%s,%s,current_timestamp)'
        insert_bulk = (
            f'INSERT INTO {db_table} (userid,songid,songlistindex,seedsongid,recdate) VALUES '
            + ','.join([row_placeholder] * len(rows)) + ';')
        params = [value for row in rows for value in row]
        conn = None  # stays None when db_connect itself fails
        try:
            conn, cur = self.db_connect()
            cur.execute(insert_bulk, params)
            conn.commit()
        except ps.DatabaseError as e:
            print(f'Error {e}')
            sys.exit(1)
        finally:
            if conn is not None:
                conn.close()

    @_timed('get_stale_results')
    def get_stale_results(self):
        """Return the distinct ``songlistindex`` values already stored for the user."""
        query = f'SELECT DISTINCT (songlistindex) FROM {db_table} WHERE userid = %s;'
        return self._fetch_first_column(query, (str(self.user_id),))

    @_timed('get_stale_seed')
    def get_stale_seed(self):
        """Return the distinct non-null ``seedsongid`` values stored for the user."""
        query = (f'SELECT DISTINCT (seedsongid) FROM {db_table} '
                 'WHERE userid = %s AND seedsongid is not null;')
        return self._fetch_first_column(query, (str(self.user_id),))


In [None]:
# Run the whole pipeline once; each method appends its duration to time_table.
sd = Sound_Drip(token)
# db_connect, get_artist_id and get_genres only run inside other timed methods,
# so adding their samples again would count that time twice.
nested_timers = {'db_connect', 'get_artist_id', 'get_genres'}
total_seconds = sum(duration
                    for name, durations in time_table.items()
                    if name not in nested_timers
                    for duration in durations)
print(f'TOTAL_TIME: {round(total_seconds, 3)}')
time_table 

### TEST refactored genre match ~ no discernible decrease

In [None]:
# Timing log for the refactored genre-match run: every Sound_Drip method appends
# its wall-clock duration (seconds) to the list stored under its own name.
time_table = {'get_user_ids': [],
              'get_user_song_id_source_genre': [],
              'get_acoustical_features': [],  # was listed twice in the literal
              'get_popularity': [],
              'get_artist_id': [],
              'get_genres': [],
              'create_feature_object': [],
              'get_results': [],
              'filter_model': [],
              'song_id_prediction_output': [],
              'db_connect': [],
              'insert_user_predictions': [],
              'get_stale_results': [],
              'get_stale_seed': []}

# Pick the recommendations table that matches the Flask environment.
if FLASK_ENV == 'production':
    db_table = 'recommendations'
elif FLASK_ENV == 'development':
    db_table = 'recommendations_dev'
else:
    # Fail here rather than with a NameError inside the first query that
    # interpolates db_table.
    raise ValueError(
        f"Unsupported FLASK_ENV {FLASK_ENV!r}; expected 'production' or 'development'")

class Sound_Drip:
    """Generate and store song recommendations for one Spotify user.

    Creating an instance runs the whole pipeline in ``__init__``. Every stage
    appends its elapsed wall-clock seconds to the module-level ``time_table``
    under its own method name. In this version the scaler, the model and the
    pickled arrays are loaded from disk inside the stages that use them.
    """

    # Keys kept (when present) for the single-row frame handed to the scaler.
    _MODEL_FEATURES = frozenset({
        'popularity',
        'acousticness',
        'danceability',
        'energy',
        'instrumentalness',
        'key',
        'liveness',
        'loudness',
        'mode',
        'speechiness',
        'tempo',
        'time_signature',
        'valence'})

    def __init__(self, token):
        """Run the recommendation pipeline for the user that owns ``token``.

        Args:
            token: value passed to ``spotipy.Spotify(auth=...)``.
        """
        self.token = token
        self.sp = spotipy.Spotify(auth=self.token)
        self.user_id, self.display_name = self.get_user_ids()
        self.stale_seed_list = self.get_stale_seed()
        self.stale_results_list = self.get_stale_results()
        self.song_id, self.source_genre = self.get_user_song_id_source_genre()
        self.acoustical_features = self.get_acoustical_features(self.song_id)
        self.popularity = self.get_popularity(self.song_id)
        self.song_features_df = self.create_feature_object(
            self.popularity, self.acoustical_features)
        self.results = self.get_results(self.song_features_df)
        self.filtered_list = self.filter_model(self.results, self.source_genre)
        self.song_id_predictions = self.song_id_prediction_output(
            self.filtered_list)
        self.insert_user_predictions()

    def get_user_ids(self):
        """Return ``(user_id, display_name)`` for the current token's user.

        Both values come from ``self.sp.current_user()``.
        """
        start_time = time.time()
        current_user_dict = self.sp.current_user()
        display_name = current_user_dict['display_name']
        user_id = current_user_dict['id']
        time_table['get_user_ids'].append(time.time() - start_time)
        return user_id, display_name

    def get_user_song_id_source_genre(self):
        """Choose the seed track and return ``(song_id, genre_list)``.

        Walks up to 50 saved tracks and stops at the first one that is not in
        ``self.stale_seed_list`` and whose first artist has at least one
        genre. When the last saved track is a stale seed, the stale seeds are
        walked instead and the first one with genres is used. If no track
        with genres is found, the last track examined is returned together
        with its (empty) genre list.

        Raises:
            ValueError: when the user has no saved tracks at all.
        """
        start_time = time.time()
        stale_songs = self.stale_seed_list
        saved_items = self.sp.current_user_saved_tracks(limit=50)['items']
        if not saved_items:
            # Without this guard the method would fail on an unbound local
            # at the return statement.
            raise ValueError('No saved tracks available to use as a seed')

        song_id, genre = None, []
        last_position = len(saved_items) - 1
        for position, item in enumerate(saved_items):
            song_id = item['track']['id']
            if song_id not in stale_songs:
                genre = self.get_genres(self.get_artist_id(song_id))
                if genre != []:
                    break
            elif position == last_position:
                # Out of fresh seeds: reuse a previously used one.
                for song_id in stale_songs:
                    genre = self.get_genres(self.get_artist_id(song_id))
                    if genre != []:
                        break
        time_table['get_user_song_id_source_genre'].append(time.time() - start_time)
        return song_id, genre

    def get_acoustical_features(self, song_id):
        """Return the first entry of ``self.sp.audio_features(song_id)``."""
        start_time = time.time()
        acoustical_features = self.sp.audio_features(song_id)[0]
        time_table['get_acoustical_features'].append(time.time() - start_time)
        return acoustical_features

    def get_popularity(self, song_id):
        """Return the ``'popularity'`` value of ``self.sp.track(song_id)``."""
        start_time = time.time()
        popularity = self.sp.track(song_id)['popularity']
        time_table['get_popularity'].append(time.time() - start_time)
        return popularity

    def get_artist_id(self, song_id):
        """Return the id of the first artist listed on the track."""
        start_time = time.time()
        artist = self.sp.track(song_id)['artists'][0]['id']
        time_table['get_artist_id'].append(time.time() - start_time)
        return artist

    def get_genres(self, artist):
        """Return the ``'genres'`` value of ``self.sp.artist(artist)``."""
        start_time = time.time()
        genre = self.sp.artist(artist)['genres']
        time_table['get_genres'].append(time.time() - start_time)
        return genre

    def create_feature_object(self, popularity, acoustical_features):
        """Build the single-row feature frame for the seed track.

        Args:
            popularity: value stored under the ``'popularity'`` column.
            acoustical_features: mapping of audio features; it is not
                modified.

        Returns:
            pandas.DataFrame: one row holding the keys listed in
            ``_MODEL_FEATURES`` that are present, with columns sorted by name.
        """
        start_time = time.time()
        # Work on a copy so the caller's dictionary is left untouched.
        combined = dict(acoustical_features)
        combined['popularity'] = popularity
        selected = {name: combined[name]
                    for name in combined.keys() & self._MODEL_FEATURES}
        df = json_normalize(selected)
        df = df.reindex(sorted(df.columns), axis=1)
        time_table['create_feature_object'].append(time.time() - start_time)
        return df

    def get_results(self, song_features_df):
        """Return the second element of ``model.kneighbors`` for the seed row.

        The row is transformed by the scaler loaded from
        ``./models/scalar3.joblib``, passed through a fresh ``Normalizer`` and
        then handed to the model loaded from ``./models/model5.joblib``.
        """
        start_time = time.time()
        scaler = load("./models/scalar3.joblib")
        data_scaled = scaler.transform(song_features_df)
        data_normalized = Normalizer().fit_transform(data_scaled)
        model = load('./models/model5.joblib')
        # kneighbors returns a tuple; only its second element is used
        # (presumably the neighbour indices — confirm against the model).
        results = model.kneighbors(data_normalized)[1]
        time_table['get_results'].append(time.time() - start_time)
        return results

    def filter_model(self, model_results, source_genre_list, song_list_length=20):
        """Filter the model output by the seed track's genres.

        Args:
            model_results: indexable whose first element is the sequence of
                candidate song indices; the first candidate is skipped
                (presumably the seed itself — TODO confirm).
            source_genre_list: genres of the seed track.
            song_list_length: maximum number of indices to return.

        Returns:
            list: candidates sharing at least one genre with the seed, in
            model order; when fewer than ``song_list_length`` match, the list
            is topped up with the remaining candidates in model order.
            Indices found in ``self.stale_results_list`` are never returned.
        """
        start_time = time.time()
        # NOTE: pickle must only be used on trusted files; this is a
        # project-local artifact.
        with open("./data/genres_array_2.pkl", "rb") as genre_file:
            genre_array = pickle.load(genre_file)

        stale_results = set(self.stale_results_list)
        candidates = [index for index in model_results[0][1:]
                      if index not in stale_results]

        # Genres are compared wrapped in single quotes; presumably the
        # pickled genre entries keep their quotes — TODO confirm.
        source_genre = {"'" + genre + "'" for genre in source_genre_list}

        filtered_list = []
        for output_song_index in candidates:
            output_genre = (elem.strip(" ") for elem in genre_array[output_song_index])
            if not source_genre.isdisjoint(output_genre):
                filtered_list.append(output_song_index)

        # Order-preserving de-duplication.
        filtered_list = list(dict.fromkeys(filtered_list))
        if len(filtered_list) >= song_list_length:
            filtered_list = filtered_list[:song_list_length]
        else:
            already_chosen = set(filtered_list)
            for output_song_index in candidates:
                if len(filtered_list) >= song_list_length:
                    break
                if output_song_index not in already_chosen:
                    filtered_list.append(output_song_index)
                    already_chosen.add(output_song_index)
        time_table['filter_model'].append(time.time() - start_time)
        return filtered_list

    def song_id_prediction_output(self, filtered_list):
        """Translate song indices into song ids.

        Returns:
            tuple: ``({"songs": [{'similarity': [.99], 'values': song_id},
            ...]}, {song_id: song_index, ...})`` built from the array pickled
            in ``./data/song_id_array3.pkl``.
        """
        start_time = time.time()
        # NOTE: pickle must only be used on trusted files; this is a
        # project-local artifact.
        with open('./data/song_id_array3.pkl', 'rb') as song_id_file:
            song_id_array = pickle.load(song_id_file)
        song_id_list = [song_id_array[song_row] for song_row in filtered_list]
        similar_songs = [{'similarity': [.99], 'values': song_id}
                         for song_id in song_id_list]
        song_result_output_dict = {"songs": similar_songs}
        song_id_and_index_dict = dict(zip(song_id_list, filtered_list))
        time_table['song_id_prediction_output'].append(time.time() - start_time)
        return song_result_output_dict, song_id_and_index_dict

    def db_connect(self):
        """Open a Postgres connection and return ``(connection, cursor)``."""
        start_time = time.time()
        conn = ps.connect(host=POSTGRES_ADDRESS,
                          database=POSTGRES_DBNAME,
                          user=POSTGRES_USERNAME,
                          password=POSTGRES_PASSWORD,
                          port=POSTGRES_PORT)
        cur = conn.cursor()
        time_table['db_connect'].append(time.time() - start_time)
        return conn, cur

    def _fetch_first_column(self, query, params):
        """Run ``query`` with ``params`` on a new connection; return column 0 of each row.

        Prints the error and exits with status 1 on ``ps.DatabaseError``.
        """
        # Bound before the try block so the finally clause is safe even when
        # db_connect() itself raises.
        conn = None
        try:
            conn, cur = self.db_connect()
            cur.execute(query, params)
            return [row[0] for row in cur.fetchall()]
        except ps.DatabaseError as e:
            print(f'Error {e}')
            sys.exit(1)
        finally:
            if conn is not None:
                conn.close()

    def insert_user_predictions(self):
        """Insert every predicted song for this user into ``db_table``.

        One multi-row INSERT is issued with all values passed as bound
        parameters; nothing is sent when there are no predictions.

        Raises:
            SystemExit: with status 1, after printing the error, when
                psycopg2 raises ``DatabaseError``.
        """
        start_time = time.time()
        # Values are passed as text, exactly as the quoted literals of a
        # hand-built statement would be.
        rows = [(str(self.user_id), str(song_id), str(song_index), str(self.song_id))
                for song_id, song_index in self.song_id_predictions[1].items()]
        if rows:
            placeholders = ','.join(['(%s,%s,%s,%s,current_timestamp)'] * len(rows))
            insert_bulk = f'INSERT INTO {db_table} (userid,songid,songlistindex,seedsongid,recdate) VALUES ' + placeholders + ';'
            params = [value for row in rows for value in row]
            conn = None
            try:
                conn, cur = self.db_connect()
                cur.execute(insert_bulk, params)
                conn.commit()
            except ps.DatabaseError as e:
                print(f'Error {e}')
                sys.exit(1)
            finally:
                if conn is not None:
                    conn.close()
        time_table['insert_user_predictions'].append(time.time() - start_time)

    def get_stale_results(self):
        """Return the distinct ``songlistindex`` values stored for this user."""
        start_time = time.time()
        # The table name is a module-level constant; the user id is bound.
        query = f'SELECT DISTINCT (songlistindex) FROM {db_table} WHERE userid = %s;'
        stale_results_list = self._fetch_first_column(query, (self.user_id,))
        time_table['get_stale_results'].append(time.time() - start_time)
        return stale_results_list

    def get_stale_seed(self):
        """Return the distinct non-null ``seedsongid`` values stored for this user."""
        start_time = time.time()
        # The table name is a module-level constant; the user id is bound.
        query = f'SELECT DISTINCT (seedsongid) FROM {db_table} WHERE userid = %s AND seedsongid is not null;'
        stale_results_list = self._fetch_first_column(query, (self.user_id,))
        time_table['get_stale_seed'].append(time.time() - start_time)
        return stale_results_list


In [None]:
# Instantiating Sound_Drip runs every stage, and each stage appends its
# timing to time_table.
sd = Sound_Drip(token)
# Add up every recorded timing, visiting the stages in dictionary order.
# NOTE: db_connect, get_artist_id and get_genres are timed inside other timed
# stages, so their seconds are included twice in this total.
print(f'TOTAL_TIME: {round(sum(seconds for timings in time_table.values() for seconds in timings), 3)}')
time_table

### TEST move pickle/model loads out of the per-stage methods; loading them in `__init__` for now, probably not for production

In [None]:
# Profiling registry: every Sound_Drip stage appends its elapsed wall-clock
# seconds to the list stored under its own method name.
# Each stage appears exactly once; a dict literal silently keeps only the
# last value of a repeated key, so a duplicate entry is just noise.
time_table = {'get_user_ids': [],
              'get_user_song_id_source_genre': [],
              'get_acoustical_features': [],
              'get_popularity': [],
              'get_artist_id': [],
              'get_genres': [],
              'create_feature_object': [],
              'get_results': [],
              'filter_model': [],
              'song_id_prediction_output': [],
              'db_connect': [],
              'insert_user_predictions': [],
              'get_stale_results': [],
              'get_stale_seed': []}

# Pick the recommendations table that matches the configured environment.
if FLASK_ENV == 'production':
    db_table = 'recommendations'
elif FLASK_ENV == 'development':
    db_table = 'recommendations_dev'
else:
    # Fail here with a clear message instead of leaving db_table unset (or
    # stale from an earlier cell) until the first query is built.
    raise ValueError(
        f"Unsupported FLASK_ENV {FLASK_ENV!r}: expected 'production' or 'development'")

class Sound_Drip:
    """Generate and store song recommendations for one Spotify user.

    Creating an instance runs the whole pipeline in ``__init__``. Every stage
    appends its elapsed wall-clock seconds to the module-level ``time_table``
    under its own method name. In this version the scaler, the model and the
    pickled arrays are loaded once in ``__init__`` and kept on the instance,
    so their load time is not part of any stage's timing.
    """

    # Keys kept (when present) for the single-row frame handed to the scaler.
    _MODEL_FEATURES = frozenset({
        'popularity',
        'acousticness',
        'danceability',
        'energy',
        'instrumentalness',
        'key',
        'liveness',
        'loudness',
        'mode',
        'speechiness',
        'tempo',
        'time_signature',
        'valence'})

    @staticmethod
    def _load_pickle(path):
        """Unpickle ``path`` and close the file before returning.

        NOTE: pickle must only be used on trusted files; the paths used by
        this class are project-local artifacts.
        """
        with open(path, "rb") as handle:
            return pickle.load(handle)

    def __init__(self, token):
        """Load the model artifacts, then run the recommendation pipeline.

        Args:
            token: value passed to ``spotipy.Spotify(auth=...)``.
        """
        self.genre_array = self._load_pickle("./data/genres_array_2.pkl")
        self.scaler = load("./models/scalar3.joblib")
        self.model = load('./models/model5.joblib')
        self.song_id_array = self._load_pickle('./data/song_id_array3.pkl')
        self.token = token
        self.sp = spotipy.Spotify(auth=self.token)
        self.user_id, self.display_name = self.get_user_ids()
        self.stale_seed_list = self.get_stale_seed()
        self.stale_results_list = self.get_stale_results()
        self.song_id, self.source_genre = self.get_user_song_id_source_genre()
        self.acoustical_features = self.get_acoustical_features(self.song_id)
        self.popularity = self.get_popularity(self.song_id)
        self.song_features_df = self.create_feature_object(
            self.popularity, self.acoustical_features)
        self.results = self.get_results(self.song_features_df)
        self.filtered_list = self.filter_model(self.results, self.source_genre)
        self.song_id_predictions = self.song_id_prediction_output(
            self.filtered_list)
        self.insert_user_predictions()

    def get_user_ids(self):
        """Return ``(user_id, display_name)`` for the current token's user.

        Both values come from ``self.sp.current_user()``.
        """
        start_time = time.time()
        current_user_dict = self.sp.current_user()
        display_name = current_user_dict['display_name']
        user_id = current_user_dict['id']
        time_table['get_user_ids'].append(time.time() - start_time)
        return user_id, display_name

    def get_user_song_id_source_genre(self):
        """Choose the seed track and return ``(song_id, genre_list)``.

        Walks up to 50 saved tracks and stops at the first one that is not in
        ``self.stale_seed_list`` and whose first artist has at least one
        genre. When the last saved track is a stale seed, the stale seeds are
        walked instead and the first one with genres is used. If no track
        with genres is found, the last track examined is returned together
        with its (empty) genre list.

        Raises:
            ValueError: when the user has no saved tracks at all.
        """
        start_time = time.time()
        stale_songs = self.stale_seed_list
        saved_items = self.sp.current_user_saved_tracks(limit=50)['items']
        if not saved_items:
            # Without this guard the method would fail on an unbound local
            # at the return statement.
            raise ValueError('No saved tracks available to use as a seed')

        song_id, genre = None, []
        last_position = len(saved_items) - 1
        for position, item in enumerate(saved_items):
            song_id = item['track']['id']
            if song_id not in stale_songs:
                genre = self.get_genres(self.get_artist_id(song_id))
                if genre != []:
                    break
            elif position == last_position:
                # Out of fresh seeds: reuse a previously used one.
                for song_id in stale_songs:
                    genre = self.get_genres(self.get_artist_id(song_id))
                    if genre != []:
                        break
        time_table['get_user_song_id_source_genre'].append(time.time() - start_time)
        return song_id, genre

    def get_acoustical_features(self, song_id):
        """Return the first entry of ``self.sp.audio_features(song_id)``."""
        start_time = time.time()
        acoustical_features = self.sp.audio_features(song_id)[0]
        time_table['get_acoustical_features'].append(time.time() - start_time)
        return acoustical_features

    def get_popularity(self, song_id):
        """Return the ``'popularity'`` value of ``self.sp.track(song_id)``."""
        start_time = time.time()
        popularity = self.sp.track(song_id)['popularity']
        time_table['get_popularity'].append(time.time() - start_time)
        return popularity

    def get_artist_id(self, song_id):
        """Return the id of the first artist listed on the track."""
        start_time = time.time()
        artist = self.sp.track(song_id)['artists'][0]['id']
        time_table['get_artist_id'].append(time.time() - start_time)
        return artist

    def get_genres(self, artist):
        """Return the ``'genres'`` value of ``self.sp.artist(artist)``."""
        start_time = time.time()
        genre = self.sp.artist(artist)['genres']
        time_table['get_genres'].append(time.time() - start_time)
        return genre

    def create_feature_object(self, popularity, acoustical_features):
        """Build the single-row feature frame for the seed track.

        Args:
            popularity: value stored under the ``'popularity'`` column.
            acoustical_features: mapping of audio features; it is not
                modified.

        Returns:
            pandas.DataFrame: one row holding the keys listed in
            ``_MODEL_FEATURES`` that are present, with columns sorted by name.
        """
        start_time = time.time()
        # Work on a copy so the caller's dictionary is left untouched.
        combined = dict(acoustical_features)
        combined['popularity'] = popularity
        selected = {name: combined[name]
                    for name in combined.keys() & self._MODEL_FEATURES}
        df = json_normalize(selected)
        df = df.reindex(sorted(df.columns), axis=1)
        time_table['create_feature_object'].append(time.time() - start_time)
        return df

    def get_results(self, song_features_df):
        """Return the second element of ``self.model.kneighbors`` for the seed row.

        The row is transformed by ``self.scaler`` and passed through a fresh
        ``Normalizer`` before it is handed to the model.
        """
        start_time = time.time()
        data_scaled = self.scaler.transform(song_features_df)
        data_normalized = Normalizer().fit_transform(data_scaled)
        # kneighbors returns a tuple; only its second element is used
        # (presumably the neighbour indices — confirm against the model).
        results = self.model.kneighbors(data_normalized)[1]
        time_table['get_results'].append(time.time() - start_time)
        return results

    def filter_model(self, model_results, source_genre_list, song_list_length=20):
        """Filter the model output by the seed track's genres.

        Args:
            model_results: indexable whose first element is the sequence of
                candidate song indices; the first candidate is skipped
                (presumably the seed itself — TODO confirm).
            source_genre_list: genres of the seed track.
            song_list_length: maximum number of indices to return.

        Returns:
            list: candidates sharing at least one genre with the seed, in
            model order; when fewer than ``song_list_length`` match, the list
            is topped up with the remaining candidates in model order.
            Indices found in ``self.stale_results_list`` are never returned.
        """
        start_time = time.time()
        stale_results = set(self.stale_results_list)
        candidates = [index for index in model_results[0][1:]
                      if index not in stale_results]

        # Genres are compared wrapped in single quotes; presumably the
        # entries of self.genre_array keep their quotes — TODO confirm.
        source_genre = {"'" + genre + "'" for genre in source_genre_list}

        filtered_list = []
        for output_song_index in candidates:
            output_genre = (elem.strip(" ") for elem in self.genre_array[output_song_index])
            if not source_genre.isdisjoint(output_genre):
                filtered_list.append(output_song_index)

        # Order-preserving de-duplication.
        filtered_list = list(dict.fromkeys(filtered_list))
        if len(filtered_list) >= song_list_length:
            filtered_list = filtered_list[:song_list_length]
        else:
            already_chosen = set(filtered_list)
            for output_song_index in candidates:
                if len(filtered_list) >= song_list_length:
                    break
                if output_song_index not in already_chosen:
                    filtered_list.append(output_song_index)
                    already_chosen.add(output_song_index)
        time_table['filter_model'].append(time.time() - start_time)
        return filtered_list

    def song_id_prediction_output(self, filtered_list):
        """Translate song indices into song ids using ``self.song_id_array``.

        Returns:
            tuple: ``({"songs": [{'similarity': [.99], 'values': song_id},
            ...]}, {song_id: song_index, ...})``.
        """
        start_time = time.time()
        song_id_list = [self.song_id_array[song_row] for song_row in filtered_list]
        similar_songs = [{'similarity': [.99], 'values': song_id}
                         for song_id in song_id_list]
        song_result_output_dict = {"songs": similar_songs}
        song_id_and_index_dict = dict(zip(song_id_list, filtered_list))
        time_table['song_id_prediction_output'].append(time.time() - start_time)
        return song_result_output_dict, song_id_and_index_dict

    def db_connect(self):
        """Open a Postgres connection and return ``(connection, cursor)``."""
        start_time = time.time()
        conn = ps.connect(host=POSTGRES_ADDRESS,
                          database=POSTGRES_DBNAME,
                          user=POSTGRES_USERNAME,
                          password=POSTGRES_PASSWORD,
                          port=POSTGRES_PORT)
        cur = conn.cursor()
        time_table['db_connect'].append(time.time() - start_time)
        return conn, cur

    def _fetch_first_column(self, query, params):
        """Run ``query`` with ``params`` on a new connection; return column 0 of each row.

        Prints the error and exits with status 1 on ``ps.DatabaseError``.
        """
        # Bound before the try block so the finally clause is safe even when
        # db_connect() itself raises.
        conn = None
        try:
            conn, cur = self.db_connect()
            cur.execute(query, params)
            return [row[0] for row in cur.fetchall()]
        except ps.DatabaseError as e:
            print(f'Error {e}')
            sys.exit(1)
        finally:
            if conn is not None:
                conn.close()

    def insert_user_predictions(self):
        """Insert every predicted song for this user into ``db_table``.

        One multi-row INSERT is issued with all values passed as bound
        parameters; nothing is sent when there are no predictions.

        Raises:
            SystemExit: with status 1, after printing the error, when
                psycopg2 raises ``DatabaseError``.
        """
        start_time = time.time()
        # Values are passed as text, exactly as the quoted literals of a
        # hand-built statement would be.
        rows = [(str(self.user_id), str(song_id), str(song_index), str(self.song_id))
                for song_id, song_index in self.song_id_predictions[1].items()]
        if rows:
            placeholders = ','.join(['(%s,%s,%s,%s,current_timestamp)'] * len(rows))
            insert_bulk = f'INSERT INTO {db_table} (userid,songid,songlistindex,seedsongid,recdate) VALUES ' + placeholders + ';'
            params = [value for row in rows for value in row]
            conn = None
            try:
                conn, cur = self.db_connect()
                cur.execute(insert_bulk, params)
                conn.commit()
            except ps.DatabaseError as e:
                print(f'Error {e}')
                sys.exit(1)
            finally:
                if conn is not None:
                    conn.close()
        time_table['insert_user_predictions'].append(time.time() - start_time)

    def get_stale_results(self):
        """Return the distinct ``songlistindex`` values stored for this user."""
        start_time = time.time()
        # The table name is a module-level constant; the user id is bound.
        query = f'SELECT DISTINCT (songlistindex) FROM {db_table} WHERE userid = %s;'
        stale_results_list = self._fetch_first_column(query, (self.user_id,))
        time_table['get_stale_results'].append(time.time() - start_time)
        return stale_results_list

    def get_stale_seed(self):
        """Return the distinct non-null ``seedsongid`` values stored for this user."""
        start_time = time.time()
        # The table name is a module-level constant; the user id is bound.
        query = f'SELECT DISTINCT (seedsongid) FROM {db_table} WHERE userid = %s AND seedsongid is not null;'
        stale_results_list = self._fetch_first_column(query, (self.user_id,))
        time_table['get_stale_seed'].append(time.time() - start_time)
        return stale_results_list

In [None]:
# Instantiating Sound_Drip runs every stage, and each stage appends its
# timing to time_table.
sd = Sound_Drip(token)
# Add up every recorded timing, visiting the stages in dictionary order.
# NOTE: db_connect, get_artist_id and get_genres are timed inside other timed
# stages, so their seconds are included twice in this total.
print(f'TOTAL_TIME: {round(sum(seconds for timings in time_table.values() for seconds in timings), 3)}')
time_table

#### TEST upfront DB connection, add method to close

In [11]:
# Profiling registry: every Sound_Drip stage appends its elapsed wall-clock
# seconds to the list stored under its own method name.
# Each stage appears exactly once; a dict literal silently keeps only the
# last value of a repeated key, so a duplicate entry is just noise.
time_table = {'get_user_ids': [],
              'get_user_song_id_source_genre': [],
              'get_acoustical_features': [],
              'get_popularity': [],
              'get_artist_id': [],
              'get_genres': [],
              'create_feature_object': [],
              'get_results': [],
              'filter_model': [],
              'song_id_prediction_output': [],
              'db_connect': [],
              'insert_user_predictions': [],
              'get_stale_results': [],
              'get_stale_seed': []}

# Pick the recommendations table that matches the configured environment.
if FLASK_ENV == 'production':
    db_table = 'recommendations'
elif FLASK_ENV == 'development':
    db_table = 'recommendations_dev'
else:
    # Fail here with a clear message instead of leaving db_table unset (or
    # stale from an earlier cell) until the first query is built.
    raise ValueError(
        f"Unsupported FLASK_ENV {FLASK_ENV!r}: expected 'production' or 'development'")

class Sound_Drip:
    """Generate and store song recommendations for one Spotify user.

    Instantiating the class runs the whole pipeline inside ``__init__``: load
    the pickled/joblib artifacts, identify the user, open one database
    connection, read the user's previous seeds and results, pick a seed song,
    build its feature row, query the neighbour model, filter the neighbours and
    insert the resulting predictions. Each step appends its elapsed seconds to
    the module-level ``time_table``.
    """

    # Feature names kept from the Spotify audio-features payload (plus popularity).
    _MODEL_FEATURES = frozenset({
        'popularity',
        'acousticness',
        'danceability',
        'energy',
        'instrumentalness',
        'key',
        'liveness',
        'loudness',
        'mode',
        'speechiness',
        'tempo',
        'time_signature',
        'valence'})

    def __init__(self, token):
        """Run the recommendation pipeline for the user behind ``token``.

        Args:
            token: Spotify access token handed to ``spotipy.Spotify``.
        """
        # NOTE(review): pickle/joblib run arbitrary code while loading; these
        # artifacts must only come from a trusted location.
        with open("./data/genres_array_2.pkl", "rb") as genre_file:
            self.genre_array = pickle.load(genre_file)
        self.scaler = load("./models/scalar3.joblib")
        self.model = load('./models/model5.joblib')
        with open('./data/song_id_array3.pkl', 'rb') as song_id_file:
            self.song_id_array = pickle.load(song_id_file)
        self.token = token
        self.sp = spotipy.Spotify(auth=self.token)
        self.user_id, self.display_name = self.get_user_ids()
        self.db_conn, self.db_cur = self.db_connect()
        # Alias kept for callers that want to close the connection themselves.
        self.close_db_con = self.db_close
        try:
            self.stale_seed_list = self.get_stale_seed()
            self.stale_results_list = self.get_stale_results()
            self.song_id, self.source_genre = self.get_user_song_id_source_genre()
            self.acoustical_features = self.get_acoustical_features(self.song_id)
            self.popularity = self.get_popularity(self.song_id)
            self.song_features_df = self.create_feature_object(
                self.popularity, self.acoustical_features)
            self.results = self.get_results(self.song_features_df)
            self.filtered_list = self.filter_model(self.results, self.source_genre)
            self.song_id_predictions = self.song_id_prediction_output(
                self.filtered_list)
            self.insert_user_predictions()
        finally:
            # Release the connection even when a Spotify/model step fails.
            self.db_close()

    def _record_time(self, step, start_time):
        """Append the seconds elapsed since ``start_time`` to ``time_table[step]``."""
        time_table[step].append(round(time.time() - start_time, 4))

    def _fetch_first_column(self, query, params):
        """Execute ``query`` on the shared cursor and return column 0 of each row.

        On a psycopg2 ``DatabaseError`` the error is printed, the connection is
        closed and the process exits with status 1.
        """
        try:
            self.db_cur.execute(query, params)
            return [row[0] for row in self.db_cur.fetchall()]
        except ps.DatabaseError as e:
            print(f'Error {e}')
            self.db_close()
            sys.exit(1)

    def get_user_ids(self):
        """Return ``(user_id, display_name)`` for the current Spotify user."""
        start_time = time.time()
        current_user = self.sp.current_user()
        display_name = current_user['display_name']
        user_id = current_user['id']

        self._record_time('get_user_ids', start_time)
        return user_id, display_name

    def get_user_song_id_source_genre(self):
        """Pick a seed song from the user's saved tracks and return its genres.

        Walks up to 50 saved tracks and stops at the first track that is not in
        ``self.stale_seed_list`` and whose artist has a non-empty genre list.
        When the last saved track is itself stale, the stale seeds are walked
        instead and the first one with genres is used.

        Returns:
            tuple: ``(song_id, genre)`` of the last song examined.

        Raises:
            ValueError: if the user has no saved tracks to seed from.
        """
        start_time = time.time()
        stale_songs = self.stale_seed_list

        items = self.sp.current_user_saved_tracks(limit=50)['items']
        if not items:
            # Previously this surfaced as an UnboundLocalError at the return.
            raise ValueError('user has no saved tracks to use as a seed song')

        last_position = len(items) - 1
        for position, item in enumerate(items):
            song_id = item['track']['id']

            if song_id not in stale_songs:
                genre = self.get_genres(self.get_artist_id(song_id))
                if genre != []:
                    break
            elif position == last_position:
                # Ran out of saved tracks on a stale one: fall back to old seeds.
                for song_id in stale_songs:
                    genre = self.get_genres(self.get_artist_id(song_id))
                    if genre != []:
                        break

        self._record_time('get_user_song_id_source_genre', start_time)
        return song_id, genre

    def get_acoustical_features(self, song_id):
        """Return the first audio-features entry Spotify reports for ``song_id``."""
        start_time = time.time()
        acoustical_features = self.sp.audio_features(song_id)[0]

        self._record_time('get_acoustical_features', start_time)
        return acoustical_features

    def get_popularity(self, song_id):
        """Return the ``popularity`` field of the Spotify track ``song_id``."""
        start_time = time.time()
        popularity = self.sp.track(song_id)['popularity']

        self._record_time('get_popularity', start_time)
        return popularity

    def get_artist_id(self, song_id):
        """Return the id of the first artist listed on the track ``song_id``."""
        start_time = time.time()
        artist = self.sp.track(song_id)['artists'][0]['id']

        self._record_time('get_artist_id', start_time)
        return artist

    def get_genres(self, artist):
        """Return the ``genres`` list Spotify reports for the artist id ``artist``."""
        start_time = time.time()
        genre = self.sp.artist(artist)['genres']

        self._record_time('get_genres', start_time)
        return genre

    def create_feature_object(self, popularity, acoustical_features):
        """Build the one-row feature frame for the seed song.

        Args:
            popularity: popularity value of the seed track.
            acoustical_features: audio-features dict of the seed track; it is
                not modified.

        Returns:
            pandas.DataFrame: a single row holding the model features that are
            present in the input, with columns in sorted order.
        """
        start_time = time.time()
        # Merge into a fresh dict so the caller's features dict stays untouched.
        merged = {**acoustical_features, 'popularity': popularity}
        selected = {key: merged[key]
                    for key in merged.keys() & self._MODEL_FEATURES}

        df = pd.DataFrame([selected])
        df = df.reindex(sorted(df.columns), axis=1)

        self._record_time('create_feature_object', start_time)
        return df

    def get_results(self, song_features_df):
        """Return the neighbour indices of the seed song from ``self.model``.

        The features are transformed with ``self.scaler`` and then
        row-normalised with a default ``Normalizer`` before the query.

        Returns:
            The index part (second element) of ``kneighbors``' result.
        """
        start_time = time.time()

        data_scaled = self.scaler.transform(song_features_df)
        data_normalized = Normalizer().fit_transform(data_scaled)

        # kneighbors returns (distances, indices); only the indices are used.
        neighbor_indices = self.model.kneighbors(data_normalized)[1]

        self._record_time('get_results', start_time)
        return neighbor_indices

    def filter_model(self, model_results, source_genre_list):
        """Select up to 20 neighbour indices, preferring genre matches.

        Args:
            model_results: neighbour indices as returned by ``get_results``;
                only the first row is used and its first entry is skipped
                (presumably the seed song itself - TODO confirm).
            source_genre_list: genres of the seed song's artist.

        Returns:
            list: indices not in ``self.stale_results_list``; genre-matching
            ones first, then topped up with the remaining candidates in model
            order until 20 are collected or the candidates run out.
        """
        start_time = time.time()

        song_list_length = 20
        stale_results = self.stale_results_list
        candidates = model_results[0][1:]

        # Membership is tested against the list as-is; this scan is what the
        # 'mr:' timing printed below measures.
        mr_start_time = time.time()
        fresh_results = [index for index in candidates
                         if index not in stale_results]
        print(f'mr: {time.time() - mr_start_time}')

        match_start_time = time.time()
        # Genre entries in genre_array are compared in single-quoted form.
        source_genre = {"'" + genre + "'" for genre in source_genre_list}

        filtered_list = []
        for output_song_index in fresh_results:
            output_genre = {elem.strip(" ")
                            for elem in self.genre_array[output_song_index]}
            if source_genre & output_genre:
                filtered_list.append(output_song_index)

        print(f'match: {time.time() - match_start_time}')
        filtered_list = list(unique_everseen(filtered_list))

        if len(filtered_list) >= song_list_length:
            filtered_list = filtered_list[:song_list_length]
        else:
            # Not enough genre matches: top up with the best remaining candidates.
            for output_song_index in fresh_results:
                if len(filtered_list) >= song_list_length:
                    break
                if output_song_index not in filtered_list:
                    filtered_list.append(output_song_index)

        self._record_time('filter_model', start_time)
        return filtered_list

    def song_id_prediction_output(self, filtered_list):
        """Translate model row indices into song ids.

        Returns:
            tuple: ``({"songs": [...]}, {song_id: row_index})``. Every entry of
            ``"songs"`` carries the hard-coded similarity ``[.99]``.
        """
        start_time = time.time()

        song_id_list = [self.song_id_array[song_row] for song_row in filtered_list]
        similar_songs = [{'similarity': [.99], 'values': song_id}
                         for song_id in song_id_list]

        song_result_output_dict = {"songs": similar_songs}
        song_id_and_index_dict = dict(zip(song_id_list, filtered_list))

        self._record_time('song_id_prediction_output', start_time)
        return song_result_output_dict, song_id_and_index_dict

    def db_connect(self):
        """Open a PostgreSQL connection and return ``(connection, cursor)``."""
        start_time = time.time()
        conn = ps.connect(host=POSTGRES_ADDRESS,
                          database=POSTGRES_DBNAME,
                          user=POSTGRES_USERNAME,
                          password=POSTGRES_PASSWORD,
                          port=POSTGRES_PORT)
        cur = conn.cursor()

        self._record_time('db_connect', start_time)
        return conn, cur

    def db_close(self):
        """Close the shared database connection if it is still open."""
        if not self.db_conn.closed:
            self.db_conn.close()

    def insert_user_predictions(self):
        """Insert the current predictions for this user with one statement.

        Writes one row per entry of ``self.song_id_predictions[1]`` into the
        module-level ``db_table`` and commits. Nothing is executed when there
        are no predictions. The connection is left open here; ``__init__``
        closes it once the pipeline finishes.

        Raises:
            SystemExit: after printing the error and closing the connection,
                when psycopg2 reports a ``DatabaseError``.
        """
        start_time = time.time()
        predictions = self.song_id_predictions[1]

        if predictions:
            row_template = '(%s,%s,%s,%s,current_timestamp)'
            placeholders = ','.join([row_template] * len(predictions))
            insert_bulk = (
                f'INSERT INTO {db_table} '
                f'(userid,songid,songlistindex,seedsongid,recdate) VALUES {placeholders};')

            params = []
            for song_id, song_index in predictions.items():
                # str() keeps the values as quoted literals, as the original
                # statement did, and avoids handing numpy scalars to psycopg2.
                params.extend(
                    (self.user_id, str(song_id), str(song_index), self.song_id))

            try:
                self.db_cur.execute(insert_bulk, params)
                self.db_conn.commit()
            except ps.DatabaseError as e:
                print(f'Error {e}')
                self.db_close()
                sys.exit(1)

        self._record_time('insert_user_predictions', start_time)

    def get_stale_results(self):
        """Return the distinct ``songlistindex`` values stored for this user."""
        start_time = time.time()
        # Only the table name is interpolated; the user id is a bound parameter.
        query = f'SELECT DISTINCT (songlistindex) FROM {db_table} WHERE userid = %s;'
        stale_results_list = self._fetch_first_column(query, (self.user_id,))

        self._record_time('get_stale_results', start_time)
        return stale_results_list

    def get_stale_seed(self):
        """Return the distinct non-null ``seedsongid`` values stored for this user."""
        start_time = time.time()
        # Only the table name is interpolated; the user id is a bound parameter.
        query = f'SELECT DISTINCT (seedsongid) FROM {db_table} WHERE userid = %s AND seedsongid is not null;'
        stale_results_list = self._fetch_first_column(query, (self.user_id,))

        self._record_time('get_stale_seed', start_time)
        return stale_results_list

In [12]:
# Run the whole pipeline once, then report the summed per-step timings.
sd = Sound_Drip(token)
print(
    f'TOTAL_TIME: {round(sum(reduce(lambda acc, timings: acc + timings, time_table.values())), 4)}'
)
time_table

mr: 0.918571949005127
match: 0.006982326507568359
TOTAL_TIME: 3.3997


{'get_user_ids': [0.2057],
 'get_user_song_id_source_genre': [0.315],
 'get_acoustical_features': [0.0795],
 'get_popularity': [0.0933],
 'get_artist_id': [0.0815],
 'get_genres': [0.0847],
 'create_feature_object': [0.001],
 'get_results': [0.0169],
 'filter_model': [0.9256],
 'song_id_prediction_output': [0.0],
 'db_connect': [0.8512],
 'insert_user_predictions': [0.2483],
 'get_stale_results': [0.2522],
 'get_stale_seed': [0.2448]}

#### TEST: faster filter_model using sets to drop stale results (0.9 s -> 0.001 s)

In [41]:
# Per-step timing log: each pipeline method appends its elapsed seconds to the
# list stored under its own name. (A repeated 'get_acoustical_features' key was
# dropped; a dict literal keeps only one entry per key anyway.)
time_table = {'get_user_ids': [],
              'get_user_song_id_source_genre': [],
              'get_acoustical_features': [],
              'get_popularity': [],
              'get_artist_id': [],
              'get_genres': [],
              'create_feature_object': [],
              'get_results': [],
              'filter_model': [],
              'song_id_prediction_output': [],
              'db_connect': [],
              'insert_user_predictions': [],
              'get_stale_results': [],
              'get_stale_seed': []}

# Choose the recommendations table for the active Flask environment; any other
# FLASK_ENV value leaves db_table unassigned.
if FLASK_ENV in ('production', 'development'):
    db_table = 'recommendations' if FLASK_ENV == 'production' else 'recommendations_dev'

class Sound_Drip:
    """Generate and store song recommendations for one Spotify user.

    Instantiating the class runs the whole pipeline inside ``__init__``: load
    the pickled/joblib artifacts, identify the user, open one database
    connection, read the user's previous seeds and results, pick a seed song,
    build its feature row, query the neighbour model, filter the neighbours and
    insert the resulting predictions. Each step appends its elapsed seconds to
    the module-level ``time_table``.
    """

    # Feature names kept from the Spotify audio-features payload (plus popularity).
    _MODEL_FEATURES = frozenset({
        'popularity',
        'acousticness',
        'danceability',
        'energy',
        'instrumentalness',
        'key',
        'liveness',
        'loudness',
        'mode',
        'speechiness',
        'tempo',
        'time_signature',
        'valence'})

    def __init__(self, token):
        """Run the recommendation pipeline for the user behind ``token``.

        Args:
            token: Spotify access token handed to ``spotipy.Spotify``.
        """
        # NOTE(review): pickle/joblib run arbitrary code while loading; these
        # artifacts must only come from a trusted location.
        with open("./data/genres_array_2.pkl", "rb") as genre_file:
            self.genre_array = pickle.load(genre_file)
        self.scaler = load("./models/scalar3.joblib")
        self.model = load('./models/model5.joblib')
        with open('./data/song_id_array3.pkl', 'rb') as song_id_file:
            self.song_id_array = pickle.load(song_id_file)
        self.token = token
        self.sp = spotipy.Spotify(auth=self.token)
        self.user_id, self.display_name = self.get_user_ids()
        self.db_conn, self.db_cur = self.db_connect()
        # Alias kept for callers that want to close the connection themselves.
        self.close_db_con = self.db_close
        try:
            self.stale_seed_list = self.get_stale_seed()
            self.stale_results_list = self.get_stale_results()
            self.song_id, self.source_genre = self.get_user_song_id_source_genre()
            self.acoustical_features = self.get_acoustical_features(self.song_id)
            self.popularity = self.get_popularity(self.song_id)
            self.song_features_df = self.create_feature_object(
                self.popularity, self.acoustical_features)
            self.results = self.get_results(self.song_features_df)
            self.filtered_list = self.filter_model(self.results, self.source_genre)
            self.song_id_predictions = self.song_id_prediction_output(
                self.filtered_list)
            self.insert_user_predictions()
        finally:
            # Release the connection even when a Spotify/model step fails.
            self.db_close()

    def _record_time(self, step, start_time):
        """Append the seconds elapsed since ``start_time`` to ``time_table[step]``."""
        time_table[step].append(round(time.time() - start_time, 4))

    def _fetch_first_column(self, query, params):
        """Execute ``query`` on the shared cursor and return column 0 of each row.

        On a psycopg2 ``DatabaseError`` the error is printed, the connection is
        closed and the process exits with status 1.
        """
        try:
            self.db_cur.execute(query, params)
            return [row[0] for row in self.db_cur.fetchall()]
        except ps.DatabaseError as e:
            print(f'Error {e}')
            self.db_close()
            sys.exit(1)

    def get_user_ids(self):
        """Return ``(user_id, display_name)`` for the current Spotify user."""
        start_time = time.time()
        current_user = self.sp.current_user()
        display_name = current_user['display_name']
        user_id = current_user['id']

        self._record_time('get_user_ids', start_time)
        return user_id, display_name

    def get_user_song_id_source_genre(self):
        """Pick a seed song from the user's saved tracks and return its genres.

        Walks up to 50 saved tracks and stops at the first track that is not in
        ``self.stale_seed_list`` and whose artist has a non-empty genre list.
        When the last saved track is itself stale, the stale seeds are walked
        instead and the first one with genres is used.

        Returns:
            tuple: ``(song_id, genre)`` of the last song examined.

        Raises:
            ValueError: if the user has no saved tracks to seed from.
        """
        start_time = time.time()
        stale_songs = self.stale_seed_list

        items = self.sp.current_user_saved_tracks(limit=50)['items']
        if not items:
            # Previously this surfaced as an UnboundLocalError at the return.
            raise ValueError('user has no saved tracks to use as a seed song')

        last_position = len(items) - 1
        for position, item in enumerate(items):
            song_id = item['track']['id']

            if song_id not in stale_songs:
                genre = self.get_genres(self.get_artist_id(song_id))
                if genre != []:
                    break
            elif position == last_position:
                # Ran out of saved tracks on a stale one: fall back to old seeds.
                for song_id in stale_songs:
                    genre = self.get_genres(self.get_artist_id(song_id))
                    if genre != []:
                        break

        self._record_time('get_user_song_id_source_genre', start_time)
        return song_id, genre

    def get_acoustical_features(self, song_id):
        """Return the first audio-features entry Spotify reports for ``song_id``."""
        start_time = time.time()
        acoustical_features = self.sp.audio_features(song_id)[0]

        self._record_time('get_acoustical_features', start_time)
        return acoustical_features

    def get_popularity(self, song_id):
        """Return the ``popularity`` field of the Spotify track ``song_id``."""
        start_time = time.time()
        popularity = self.sp.track(song_id)['popularity']

        self._record_time('get_popularity', start_time)
        return popularity

    def get_artist_id(self, song_id):
        """Return the id of the first artist listed on the track ``song_id``."""
        start_time = time.time()
        artist = self.sp.track(song_id)['artists'][0]['id']

        self._record_time('get_artist_id', start_time)
        return artist

    def get_genres(self, artist):
        """Return the ``genres`` list Spotify reports for the artist id ``artist``."""
        start_time = time.time()
        genre = self.sp.artist(artist)['genres']

        self._record_time('get_genres', start_time)
        return genre

    def create_feature_object(self, popularity, acoustical_features):
        """Build the one-row feature frame for the seed song.

        Args:
            popularity: popularity value of the seed track.
            acoustical_features: audio-features dict of the seed track; it is
                not modified.

        Returns:
            pandas.DataFrame: a single row holding the model features that are
            present in the input, with columns in sorted order.
        """
        start_time = time.time()
        # Merge into a fresh dict so the caller's features dict stays untouched.
        merged = {**acoustical_features, 'popularity': popularity}
        selected = {key: merged[key]
                    for key in merged.keys() & self._MODEL_FEATURES}

        df = pd.DataFrame([selected])
        df = df.reindex(sorted(df.columns), axis=1)

        self._record_time('create_feature_object', start_time)
        return df

    def get_results(self, song_features_df):
        """Return the neighbour indices of the seed song from ``self.model``.

        The features are transformed with ``self.scaler`` and then
        row-normalised with a default ``Normalizer`` before the query.

        Returns:
            The index part (second element) of ``kneighbors``' result.
        """
        start_time = time.time()

        data_scaled = self.scaler.transform(song_features_df)
        data_normalized = Normalizer().fit_transform(data_scaled)

        # kneighbors returns (distances, indices); only the indices are used.
        neighbor_indices = self.model.kneighbors(data_normalized)[1]

        self._record_time('get_results', start_time)
        return neighbor_indices

    def filter_model(self, model_results, source_genre_list):
        """Select up to 20 neighbour indices, preferring genre matches.

        Args:
            model_results: neighbour indices as returned by ``get_results``;
                only the first row is used and its first entry is skipped
                (presumably the seed song itself - TODO confirm).
            source_genre_list: genres of the seed song's artist.

        Returns:
            list: indices not in ``self.stale_results_list``; genre-matching
            ones first, then topped up with the remaining candidates in model
            order until 20 are collected or the candidates run out.
        """
        start_time = time.time()

        song_list_length = 20
        # Use this call's own inputs (not a global instance) and a set, so each
        # stale lookup is a hash probe instead of a list scan.
        stale_results = set(self.stale_results_list)
        fresh_results = [index for index in model_results[0][1:]
                         if index not in stale_results]

        # Genre entries in genre_array are compared in single-quoted form.
        source_genre = {"'" + genre + "'" for genre in source_genre_list}

        filtered_list = []
        for output_song_index in fresh_results:
            output_genre = {elem.strip(" ")
                            for elem in self.genre_array[output_song_index]}
            if source_genre & output_genre:
                filtered_list.append(output_song_index)

        filtered_list = list(unique_everseen(filtered_list))

        if len(filtered_list) >= song_list_length:
            filtered_list = filtered_list[:song_list_length]
        else:
            # Not enough genre matches: top up with the best remaining candidates.
            for output_song_index in fresh_results:
                if len(filtered_list) >= song_list_length:
                    break
                if output_song_index not in filtered_list:
                    filtered_list.append(output_song_index)

        self._record_time('filter_model', start_time)
        return filtered_list

    def song_id_prediction_output(self, filtered_list):
        """Translate model row indices into song ids.

        Returns:
            tuple: ``({"songs": [...]}, {song_id: row_index})``. Every entry of
            ``"songs"`` carries the hard-coded similarity ``[.99]``.
        """
        start_time = time.time()

        song_id_list = [self.song_id_array[song_row] for song_row in filtered_list]
        similar_songs = [{'similarity': [.99], 'values': song_id}
                         for song_id in song_id_list]

        song_result_output_dict = {"songs": similar_songs}
        song_id_and_index_dict = dict(zip(song_id_list, filtered_list))

        self._record_time('song_id_prediction_output', start_time)
        return song_result_output_dict, song_id_and_index_dict

    def db_connect(self):
        """Open a PostgreSQL connection and return ``(connection, cursor)``."""
        start_time = time.time()
        conn = ps.connect(host=POSTGRES_ADDRESS,
                          database=POSTGRES_DBNAME,
                          user=POSTGRES_USERNAME,
                          password=POSTGRES_PASSWORD,
                          port=POSTGRES_PORT)
        cur = conn.cursor()

        self._record_time('db_connect', start_time)
        return conn, cur

    def db_close(self):
        """Close the shared database connection if it is still open."""
        if not self.db_conn.closed:
            self.db_conn.close()

    def insert_user_predictions(self):
        """Insert the current predictions for this user with one statement.

        Writes one row per entry of ``self.song_id_predictions[1]`` into the
        module-level ``db_table`` and commits. Nothing is executed when there
        are no predictions. The connection is left open here; ``__init__``
        closes it once the pipeline finishes.

        Raises:
            SystemExit: after printing the error and closing the connection,
                when psycopg2 reports a ``DatabaseError``.
        """
        start_time = time.time()
        predictions = self.song_id_predictions[1]

        if predictions:
            row_template = '(%s,%s,%s,%s,current_timestamp)'
            placeholders = ','.join([row_template] * len(predictions))
            insert_bulk = (
                f'INSERT INTO {db_table} '
                f'(userid,songid,songlistindex,seedsongid,recdate) VALUES {placeholders};')

            params = []
            for song_id, song_index in predictions.items():
                # str() keeps the values as quoted literals, as the original
                # statement did, and avoids handing numpy scalars to psycopg2.
                params.extend(
                    (self.user_id, str(song_id), str(song_index), self.song_id))

            try:
                self.db_cur.execute(insert_bulk, params)
                self.db_conn.commit()
            except ps.DatabaseError as e:
                print(f'Error {e}')
                self.db_close()
                sys.exit(1)

        self._record_time('insert_user_predictions', start_time)

    def get_stale_results(self):
        """Return the distinct ``songlistindex`` values stored for this user."""
        start_time = time.time()
        # Only the table name is interpolated; the user id is a bound parameter.
        query = f'SELECT DISTINCT (songlistindex) FROM {db_table} WHERE userid = %s;'
        stale_results_list = self._fetch_first_column(query, (self.user_id,))

        self._record_time('get_stale_results', start_time)
        return stale_results_list

    def get_stale_seed(self):
        """Return the distinct non-null ``seedsongid`` values stored for this user."""
        start_time = time.time()
        # Only the table name is interpolated; the user id is a bound parameter.
        query = f'SELECT DISTINCT (seedsongid) FROM {db_table} WHERE userid = %s AND seedsongid is not null;'
        stale_results_list = self._fetch_first_column(query, (self.user_id,))

        self._record_time('get_stale_seed', start_time)
        return stale_results_list

In [42]:
# Run the whole pipeline once, then report the summed per-step timings.
sd = Sound_Drip(token)
print(
    f'TOTAL_TIME: {round(sum(reduce(lambda acc, timings: acc + timings, time_table.values())), 4)}'
)
time_table

TOTAL_TIME: 2.4382


{'get_user_ids': [0.2048],
 'get_user_song_id_source_genre': [0.3172],
 'get_acoustical_features': [0.0768],
 'get_popularity': [0.083],
 'get_artist_id': [0.089],
 'get_genres': [0.0918],
 'create_feature_object': [0.007],
 'get_results': [0.0229],
 'filter_model': [0.008],
 'song_id_prediction_output': [0.0],
 'db_connect': [0.7992],
 'insert_user_predictions': [0.2411],
 'get_stale_results': [0.2402],
 'get_stale_seed': [0.2572]}

In [21]:
# Baseline: time the stale-result filter using list membership.
st = time.time()
model_results = [
    candidate
    for candidate in sd.results[0][1:]
    if candidate not in sd.stale_results_list
]
time.time() - st

0.9205648899078369

In [26]:
# First row of the neighbour indices with its first entry dropped.
sd.results[0][1:]

array([164440, 111521, 513325, ..., 196329, 327380, 386192], dtype=int64)

In [33]:
# Baseline again, this time also reporting how many candidates survive.
st = time.time()
model_results = [
    candidate
    for candidate in sd.results[0][1:]
    if candidate not in sd.stale_results_list
]
print(time.time() - st)
len(model_results)

0.920565128326416


4570

In [34]:
# Set-based variant: intersect once, then filter against the small overlap set.
st = time.time()
stl_res = set(sd.results[0][1:]) & set(sd.stale_results_list)
model_results = [
    candidate
    for candidate in sd.results[0][1:]
    if candidate not in stl_res
]
print(time.time() - st)
len(model_results)

0.0019948482513427734


4570

429

In [40]:
# Sanity check: unique_everseen keeps the input order of already-unique items.
a = list('pdx')

[*unique_everseen(a)]

['p', 'd', 'x']