In [6]:
# Importing libraries

import pandas as pd
import numpy as np
import sqlite3
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix

# Import Data

In [3]:
con = sqlite3.connect("song_list_v3.db")
# Load the data into a DataFrame
main_df = pd.read_sql_query("SELECT * from songs", con)
con.close()

# Cleaning/Scaling Data

In [7]:
main_df.shape, main_df.columns

((574018, 20),
 Index(['index', 'acousticness', 'artist', 'danceability', 'duration_ms',
        'energy', 'instrumentalness', 'key', 'liveness', 'loudness', 'mode',
        'songid', 'speechiness', 'tempo', 'time_signature', 'track', 'valence',
        'genres', 'popularity', 'availability'],
       dtype='object'))

In [27]:
# dropping unnecessary columns
X = main_df.drop(["songid","artist","track", "duration_ms", "popularity", "availability", "genres"], axis=1)
y = main_df["genres"]
# checking that they're dropped
X.shape

(574018, 13)

In [28]:
y.shape

(574018,)

In [24]:
X.head()

Unnamed: 0,index,acousticness,danceability,energy,instrumentalness,key,liveness,loudness,mode,speechiness,tempo,time_signature,valence
0,0,0.0013,0.487,0.678,0.0551,9.0,0.0846,-7.78,1.0,0.0495,149.94,4.0,0.87
1,1,4.5e-05,0.662,0.823,0.952,4.0,0.343,-1.711,0.0,0.0662,177.745,4.0,0.621
2,4,0.00276,0.859,0.741,0.0,11.0,0.325,-12.329,0.0,0.271,98.082,4.0,0.529
3,5,0.348,0.361,0.483,3e-06,7.0,0.177,-6.875,1.0,0.0287,94.538,4.0,0.682
4,6,0.34,0.533,0.302,0.179,10.0,0.111,-10.308,1.0,0.0307,134.959,3.0,0.294


In [25]:
# instantiating scaler
scaler = MinMaxScaler()

# scaling data
data_scaled = scaler.fit_transform(X)

# confirming scaling took place
data_scaled[0]

array([0.        , 0.00130522, 0.4884654 , 0.678     , 0.0551    ,
       0.81818182, 0.0846    , 0.78720453, 1.        , 0.05118925,
       0.60009846, 0.8       , 0.87      ])

Data is scaled and ready to be fed to the model

#  support vector machine

In [10]:
# instantiating model class
knn = NearestNeighbors(n_neighbors=21,algorithm='kd_tree')

# fitting model
model = knn.fit(data_scaled)

# running model to test output
knn_results = knn.kneighbors([data_scaled[1000]])[1]

In [11]:
knn_results

array([[  1000, 647630, 304823, 412399,  48174, 411195, 240739, 537833,
         50765, 450355, 151925, 707048, 557285, 508280, 175165, 510531,
        652499, 267268, 161932, 587960,  79755]])

# Pickling model

In [8]:
import pickle

filename = './models/knn_model_v1.pkl'
pickle.dump(model, open(filename, 'wb'))

In [9]:
# pulling in the (now) pickled model
model_pickle = pickle.load(open(filename, 'rb'))

In [10]:
# Loading model to test on scaled data (not pickled data)
pickle_results = model_pickle.kneighbors([data_scaled[1000]])[1]

# displaying output
pickle_results

array([[  1000, 647630, 304823, 412399,  48174, 411195, 240739, 537833,
         50765, 450355, 151925, 707048, 557285, 508280, 175165, 510531,
        652499, 267268, 161932, 587960,  79755]])

# Pickling Scaled Data

In [11]:
filename2 = './data_scaled.pkl'
pickle.dump(data_scaled, open(filename2, 'wb'))

# (OLD) Testing both pickles for dataset, model

In [12]:
# importing libraries (even though they are visible above)
# to illustrate which packages are needed to create the function below

import pandas as pd 
from sklearn.neighbors import NearestNeighbors
import pickle


# this function predicts on a song id

def predict(id):
    #loads in pickled data and model
    X = pickle.load(open('./data_scaled.pkl', 'rb'))
    loaded_pickle = pickle.load(open('./models/knn_model_v1.pkl', 'rb'))
    #calculates results
    results = loaded_pickle.kneighbors([X[id]])[1]
    return results

In [13]:
predict(0)

array([[     0, 341644, 462704, 499704, 105639, 347586,  27005, 553442,
        594846, 273222, 636137, 218622, 375716,  40341, 510427, 518171,
        209537, 168110, 453579, 456931,  89949]])

In [14]:
content = {
    "audio_features": {
        "acousticness": 0.934,
        "danceability": 0.186,
        "energy": 0.107,
        "instrumentalness": 0,
        "key": 5,
        "liveness": 0.297,
        "loudness": -14.802,
        "mode": 1,
        "speechiness": 0.0347,
        "tempo": 107.095,
        "time_signature": 4,
        "valence": 0.149
    }
}

In [15]:
dataframe = pd.DataFrame.from_dict(
        json_normalize(content['audio_features']),
                                orient='columns')

In [16]:
dataframe

Unnamed: 0,acousticness,danceability,energy,instrumentalness,key,liveness,loudness,mode,speechiness,tempo,time_signature,valence
0,0.934,0.186,0.107,0,5,0.297,-14.802,1,0.0347,107.095,4,0.149


In [17]:
dataframe_scaled = scaler.transform(dataframe)

dataframe_scaled

array([[0.937751  , 0.18655968, 0.107     , 0.        , 0.45454545,
        0.297     , 0.68134949, 1.        , 0.03584711, 0.42840913,
        0.8       , 0.149     ]])

In [22]:
import pickle

filename = './models/scaler.pkl'
pickle.dump(scaler, open(filename, 'wb'))

In [14]:
target = 'songid'

song_id_array = song_list[target]

In [15]:
song_id_array

0         5PS5dpaLogPzYU9hWiWyZb
1         41RpZW2lxAdnqDd2nMBzLQ
2         2poHURuOfVNbzZdivAwtOH
3         1jg9hZnReygpBvV2axGuPy
4         3GsS8jzoixpCnp4jDWCEvb
                   ...          
116091    1xm8J6EFMA6N8JDqH8vzuz
116238    4NwmHBjPb9i9N3naLCMVCG
116261    5Xo8AsEz0gpW6Rpo2jXvBN
116308    4Fnz6vDqufd3ens0Gf9LC5
116324    2tolmRzbUfgL5KRplIqHlu
Name: songid, Length: 728156, dtype: object

In [16]:
sys.getsizeof(song_id_array)

NameError: name 'sys' is not defined

In [17]:
song_id_list = song_id_array.tolist()

In [37]:
sys.getsizeof(song_id_list)

5825312

In [39]:
import pickle

filename = 'Flask_API/SOUNDDRIP/data/song_id_array.pkl'
pickle.dump(song_id_array, open(filename, 'wb'))

In [30]:
import pickle

filename = 'song_id_list2.pkl'
pickle.dump(song_id_list, open(filename, 'wb'))

In [3]:
content = {
    "audio_features": {
        "acousticness": 0.934,
        "danceability": 0.186,
        "energy": 0.107,
        "instrumentalness": 0,
        "key": 5,
        "liveness": 0.297,
        "loudness": -14.802,
        "mode": 1,
        "speechiness": 0.0347,
        "tempo": 107.095,
        "time_signature": 4,
        "valence": 0.149
    }
}

def predict(content):
    print('Loading dataframe...')
    dataframe = pd.DataFrame.from_dict(
        json_normalize(content['audio_features']),
                                orient='columns')
    print('Dataframe Object Created')
    print('Loading pickled scaler...')
    scaler = pickle.load(open('models/scaler.pkl', 'rb'))
    print('Pickled scaler loaded')
    print('Scaling dataframe object...')
    dataframe_scaled = scaler.transform(dataframe)
    print('Dataframe scaled')
    print('Loading pickled model...')
    model = pickle.load(open('./models/knn_model_v1.pkl', 'rb'))
    print('Model loaded')
    results = model.kneighbors([dataframe_scaled][0])[1]
    print('Prediction executed')

    return results


In [4]:
predict(content)

Loading dataframe...
Dataframe Object Created
Loading pickled scaler...


NameError: name 'pickle' is not defined

In [127]:
  # For-loop for returning 20 songs
def all_similarities(data_result):
    similar_songs = []
    print('song_id_list loading...')
    song_id_list = pickle.load(open('Flask_API/SOUNDDRIP/data/song_id_list.pkl', 'rb'))
    print('song_id_list loaded')
    print('beginning for loop...')
    
    for song_row in data_result[0][1:]:
        song_id = song_id_list[song_row]
        similar_songs.append({'similarity': [.99], 'values': song_id})
    json_dict = {"songs": similar_songs}
    return json_dict

In [128]:
all_similarities(predict(content))

Loading dataframe...
Dataframe Object Created
Loading pickled scaler...
Pickled scaler loaded
Scaling dataframe object...
Dataframe scaled
Loading pickled model...
Model loaded
Prediction executed
song_id_list loading...
song_id_list loaded
beginning for loop...


{'songs': [{'similarity': [0.99], 'values': '3JeSeOxS09vw7ZaQXyDtkn'},
  {'similarity': [0.99], 'values': '0lkEvsJLWu7HjurNUnMYqt'},
  {'similarity': [0.99], 'values': '7GrCeh9O4Salm3oEoiDWmf'},
  {'similarity': [0.99], 'values': '705JQ49YZOupAYQXrny5Vb'},
  {'similarity': [0.99], 'values': '7v6maEWDjrKHQMVFoL8Tq3'},
  {'similarity': [0.99], 'values': '6pJeuRa8dj6Hph2r9Unn5H'},
  {'similarity': [0.99], 'values': '1XsXX5Q8mCdlDjroYAUs8r'},
  {'similarity': [0.99], 'values': '6nl7z8eqCRRNSPJevUg5yJ'},
  {'similarity': [0.99], 'values': '4Swt07x4GyojZApyerQ8Ta'},
  {'similarity': [0.99], 'values': '68PGQTrsU7NTe6hfcFD3Fh'},
  {'similarity': [0.99], 'values': '5jTnsVnWuxdSQnRv124hwL'},
  {'similarity': [0.99], 'values': '5YsyWAtEDsStifCxF5d7wJ'},
  {'similarity': [0.99], 'values': '4RKMHiQ1sQCJVcPEbqcXR2'},
  {'similarity': [0.99], 'values': '7dXTqzYBW6LcuMvfmhKjeS'},
  {'similarity': [0.99], 'values': '1pQPfwF8yZejKeu2rZOIwI'},
  {'similarity': [0.99], 'values': '2ydHMFR78xJCTaUSgQsY1d'},

In [111]:
sorted_list = sorted(similarities, key=lambda i: i['similarity'], 
                                                         reverse=True)[:20]
      json_dict = {"songs": sorted_list}
      #data = json.dumps(json_dict)
      return jsonify(json_dict), print('yay')

'70jVRf1KHVa4eROjpdmaja'

# Get Spotify API Token / Output Audio Features

In [1]:
import spotipy
import spotipy.util as util

In [2]:
USERNAME = 'augt12ocf9csxa4s8kbq9reg8' #your spotify username
CLIENT_ID = 'b954b92b9a674e6a9cf7322649da82d1' #set at your developer account
CLIENT_SECRET = '920cd41023994b1a99210243ce8967b2' #set at your developer account
REDIRECT_URI = 'https://google.com/' #set at your developer account, usually "http://localhost:8000"
SCOPE = 'user-library-read' # or else
# ps. REDIRECT_URI is crucial here. if http://localhost:8000 is not set, or with a single '/' misplaced, it will generate a connection error.

# then pass them:

token = util.prompt_for_user_token(username = USERNAME, 
                                   scope = SCOPE, 
                                   client_id = CLIENT_ID, 
                                   client_secret = CLIENT_SECRET, 
                                   redirect_uri = REDIRECT_URI)

if token:
   sp = spotipy.Spotify(auth=token)



            User authentication requires interaction with your
            web browser. Once you enter your credentials and
            give authorization, you will be redirected to
            a url.  Paste that url you were directed to to
            complete the authorization.

        
Opened https://accounts.spotify.com/authorize?client_id=b954b92b9a674e6a9cf7322649da82d1&response_type=code&redirect_uri=https%3A%2F%2Fgoogle.com%2F&scope=user-library-read in your browser




Enter the URL you were redirected to:  https://www.google.com/?code=AQB6_guH_Q9DO3bhNcW534HrKK8bs0qvOZuuUDkWgYuzBXz-cdVz2uFdXlFM4X3nkXIfZ-yyx5-LVeSxJWtGp5j6cH3PhdznuNqVhfrh02QG0eF38xbO-UiPvtFg-lfr6FudpyjfwIQouNkiDbe6SfGR4yEM_MV7SBFEZ3FA7sWicsF7vXl38ekWg9Qui4D_Awk0pdBTDVkbFAwT






In [4]:
token

'BQAzm9VZMwr75qsRL2WkGWTyRVqFn9OYXIEFEzlOZfijT07_L8W5VBu0KymszA5Eg3n2_4np7yS6zibHQnfDt-DS8GZQ-7heWLAh51v4J15HzBCZhMdLJYZKgVq8aPaWU6Rfj52bmNPKnIUCvClGq-ixU2Laml5q_oUZwN7-sPko'

In [7]:
token_content = {'token': token}

In [8]:
type(token_content)

dict

In [9]:
token_content['token']

'BQAuxAqZlqyiBIPC-2MnrYjFH2amjQm8tLkh_ZYI_HkJ8MUHepG_lQrofpN--fGXM8b3tfXMfshOUvoF2Dkuh8iCepPvIvXqxCfrfSi52Rr8Njq0cnyLpde4_xo1v0-lMDjpMpeEOccK7yV_PPBVadskMoOr'

In [10]:
def get_id(token):
    sp = spotipy.Spotify(auth=token)
    results = sp.current_user_saved_tracks()
    song_id = results['items'][0]['track']['id']
    return song_id

In [11]:
def get_features(song_id):
    results_dict = sp.audio_features(song_id)[0]
    audio_features = {"audio_features":{key: results_dict[key] for key in results_dict.keys() & {'danceability', 'energy',
                                                                                                'key', 'loudness', 'mode',
                                                                                                'speechiness', 'acousticness',
                                                                                                'instrumentalness', 'liveness',
                                                                                                'valence', 'tempo', 'time_signature'}}}
        
    return audio_features

In [12]:
 acoustical_features = get_features(get_id(token))

In [13]:
acoustical_features

{'audio_features': {'liveness': 0.204,
  'acousticness': 0.472,
  'key': 9,
  'danceability': 0.889,
  'energy': 0.816,
  'speechiness': 0.247,
  'mode': 1,
  'loudness': -4.67,
  'valence': 0.777,
  'instrumentalness': 0,
  'time_signature': 4,
  'tempo': 96.056}}

# Flask Predict function

In [14]:
def predict(content):
    similar_songs = []
    print('Loading dataframe...')
    dataframe = pd.DataFrame.from_dict(
        json_normalize(content['audio_features']),
                                orient='columns')
    print('Dataframe Object Created')
    print('Loading pickled scaler...')
    scalar = load('scalar2.joblib')
    print('Pickled scaler loaded')
    print('Scaling dataframe object...')
    dataframe_scaled = scalar.transform(dataframe)
    print('Dataframe scaled')
    print('Loading pickled model...')
    model = load('model2.joblib')
    print('Model loaded')
    results = model.kneighbors([dataframe_scaled][0])[1]
    print('Prediction executed')
    print('song_id_list loading...')
    song_id_list = load('song_id_list2.joblib')
    print('song_id_list loaded')
    
    print('beginning for loop...')
    for song_row in results[0][1:]:
        song_id = song_id_list[song_row]
        similar_songs.append({'similarity': [.99], 'values': song_id})
    json_dict = {"songs": similar_songs}
    return json_dict


In [17]:
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from flask import request
from joblib import dump
from joblib import load

import pandas as pd
from pandas.io.json import json_normalize
from flask import jsonify

In [18]:
%timeit

predict(acoustical_features)

Loading dataframe...
Dataframe Object Created
Loading pickled scaler...
Pickled scaler loaded
Scaling dataframe object...
Dataframe scaled
Loading pickled model...
Model loaded
Prediction executed
song_id_list loading...
song_id_list loaded
beginning for loop...


{'songs': [{'similarity': [0.99], 'values': '5YdNeg0onlsEFoNdFjwG9X'},
  {'similarity': [0.99], 'values': '3XDPtaCpjC83LW1E7wfeVW'},
  {'similarity': [0.99], 'values': '6Ycon1DmLHH4xVVZaJqEdI'},
  {'similarity': [0.99], 'values': '6T8DFxrmY8y03FGYDxkmpg'},
  {'similarity': [0.99], 'values': '0lO6VPXcW9wIOKxZ0wecjG'},
  {'similarity': [0.99], 'values': '56KyV36puztkiJ62ca3D1t'},
  {'similarity': [0.99], 'values': '2UUIpe2Jtflo8dX9jeTSqQ'},
  {'similarity': [0.99], 'values': '6lneyspCLiV4hfccAidPeU'},
  {'similarity': [0.99], 'values': '4nKjzgBwU9ksBUG4VJ787c'},
  {'similarity': [0.99], 'values': '6galGfTFHHzsPieYkReYyN'},
  {'similarity': [0.99], 'values': '7F9Dbj9ul1FtOPcsXDF3R9'},
  {'similarity': [0.99], 'values': '0e7jY6MjiJdKNwUiwDhyxM'},
  {'similarity': [0.99], 'values': '1LVfv8UzT0Zw3YjAbfcl4U'},
  {'similarity': [0.99], 'values': '3QUgFGZvPhtdioo1MRepuE'},
  {'similarity': [0.99], 'values': '6SuyOjhiF6vssCI7URWLDZ'},
  {'similarity': [0.99], 'values': '015p8TgFahkz0viuux9YGJ'},

# Dump

In [12]:
dump(model, 'model2.joblib', compress=True)

['model2.joblib']

In [4]:
# %time

# pickle_loaded = pickle.load(open('Flask_API/SOUNDDRIP/models/knn_model_v1.pkl', 'rb'))
# joblib_loaded = load('model.joblib')



In [5]:
# dump(data_scaled, 'data_scaled.joblib', compress=True)

In [13]:
dump(scaler, 'scalar2.joblib', compress=True)

['scalar2.joblib']

In [18]:
dump(song_id_list, 'song_id_list2.joblib', compress=True)

['song_id_list2.joblib']