In [1]:
# These are all the imports I used in this notebook
import pandas as pd
import numpy as np
from pickle import load
import pprint

In [2]:
# Load a sample of songs from a local CSV file so the model can be tested in this notebook.
# This is only for testing: in the real pipeline the information will come from the API.
songs = pd.read_csv("song_list5.csv")

In [3]:
# The features the model uses, listed in the exact order they must appear.
# NOTE(review): the original note about where this information has to come
# from was cut off mid-sentence; presumably it is the API -- TODO confirm.
feature_columns = [
    "danceability",
    "energy",
    "key",
    "loudness",
    "mode",
    "speechiness",
    "acousticness",
    "instrumentalness",
    "liveness",
    "valence",
    "tempo",
    "duration_ms",
]
# Select those columns (in that order) and convert them to a NumPy array.
features = songs[feature_columns].to_numpy()

In [4]:
# This is what the data for a single song looks like.
# Notice that there is only one set of brackets around the vector,
# i.e. it is a one-dimensional array.
features[0]

array([ 4.56000e-01,  2.55000e-01,  9.00000e+00, -1.58050e+01,
        1.00000e+00,  4.80000e-02,  9.46000e-01,  1.70000e-01,
        9.51000e-01,  5.32000e-02,  1.16424e+02,  2.53067e+05])

In [5]:
# This takes the single feature vector and turns it into a 2-dimensional NumPy array.
# Note: despite its name, `new_df` is a NumPy array here, not a pandas DataFrame.
new_df = np.atleast_2d(features[0])

In [6]:
# After the conversion the data looks like this.
# There are now 2 sets of brackets around the vector, i.e. the array is two-dimensional.
new_df

array([[ 4.56000e-01,  2.55000e-01,  9.00000e+00, -1.58050e+01,
         1.00000e+00,  4.80000e-02,  9.46000e-01,  1.70000e-01,
         9.51000e-01,  5.32000e-02,  1.16424e+02,  2.53067e+05]])

In [7]:
# The shape is (rows, columns). The second number has to be 12 -- one column
# per feature -- in order for the data to be fed into the model.
new_df.shape

(1, 12)

In [8]:
# This is how to load in the scaler that was saved to disk with pickle.
# The file is opened in a `with` block so the handle is closed as soon as the
# scaler has been read, instead of being left open for the life of the kernel.
# NOTE(review): unpickling can execute arbitrary code -- only load pickle
# files that come from a trusted source.
with open('scaler2.pkl', 'rb') as scaler_file:
    scaler = load(scaler_file)

In [9]:
# Check what kind of object was loaded from the pickle file.
type(scaler)

sklearn.preprocessing._data.StandardScaler

In [10]:
# Use the loaded scaler to transform the 2-dimensional feature array.
# NOTE(review): despite its name, `x_train` is the scaled input that is passed
# to the model for prediction below, not training data.
x_train = scaler.transform(new_df)

In [11]:
# Once the information is scaled it is shown as plain decimals rather than in
# scientific notation.
# It still has 2 sets of brackets around the vector, and its shape still has
# 12 as the second number.
# Print the shape first, then the array itself, each on its own line.
print(x_train.shape, x_train, sep="\n")

(1, 12)
[[-0.47695143 -1.43616823  1.03467011 -1.32564479  0.70711739 -0.34028656
   1.91669133 -0.14321669  3.87500921 -1.70714343 -0.17066299  0.03404049]]


In [12]:
# This is how to load in the model that was saved to disk with pickle.
# Alternative that was left disabled:
# encoder = load_model('encoder.h5')
# The file is opened in a `with` block so the handle is closed as soon as the
# model has been read, instead of being left open for the life of the kernel.
# NOTE(review): unpickling can execute arbitrary code -- only load pickle
# files that come from a trusted source.
with open('VAE_Encoder.pkl', 'rb') as encoder_file:
    encoder = load(encoder_file)

Using TensorFlow backend.


In [13]:
# This is how to make predictions with the model: pass it the scaled 2-dimensional array.
preds = encoder.predict(x_train)

In [14]:
# `preds` was already computed in the previous cell, so it is only displayed
# here rather than running the model a second time.
# The model gives us 3 sets of predictions: a list of 3 arrays, each
# containing 2 numbers for our song.
# The first set of 2 numbers is the one we want; these 2 numbers will then be
# saved into a database along with the track_id.
# NOTE(review): going by the file name VAE_Encoder.pkl, the three outputs are
# presumably (z_mean, z_log_var, z) -- TODO confirm.
preds

[array([[ 0.56023014, -1.7370206 ]], dtype=float32),
 array([[-1.6997617, -1.9096086]], dtype=float32),
 array([[ 0.80455625, -1.7457287 ]], dtype=float32)]

In [15]:
# To get only the set of numbers we want, add "[0]" to the end of preds.
preds[0]

array([[ 0.56023014, -1.7370206 ]], dtype=float32)

In [16]:
# From here, a K-nearest-neighbors search will be performed on the database to find the x songs closest to our song.
# x is the number of songs requested by the user and will be given to us by the back end.
# If the back end doesn't give us a number, then default it to 5.

In [17]:
# Now let's say we want to make predictions for several songs at once:
# take the first five rows of the feature array and display them.
new_df = features[:5]
new_df

array([[ 4.56000e-01,  2.55000e-01,  9.00000e+00, -1.58050e+01,
         1.00000e+00,  4.80000e-02,  9.46000e-01,  1.70000e-01,
         9.51000e-01,  5.32000e-02,  1.16424e+02,  2.53067e+05],
       [ 5.35000e-01,  8.06000e-01,  7.00000e+00, -1.02890e+01,
         1.00000e+00,  6.42000e-02,  4.36000e-03,  1.91000e-02,
         4.57000e-01,  3.76000e-01,  9.00890e+01,  6.37330e+04],
       [ 2.94000e-01,  4.82000e-01,  5.00000e+00, -6.40600e+00,
         1.00000e+00,  4.30000e-02,  4.63000e-01,  0.00000e+00,
         3.35000e-01,  2.04000e-01,  1.66693e+02,  2.55280e+05],
       [ 5.63000e-01,  6.31000e-01,  0.00000e+00, -5.14400e+00,
         1.00000e+00,  3.24000e-02,  6.35000e-02,  8.46000e-06,
         1.63000e-01,  5.40000e-01,  1.15657e+02,  2.54827e+05],
       [ 3.58000e-01,  6.11000e-01,  2.00000e+00, -9.75200e+00,
         0.00000e+00,  4.54000e-02,  5.15000e-01,  4.68000e-04,
         1.49000e-01,  1.64000e-01,  1.71596e+02,  2.56933e+05]])

In [18]:
# Scale the data.
# The scaler is applied directly to the raw feature rows instead of to `new_df`
# itself, so re-running this cell cannot scale already-scaled data a second
# time. On a top-to-bottom run the result is unchanged, because `new_df` was
# set to these same five rows just before this cell.
new_df = scaler.transform(features[0:5])

In [19]:
# We don't need np.atleast_2d for multiple predictions: the data is already 2-dimensional.
new_df

array([[-0.47695143, -1.43616823,  1.03467011, -1.32564479,  0.70711739,
        -0.34028656,  1.91669133, -0.14321669,  3.87500921, -1.70714343,
        -0.17066299,  0.03404049],
       [-0.03739907,  0.69843612,  0.47428756, -0.17173186,  0.70711739,
        -0.1963104 , -0.87635746, -0.58458172,  1.27879637, -0.49117162,
        -1.06220066, -1.51723037],
       [-1.37831198, -0.55675773, -0.08609498,  0.64056762,  0.70711739,
        -0.38472365,  0.48403919, -0.64044701,  0.6376264 , -1.13908721,
         1.53112958,  0.05217227],
       [ 0.11839165,  0.02047648, -1.48705135,  0.90457018,  0.70711739,
        -0.47893027, -0.70093914, -0.64042226, -0.26631816,  0.12660836,
        -0.19662879,  0.04846071],
       [-1.02221893, -0.05700462, -0.9266688 , -0.05939479, -1.41419234,
        -0.36339385,  0.63827917, -0.63907816, -0.33989504, -1.28976525,
         1.69711436,  0.0657158 ]])

In [20]:
# Predicting works exactly the same way as it does for a single song.
preds2 = encoder.predict(new_df)

In [21]:
# Adding "[0]" to the end still gives the predictions we want: one row of 2 numbers per song.
preds2[0]

array([[ 0.5602303 , -1.7370207 ],
       [ 0.0436189 ,  0.97871184],
       [-0.2970995 ,  0.13700697],
       [-0.16146967, -0.02508018],
       [-0.05932711, -0.77811337]], dtype=float32)