In [None]:
# Dependencies
import os
import random

import numpy as np
import pandas as pd
from keras.utils import to_categorical
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

In [None]:
# Read the audio-features CSV and drop every row that has a missing value.
df = pd.read_csv("features.csv").dropna()
# Preview goes last so the notebook actually renders it (a mid-cell
# expression is evaluated and then discarded).
df.head()

In [None]:
# Audio features of the target song. It is appended to the feature table
# further down and every other song is then scored against it.
heartWillGoOn = [
    dict(
        acousticness=0.732,
        danceability=0.428,
        energy=0.276,
        id='3oEHQmhvFLiE7ZYES0ulzv',
        instrumentalness=5.33e-06,
        key=4,
        liveness=0.117,
        loudness=-11.729,
        mode=1,
        speechiness=0.0312,
        tempo=99.195,
        valence=0.0382,
    )
]

In [None]:
# Add the target song as the LAST row of the table; later cells rely on it
# being last when they split it back off.
# pd.concat replaces DataFrame.append (deprecated in pandas 1.4, removed in
# 2.0). ignore_index=True renumbers rows 0..n-1 without leaving the old labels
# behind in a stray 'index' column, which reset_index() would do.
df = pd.concat([df, pd.DataFrame(heartWillGoOn)], ignore_index=True)
df.tail()

In [None]:
# Min-max scale the eleven audio features so that each one contributes on a
# comparable scale to the distance score computed later.
feature_frame = df[[
    'acousticness', 'danceability', 'energy', 'instrumentalness', 'key',
    'liveness', 'loudness', 'mode', 'speechiness', 'tempo', 'valence',
]]
min_max_scaler = preprocessing.MinMaxScaler()
np_scaled = min_max_scaler.fit_transform(feature_frame)
# fit_transform returns a bare array, so the column names are lost here.
df_normalized = pd.DataFrame(np_scaled)


In [None]:
# Restore the feature names, in the same order the columns were passed to the
# scaler.
df_normalized.columns = [
    'acousticness',
    'danceability',
    'energy',
    'instrumentalness',
    'key',
    'liveness',
    'loudness',
    'mode',
    'speechiness',
    'tempo',
    'valence',
]

In [None]:
# Re-attach the song id. Rows are matched by index label, which works because
# both frames are numbered 0..n-1 at this point.
df_normalized = df_normalized.assign(id=df['id'])
df_normalized.tail()

In [None]:
# Split the target song (the final row) off from the candidate songs.
# NOTE: re-running this cell removes one more row each time.
last_row = df_normalized.tail(1)
df_normalized = df_normalized.drop(index=last_row.index)
last_row

In [None]:
# Similarity score: L1 (Manhattan) distance between each candidate song and
# the target song over the scaled audio features. Lower means more similar.
feature_cols = [
    'acousticness', 'danceability', 'energy', 'instrumentalness', 'key',
    'liveness', 'loudness', 'mode', 'speechiness', 'tempo', 'valence',
]
# last_row is a one-row DataFrame. .iloc[0] turns it into a Series keyed by
# feature name, so the subtraction lines up column by column. Subtracting the
# one-row Series inside a per-element lambda (as before) built a new Series
# for every row of every feature.
target_features = last_row[feature_cols].iloc[0]
df_normalized['score'] = (
    df_normalized[feature_cols]
    .sub(target_features, axis='columns')
    .abs()
    .sum(axis=1)
)

In [None]:
# Show the last rows, which now include the 'score' column.
df_normalized.tail()

In [None]:
# Label a song True when its distance score is below 1.5, False otherwise.
df_normalized['class'] = df_normalized['score'].lt(1.5)
df_normalized.head()

In [None]:
# Count how many songs fall into each class (True / False).
df_normalized['class'].value_counts()

## Data Pre-Processing

In [None]:
# Model inputs: everything except the label, the score the label was derived
# from, and the song id.
data = df_normalized.drop(columns=["class", "score", "id"])
# Model target: the boolean class label.
target = df_normalized["class"]
print(data.shape, target.shape)

In [None]:


# Stratified train/test split so both sets keep the class proportions.
# No StandardScaler step here: the features were already min-max scaled
# before they reached this point.
X_train, X_test, y_train, y_test = train_test_split(
    data,
    target,
    stratify=target,
    random_state=20,
)

# Step 1: label-encode the targets, fitting the encoder on the training labels only
label_encoder = LabelEncoder().fit(y_train)
encoded_y_train = label_encoder.transform(y_train)
encoded_y_test = label_encoder.transform(y_test)

# Step 2: one-hot encode the integer labels for the softmax output layer
y_train_categorical, y_test_categorical = (
    to_categorical(codes) for codes in (encoded_y_train, encoded_y_test)
)

## Create a Deep Learning Model

In [None]:
from keras.models import Sequential
from keras.layers import Dense

# Fully connected classifier:
# 11 input features -> 100 (selu) -> 100 -> 50 -> 50 (relu) -> 2-way softmax.
model = Sequential([
    Dense(units=100, activation='selu', input_dim=11),
    Dense(units=100, activation='relu'),
    Dense(units=50, activation='relu'),
    Dense(units=50, activation='relu'),
    Dense(units=2, activation='softmax'),
])

model.summary()

In [None]:
# Compile the model; categorical cross-entropy matches the one-hot targets.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
# Fit on X_train as-is. A separately scaled X_train_scaled is not used here
# because the features were already scaled before the split.
model.fit(X_train, y_train_categorical, epochs=20, shuffle=True, verbose=1)

## Quantify our Trained Model

In [None]:
# Evaluate on the held-out split (X_test as-is, not a scaled copy).
# The two values unpacked are the loss and the 'accuracy' metric set at compile time.
evaluation = model.evaluate(X_test, y_test_categorical, verbose=2)
model_loss, model_accuracy = evaluation
print(f"Normal Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

## Make Predictions

In [None]:
# Predict the class of the first 100 held-out songs (X_test as-is, not a
# scaled copy).
# Sequential.predict_classes was removed from recent TensorFlow/Keras
# releases. The equivalent is the argmax over the softmax outputs.
class_probabilities = model.predict(X_test[:100])
encoded_predictions = np.argmax(class_probabilities, axis=-1)
# Map the integer codes back to the original True/False labels.
prediction_labels = label_encoder.inverse_transform(encoded_predictions)

In [None]:
# Print predictions next to the true labels for the same first 100 test rows.
actual_labels = list(y_test[:100])
print(f"Predicted classes: {prediction_labels}")
print(f"Actual Labels: {actual_labels}")

In [None]:
name = input("Provide Name for Saved File: ")
# Save the model
folder = "Saved Models"
# Create the folder if needed so the save does not depend on it pre-existing.
os.makedirs(folder, exist_ok=True)
# os.path.join picks the right separator on every OS. The old f-string used a
# literal backslash: an invalid escape sequence in Python, and not a path
# separator outside Windows.
model.save(os.path.join(folder, f"{name}.h5"))

In [None]:
# Class probabilities for every candidate song, using the same eleven feature
# columns (in the same order) the model was trained on.
feature_cols = [
    'acousticness', 'danceability', 'energy', 'instrumentalness', 'key',
    'liveness', 'loudness', 'mode', 'speechiness', 'tempo', 'valence',
]
probs = model.predict(df_normalized[feature_cols])

In [None]:
# Column 0 of probs is treated as the probability of NOT RECOMMENDED. Collect
# every song where it is exactly 1, then pick up to three at random as
# examples of bad matches.
# NOTE(review): '== 1' is an exact float comparison, so it only matches fully
# saturated outputs. Kept as-is to preserve which songs are selected.
not_rec_list = [
    {"id": index, "score": item[0]}
    for index, item in enumerate(probs)
    if item[0] == 1
]

# random.sample raises ValueError when asked for more items than the list
# holds, so cap the sample size at what is available.
not_rec_sel = random.sample(not_rec_list, min(3, len(not_rec_list)))

In [None]:
# Recommended candidates: songs whose column-0 probability (NOT RECOMMENDED)
# is below 1e-6. The threshold is this small because the probabilities went
# far lower than expected.
rec_list = [
    {"id": index, "score": item[0]}
    for index, item in enumerate(probs)
    if item[0] < 0.000001
]

# random.sample raises ValueError when asked for more items than the list
# holds, so cap the sample size at what is available.
rec_sel = random.sample(rec_list, min(3, len(rec_list)))

In [None]:
# Number of songs that passed the recommendation threshold.
len(rec_list)

In [None]:
# The entry with the smallest score is the single most strongly recommended
# song.
bestScoreItem = min(rec_list, key=lambda candidate: candidate['score'])
bestScoreItem

In [None]:
# Look up and print the song id of each sampled not-recommended song.
# Each entry's "id" is its row label in df_normalized.
for entry in not_rec_sel:
    print(df_normalized.loc[entry["id"], "id"])

In [None]:
# Look up and print the song id of the top recommendation.
best_row_label = bestScoreItem["id"]
print(df_normalized.loc[best_row_label, "id"])

In [None]:
# Rank all recommended candidates by ascending score and show the top three.
newlist = list(rec_list)
newlist.sort(key=lambda entry: entry['score'])
newlist[:3]

In [None]:
# Print the song ids of the best-ranked candidates. Four rows are printed;
# the original note mentions one duplicate, which is presumably why four are
# needed to get three distinct songs.
for entry in newlist[:4]:
    print(df_normalized.loc[entry["id"], "id"])