In [19]:
# If keras-tuner is not installed, run: %pip install keras-tuner

In [20]:
# Import dependencies 
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np

In [21]:
# Filepaths, numpy, and Tensorflow
import os
import numpy as np
import tensorflow as tf
import tensorflow as tf
from tensorflow.keras import layers, models

In [22]:
# Sklearn scaling
from sklearn.preprocessing import MinMaxScaler
import sklearn as skl

# Keras Specific Dependencies

# Keras
import keras_tuner as kt
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import Dense
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,OneHotEncoder
from keras_tuner import HyperParameters
import pandas as pd

In [23]:
# Load the songs dataset from a CSV path relative to the working directory
csv_path = "spotify_songs_nogenre.csv"
spotify_df = pd.read_csv(csv_path)

# Preview the first rows
spotify_df.head()

NameError: name 'pd' is not defined

In [None]:
 # Set 'song' and 'artist' columns as the index
index_columns = ['song', 'artist']
spotify_df.set_index(index_columns, inplace=True)

# Preview the re-indexed DataFrame
spotify_df.head()

In [None]:
# One-hot encode the genre column.
# Only the 'genre' Series is encoded: passing the whole DataFrame to get_dummies
# returns every feature column as well, and concatenating that result back onto
# spotify_df would duplicate all features.
# dtype=float keeps the targets numeric (recent pandas versions default to bool).
genre_df = pd.get_dummies(spotify_df['genre'], prefix='genre', dtype=float)
genre_df.head()

In [None]:
# Append the columns of genre_df to the right of spotify_df's columns
frames_to_join = [spotify_df, genre_df]
spotify_df = pd.concat(frames_to_join, axis="columns")
spotify_df.head()

In [None]:
# Remove the raw 'genre' column from spotify_df (in place)
spotify_df.drop('genre', axis=1, inplace=True)
spotify_df.head()

In [None]:
# Separate targets from features: y holds every column whose name contains
# 'genre_', X holds all remaining columns
y = spotify_df.filter(like='genre_')
X = spotify_df.drop(columns=y.columns)

In [None]:
# Hold out 20% of the rows as a test set (fixed random_state for repeatable splits)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Standardize the features: the scaler is fitted on the training split only,
# then the same fitted scaler transforms both splits
X_scaler = StandardScaler()
X_train_scaled = X_scaler.fit_transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [None]:
# Start an empty Sequential network and declare its input: one value per
# column of X_train
n_features = X_train.shape[1]
model = models.Sequential()
model.add(layers.InputLayer(input_shape=(n_features,)))

In [None]:
# Two fully connected ReLU hidden layers: 64 units, then 32 units
for hidden_units in (64, 32):
    model.add(layers.Dense(hidden_units, activation='relu'))

In [None]:
# Softmax output layer with one unit per column of y_train
n_genres = y_train.shape[1]
model.add(layers.Dense(n_genres, activation='softmax'))

In [None]:
# Configure training: Adam optimizer, categorical cross-entropy loss, accuracy metric
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Train the model
model.fit(X_train, y_train, epochs=20, batch_size=32, validation_split=0.2)

In [None]:
# Evaluate the model on the test set
test_loss, test_acc = model.evaluate(X_test, y_test)
print(f'Test accuracy: {test_acc}')

# Tune Model

In [None]:
def create_model(hp):
    """Build and compile one candidate classifier for the KerasTuner search.

    The tuner picks the hidden-layer activation ('relu' or 'tanh'), the width of
    the first layer, how many further hidden layers to add (1 to 5) and the
    width of each of them. Every width is drawn from the range 1..30, step 5.

    Reads the notebook-level `X_train` (for the input width) and `y_train`
    (for the number of output units).

    Args:
        hp: hyperparameter container supplied by the tuner; its `Choice` and
            `Int` methods are called to register and sample values.

    Returns:
        The compiled Sequential model (categorical cross-entropy loss, Adam
        optimizer, accuracy metric).
    """
    def sample_units(name):
        # All tunable layer widths share the same search range
        return hp.Int(name, min_value=1, max_value=30, step=5)

    network = tf.keras.models.Sequential()

    # One activation function is shared by every hidden layer
    hidden_activation = hp.Choice('activation', ['relu', 'tanh'])

    # The first hidden layer also declares the input width
    network.add(tf.keras.layers.Dense(
        units=sample_units('first_units'),
        activation=hidden_activation,
        input_shape=(X_train.shape[1],),
    ))

    # Tuner-selected number of additional hidden layers, each with its own width
    extra_layer_count = hp.Int('num_layers', 1, 5)
    for layer_idx in range(extra_layer_count):
        network.add(tf.keras.layers.Dense(
            units=sample_units(f'units_{layer_idx}'),
            activation=hidden_activation,
        ))

    # Softmax output with one unit per column of y_train
    network.add(layers.Dense(y_train.shape[1], activation='softmax'))

    network.compile(loss="categorical_crossentropy", optimizer='adam', metrics=["accuracy"])
    return network

In [None]:
# Hyperband tuner over create_model, optimizing validation accuracy
hyperband_settings = {
    "objective": "val_accuracy",
    "max_epochs": 20,
    "hyperband_iterations": 2,
}
tuner = kt.Hyperband(create_model, **hyperband_settings)

In [None]:
# Run the kerastuner search for best hyperparameters.
# Validation rows are carved out of the training data (validation_split is
# forwarded to model.fit) instead of passing the test set as validation_data:
# choosing hyperparameters on the test set would make any later test-set
# accuracy an optimistic, biased estimate.
tuner.search(X_train_scaled, y_train, epochs=20, validation_split=0.2)

In [None]:
# Print the hyperparameter values of the three best trials
top_hyper = tuner.get_best_hyperparameters(num_trials=3)
for best_hp in top_hyper:
    print(best_hp.values)

In [None]:
# Evaluate the top 3 models against the test dataset
top_model = tuner.get_best_models(3)
for model in top_model:
    model_loss, model_accuracy = model.evaluate(X_test_scaled,y_test,verbose=2)
    print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

In [None]:
# save model
model.save("spotify_trained.h5")

# Predicting classification

In [None]:
# Load the model from the file written by the save cell ("spotify_trained.h5").
# The previous name, "best_model.h5", is never written anywhere in this notebook,
# so loading it fails on a fresh run unless that file already exists.
from tensorflow.keras.models import load_model
model = load_model("spotify_trained.h5")

In [None]:
# Classify a single song and compare the prediction with its true genre.

# Positional index of the row to classify; used for the features, the target
# and the song name so the three can never point at different rows
row_position = 14

# Reuse the scaler that was fitted on the training data rather than fitting a
# second copy, so prediction-time scaling always matches training
scaler = X_scaler

# Display label for each one-hot genre column
genre_display_names = {
    'genre_Dance/Electronic': 'Dance/Electronic',
    'genre_R&B': 'R&B',
    'genre_country': 'Country',
    'genre_hip hop': 'Hip Hop',
    'genre_latin': 'Latin',
    'genre_metal': 'Metal',
    'genre_pop': 'Pop',
    'genre_rock': 'Rock',
}

# Order the labels by the target columns used for training: the model has one
# output unit per column of y_train, in that column order. An unexpected column
# raises KeyError instead of silently shifting the labels.
genre_labels = [genre_display_names[column] for column in y_train.columns]

# Take the row's features straight from X (same rows as spotify_df, minus the
# genre columns) so the columns and their order are exactly those used for
# training. The double brackets keep a one-row DataFrame, which preserves the
# column names and dtypes the scaler was fitted with.
row_features = X.iloc[[row_position]]

# Apply the same scaler used during training
row_features_scaled = scaler.transform(row_features)

# Make a classification prediction with the model
predicted_genre_prob = model.predict(row_features_scaled)
predicted_genre_index = predicted_genre_prob.argmax(axis=-1)  # index of the highest probability
predicted_genre_label = genre_labels[predicted_genre_index[0]]  # map index to genre label

# Actual genre: position of the largest entry in the row's one-hot target vector
actual_genre_one_hot = y.iloc[row_position].to_numpy()
actual_genre_index = actual_genre_one_hot.argmax()
actual_genre_label = genre_labels[actual_genre_index]

# The index was set to ['song', 'artist'], so this is a (song, artist) pair
song_name = spotify_df.index[row_position]

# Check if the predicted genre matches the actual genre
is_classified_correctly = predicted_genre_label == actual_genre_label

# Output the results including the song name
print(f"Song Name: {song_name}")
print(f"Predicted Genre: {predicted_genre_label}")
print(f"Actual Genre: {actual_genre_label}")
print(f"Classified Correctly: {is_classified_correctly}")
