In [58]:
model_name = 'nn_sequential'
model_variant = 'base'

In [59]:
import pandas as pd

df = pd.read_csv('../spotify_dataset.csv')
df = df.drop(columns=['Song ID', 'Artist', 'Number of Times Charted',
 'Week of Highest Charting', 'Song Name', 'Streams', 'Release Date',
  'Weeks Charted', 'Popularity', 'Index'])
df = df.replace(' ','0')

print('Shape of dataframe is:', df.shape)

df.head(5)

Shape of dataframe is: (1556, 13)


Unnamed: 0,Highest Charting Position,Artist Followers,Genre,Danceability,Energy,Loudness,Speechiness,Acousticness,Liveness,Tempo,Duration (ms),Valence,Chord
0,1,3377762,"['indie rock italiano', 'italian pop']",0.714,0.8,-4.808,0.0504,0.127,0.359,134.002,211560,0.589,B
1,2,2230022,['australian hip hop'],0.591,0.764,-5.484,0.0483,0.0383,0.103,169.928,141806,0.478,C#/Db
2,1,6266514,['pop'],0.563,0.664,-5.044,0.154,0.335,0.0849,166.928,178147,0.688,A
3,3,83293380,"['pop', 'uk pop']",0.808,0.897,-3.712,0.0348,0.0469,0.364,126.026,231041,0.591,B
4,5,5473565,"['lgbtq+ hip hop', 'pop rap']",0.736,0.704,-7.409,0.0615,0.0203,0.0501,149.995,212000,0.894,D#/Eb


In [60]:
# LABEL ENCODE CHORD 
# 'A' -> 1, 'B' -> 3, 'C#/Db' -> 5, etc.

from sklearn.preprocessing import LabelEncoder

enc = LabelEncoder()
df['Chord'] = enc.fit_transform(df.Chord.values)
df.head()

Unnamed: 0,Highest Charting Position,Artist Followers,Genre,Danceability,Energy,Loudness,Speechiness,Acousticness,Liveness,Tempo,Duration (ms),Valence,Chord
0,1,3377762,"['indie rock italiano', 'italian pop']",0.714,0.8,-4.808,0.0504,0.127,0.359,134.002,211560,0.589,3
1,2,2230022,['australian hip hop'],0.591,0.764,-5.484,0.0483,0.0383,0.103,169.928,141806,0.478,5
2,1,6266514,['pop'],0.563,0.664,-5.044,0.154,0.335,0.0849,166.928,178147,0.688,1
3,3,83293380,"['pop', 'uk pop']",0.808,0.897,-3.712,0.0348,0.0469,0.364,126.026,231041,0.591,3
4,5,5473565,"['lgbtq+ hip hop', 'pop rap']",0.736,0.704,-7.409,0.0615,0.0203,0.0501,149.995,212000,0.894,7


In [61]:
# NORMALIZE TEMPO

def normalize(df, column):
    column_norm = df[column].values.astype(float)
    v_min = min(column_norm)
    v_max = max(column_norm)
    for i, v in enumerate(column_norm):
        column_norm[i] = (v-v_min)/(v_max-v_min)
    return column_norm

df['Tempo'] = normalize(df, 'Tempo')
df.head()


Unnamed: 0,Highest Charting Position,Artist Followers,Genre,Danceability,Energy,Loudness,Speechiness,Acousticness,Liveness,Tempo,Duration (ms),Valence,Chord
0,1,3377762,"['indie rock italiano', 'italian pop']",0.714,0.8,-4.808,0.0504,0.127,0.359,0.652802,211560,0.589,3
1,2,2230022,['australian hip hop'],0.591,0.764,-5.484,0.0483,0.0383,0.103,0.827819,141806,0.478,5
2,1,6266514,['pop'],0.563,0.664,-5.044,0.154,0.335,0.0849,0.813204,178147,0.688,1
3,3,83293380,"['pop', 'uk pop']",0.808,0.897,-3.712,0.0348,0.0469,0.364,0.613946,231041,0.591,3
4,5,5473565,"['lgbtq+ hip hop', 'pop rap']",0.736,0.704,-7.409,0.0615,0.0203,0.0501,0.730713,212000,0.894,7


In [62]:
# STANDARDINALIZE ARTIST FOLLOWERS & DURATION

from sklearn.preprocessing import StandardScaler
import numpy as np 

def standardinalize(df, column):
    s = StandardScaler()
    column_values = df[column]
    # m_values = list(m_weights.values())
    column_values = np.array(column_values)
    column_values = column_values.reshape(-1,1)
    s.fit(column_values)
    return s.transform(column_values)

    # df[column] = df[column].apply(lambda x: s.transform(x))

df['Artist Followers'] = standardinalize(df, 'Artist Followers')
df['Duration (ms)'] = standardinalize(df, 'Duration (ms)')
df.head(5)

Unnamed: 0,Highest Charting Position,Artist Followers,Genre,Danceability,Energy,Loudness,Speechiness,Acousticness,Liveness,Tempo,Duration (ms),Valence,Chord
0,1,-0.674495,"['indie rock italiano', 'italian pop']",0.714,0.8,-4.808,0.0504,0.127,0.359,0.652802,0.301524,0.589,3
1,2,-0.743399,['australian hip hop'],0.591,0.764,-5.484,0.0483,0.0383,0.103,0.827819,-1.098915,0.478,5
2,1,-0.50107,['pop'],0.563,0.664,-5.044,0.154,0.335,0.0849,0.813204,-0.369303,0.688,1
3,3,4.123208,"['pop', 'uk pop']",0.808,0.897,-3.712,0.0348,0.0469,0.364,0.613946,0.692641,0.591,3
4,5,-0.548674,"['lgbtq+ hip hop', 'pop rap']",0.736,0.704,-7.409,0.0615,0.0203,0.0501,0.730713,0.310358,0.894,7


In [63]:
df['Artist Followers'] = df['Artist Followers'].values.astype(int)
df['Danceability'] = df['Danceability'].values.astype(float)
df['Energy'] = df['Energy'].values.astype(float)
df['Loudness'] = df['Loudness'].values.astype(float)
df['Speechiness'] = df['Speechiness'].values.astype(float)
df['Acousticness'] = df['Acousticness'].values.astype(float)
df['Liveness'] = df['Liveness'].values.astype(float)
df['Tempo'] = df['Tempo'].values.astype(float)
df['Duration (ms)'] = df['Duration (ms)'].values.astype(int)
df['Valence'] = df['Valence'].values.astype(float)

y = df['Highest Charting Position'].values

split = round(len(df)*0.6)
X_train = df[:split]
y_train = y[:split]
X_test = df[split:]
y_test = y[split:]

print('Shape of X_train is:', X_train.shape)
print('Shape of y_train is:', y_train.shape)
print('Shape of X_test is:', X_test.shape)
print('Shape of y_test is:', y_test.shape)

X_train.head(5)

Shape of X_train is: (934, 13)
Shape of y_train is: (934,)
Shape of X_test is: (622, 13)
Shape of y_test is: (622,)


Unnamed: 0,Highest Charting Position,Artist Followers,Genre,Danceability,Energy,Loudness,Speechiness,Acousticness,Liveness,Tempo,Duration (ms),Valence,Chord
0,1,0,"['indie rock italiano', 'italian pop']",0.714,0.8,-4.808,0.0504,0.127,0.359,0.652802,0,0.589,3
1,2,0,['australian hip hop'],0.591,0.764,-5.484,0.0483,0.0383,0.103,0.827819,-1,0.478,5
2,1,0,['pop'],0.563,0.664,-5.044,0.154,0.335,0.0849,0.813204,0,0.688,1
3,3,4,"['pop', 'uk pop']",0.808,0.897,-3.712,0.0348,0.0469,0.364,0.613946,0,0.591,3
4,5,0,"['lgbtq+ hip hop', 'pop rap']",0.736,0.704,-7.409,0.0615,0.0203,0.0501,0.730713,0,0.894,7


In [64]:
# SETTING UP KERAS CALLBACKS

import os
import time
import tensorflow as tf
from keras import callbacks

# Borrowed from: https://www.geeksforgeeks.org/choose-optimal-number-of-epochs-to-train-a-neural-network-in-keras/
earlystopping = callbacks.EarlyStopping(monitor ="val_loss", 
                                        mode ="min", patience = 10, 
                                        restore_best_weights = True)

def exponential_decay(lr0, s):
    def exponential_decay_fn(epoch):
        exp = lr0 * 0.1**(epoch / s)
        tf.summary.scalar('learning rate', data=exp, step=epoch)
        return exp
    return exponential_decay_fn

exponential_decay_fn = exponential_decay(lr0=0.01, s=10)
lr_scheduler = tf.keras.callbacks.LearningRateScheduler(exponential_decay_fn)

root_logdir = os.path.join(os.curdir, "./logged_models/" + model_name + '/' + model_variant)
def get_run_logdir():
    run_id = time.strftime("run_%Y_%m_%d-%H_%M_%S")
    return os.path.join(root_logdir, run_id)

run_log_dir = get_run_logdir()
file_writer = tf.summary.create_file_writer(run_log_dir + "/metrics")
file_writer.set_as_default()

tensorboard_cb = tf.keras.callbacks.TensorBoard(run_log_dir)

my_callbacks = [earlystopping]

In [65]:
from tensorflow.keras import layers

model = tf.keras.Sequential([
        layers.Dense(2, activation="relu"),
        layers.Dense(3, activation="relu"),
        layers.Dense(4),
])

model.compile(loss='mean_squared_error', optimizer='adam', metrics=['accuracy'])

history = model.fit(X_train['Artist Followers'], y_train, epochs=50, callbacks=my_callbacks, validation_split=0.2)

model.summary()

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_9 (Dense)              (None, 2)                 4         
_________________________________________________________________
dense_10 (Dense)             (None, 3)                 9         
_________________________________________________________