In [1]:
import numpy as np
import pandas as pd
import json
import os

import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
artists = pd.read_csv("../../../data/processed/artists.csv")
tracks = pd.read_json("../../../data/v2/tracks.json")
track_storage = pd.read_json("../../../data/v2/track_storage.json")
users = pd.read_json("../../../data/v2/users.json")

# Preprocess tracks data

In [3]:
tracks.head()

Unnamed: 0,id,name,popularity,duration_ms,explicit,id_artist,release_date,danceability,energy,key,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo
0,0RNxWy0PC3AyH4ThH3aGK6,Mack the Knife,55,201467,0,19eLuQmk9aCobbVDHc6eek,1929,0.673,0.377,0,-14.141,0.0697,0.586,0.0,0.332,0.713,88.973
1,2W889aLIKxULEefrleFBFI,Someone to Watch Over Me,54,198000,0,1Mxqyy3pSjf8kZZL4QVxS0,1943,0.204,0.151,2,-17.842,0.0418,0.947,9e-06,0.321,0.134,91.783
2,4Pnzw1nLOpDNV6MKI5ueIR,Nancy (With the Laughing Face) - 78rpm Version,55,199000,0,1Mxqyy3pSjf8kZZL4QVxS0,1944,0.295,0.0826,1,-19.569,0.0367,0.984,0.000358,0.156,0.169,128.6
3,7GLmfKOe5BfOXk7334DoKt,Saturday Night (Is The Loneliest Night In The ...,54,163000,0,1Mxqyy3pSjf8kZZL4QVxS0,1944,0.561,0.335,9,-11.093,0.0499,0.84,2e-06,0.788,0.59,126.974
4,6kD1SNGPkfX9LwaGd1FG92,Put Your Dreams Away (For Another Day),53,186173,0,1Mxqyy3pSjf8kZZL4QVxS0,1944,0.197,0.0546,1,-22.411,0.0346,0.95,0.276,0.152,0.1,90.15


In [4]:
track_ids = tracks.id

In [5]:
VALID_COLUMN_NAMES = ['duration_ms', 'popularity', 'explicit', 'release_date','danceability', 'energy', 'key', 'loudness', 'speechiness', 'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo']

In [6]:
tracks = tracks[VALID_COLUMN_NAMES]
tracks.head()

Unnamed: 0,duration_ms,popularity,explicit,release_date,danceability,energy,key,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo
0,201467,55,0,1929,0.673,0.377,0,-14.141,0.0697,0.586,0.0,0.332,0.713,88.973
1,198000,54,0,1943,0.204,0.151,2,-17.842,0.0418,0.947,9e-06,0.321,0.134,91.783
2,199000,55,0,1944,0.295,0.0826,1,-19.569,0.0367,0.984,0.000358,0.156,0.169,128.6
3,163000,54,0,1944,0.561,0.335,9,-11.093,0.0499,0.84,2e-06,0.788,0.59,126.974
4,186173,53,0,1944,0.197,0.0546,1,-22.411,0.0346,0.95,0.276,0.152,0.1,90.15


In [7]:
tracks.dtypes

duration_ms           int64
popularity            int64
explicit              int64
release_date         object
danceability        float64
energy              float64
key                   int64
loudness            float64
speechiness         float64
acousticness        float64
instrumentalness    float64
liveness            float64
valence             float64
tempo               float64
dtype: object

### Release date

Simplify this column to just have a year of the song release

In [8]:
rd = tracks.release_date
rd = pd.to_datetime(rd, errors='coerce')
tracks['release_date'] = rd.dt.year.fillna(0).astype(int)

### Standarizing columns

In [9]:
from sklearn.preprocessing import StandardScaler

In [10]:
tracks.head()

Unnamed: 0,duration_ms,popularity,explicit,release_date,danceability,energy,key,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo
0,201467,55,0,1929,0.673,0.377,0,-14.141,0.0697,0.586,0.0,0.332,0.713,88.973
1,198000,54,0,1943,0.204,0.151,2,-17.842,0.0418,0.947,9e-06,0.321,0.134,91.783
2,199000,55,0,1944,0.295,0.0826,1,-19.569,0.0367,0.984,0.000358,0.156,0.169,128.6
3,163000,54,0,1944,0.561,0.335,9,-11.093,0.0499,0.84,2e-06,0.788,0.59,126.974
4,186173,53,0,1944,0.197,0.0546,1,-22.411,0.0346,0.95,0.276,0.152,0.1,90.15


In [11]:
scaler = StandardScaler()
tracks = pd.DataFrame(scaler.fit_transform(tracks), columns=tracks.columns)

In [12]:
tracks.head()

Unnamed: 0,duration_ms,popularity,explicit,release_date,danceability,energy,key,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo
0,-0.397693,-0.792191,-0.447118,-5.542188,0.466992,-1.293191,-1.483343,-1.857878,-0.147948,1.152614,-0.263179,0.826836,0.797638,-1.112254
1,-0.445783,-0.916444,-0.447118,-4.536483,-2.496578,-2.371576,-0.921346,-2.847975,-0.460325,2.460088,-0.263119,0.761337,-1.570737,-1.017368
2,-0.431913,-0.792191,-0.447118,-4.464647,-1.921557,-2.697954,-1.202345,-3.309985,-0.517426,2.594095,-0.260837,-0.221159,-1.427571,0.225836
3,-0.931261,-0.916444,-0.447118,-4.464647,-0.240726,-1.493599,1.045646,-1.042471,-0.369635,2.072554,-0.263169,3.542097,0.294512,0.170931
4,-0.609833,-1.040697,-0.447118,-4.464647,-2.54081,-2.83156,-1.202345,-4.070282,-0.540938,2.470953,1.542815,-0.244977,-1.709812,-1.07251


### Key column

This column contains the categorical columns wih low cardinality. So it will be One Hot Encoded.

In [13]:
tracks.key.value_counts()

-1.483343    2631
 0.483649    2433
 1.045646    2359
-0.921346    2308
-1.202345    2052
-0.359348    1866
-0.078349    1854
 1.607644    1768
 0.202650    1522
 0.764647    1441
 1.326645    1429
-0.640347     749
Name: key, dtype: int64

In [14]:
tracks = pd.get_dummies(tracks, columns=['key'])

In [15]:
tracks.columns

Index(['duration_ms', 'popularity', 'explicit', 'release_date', 'danceability',
       'energy', 'loudness', 'speechiness', 'acousticness', 'instrumentalness',
       'liveness', 'valence', 'tempo', 'key_-1.4833434758561947',
       'key_-1.202344617097444', 'key_-0.9213457583386933',
       'key_-0.6403468995799425', 'key_-0.3593480408211918',
       'key_-0.07834918206244113', 'key_0.20264967669630957',
       'key_0.4836485354550603', 'key_0.764647394213811',
       'key_1.0456462529725616', 'key_1.3266451117313125',
       'key_1.6076439704900631'],
      dtype='object')

In [16]:
tracks.shape

(22412, 25)

In [17]:
tracks.head()

Unnamed: 0,duration_ms,popularity,explicit,release_date,danceability,energy,loudness,speechiness,acousticness,instrumentalness,...,key_-0.9213457583386933,key_-0.6403468995799425,key_-0.3593480408211918,key_-0.07834918206244113,key_0.20264967669630957,key_0.4836485354550603,key_0.764647394213811,key_1.0456462529725616,key_1.3266451117313125,key_1.6076439704900631
0,-0.397693,-0.792191,-0.447118,-5.542188,0.466992,-1.293191,-1.857878,-0.147948,1.152614,-0.263179,...,0,0,0,0,0,0,0,0,0,0
1,-0.445783,-0.916444,-0.447118,-4.536483,-2.496578,-2.371576,-2.847975,-0.460325,2.460088,-0.263119,...,1,0,0,0,0,0,0,0,0,0
2,-0.431913,-0.792191,-0.447118,-4.464647,-1.921557,-2.697954,-3.309985,-0.517426,2.594095,-0.260837,...,0,0,0,0,0,0,0,0,0,0
3,-0.931261,-0.916444,-0.447118,-4.464647,-0.240726,-1.493599,-1.042471,-0.369635,2.072554,-0.263169,...,0,0,0,0,0,0,0,1,0,0
4,-0.609833,-1.040697,-0.447118,-4.464647,-2.54081,-2.83156,-4.070282,-0.540938,2.470953,1.542815,...,0,0,0,0,0,0,0,0,0,0


### Train VAE

In [18]:
import tensorflow as tf; print(tf.config.list_physical_devices('GPU'))
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets, linear_model, decomposition, manifold, preprocessing
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import RFE
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import precision_recall_curve, roc_curve, auc, accuracy_score, confusion_matrix
from sklearn.model_selection import StratifiedKFold, KFold, train_test_split
from tensorflow.keras import layers
import pickle
import time
from numpy import load
from matplotlib import pyplot
import pickle
import argparse

import tensorflow as tf
from tensorflow import keras
print(tf.__version__)

from tensorflow.compat.v1.keras.layers import Input, Dense, LeakyReLU, Conv2D, MaxPooling2D, UpSampling2D,  Concatenate
from tensorflow.compat.v1.keras.models import Model
from tensorflow.compat.v1.keras.layers import Dense, Reshape, Flatten
from tensorflow.compat.v1.keras.layers import Dropout,BatchNormalization
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.losses import mse, binary_crossentropy, logcosh
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.utils import plot_model
from tensorflow.keras import backend as K

import os
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
os.environ['TF_CPP_MIN_LOG_LEVEL'] = "3"

from scipy.stats import wasserstein_distance
import pandas as pd
from sklearn.metrics import mean_absolute_error
from datetime import datetime

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
2.10.1


In [19]:
# Define the encoder model
input_features = keras.Input(shape=(25,), name="input_features")
x = Dense(32, activation="relu")(input_features)
x = Dense(64, activation="relu")(x)
x = Dense(128, activation="relu")(x)

z_mean = Dense(64, name="z_mean")(x)
z_log_var = Dense(64, name="z_log_var")(x)

# Define the sampling layer to sample from the latent space
class Sampling(layers.Layer):
    def call(self, inputs):
        z_mean, z_log_var = inputs
        batch_size = tf.shape(z_mean)[0]
        latent_dim = tf.shape(z_mean)[1]
        epsilon = tf.random.normal(shape=(batch_size, latent_dim))
        return z_mean + tf.exp(0.5 * z_log_var) * epsilon
z = Sampling()([z_mean, z_log_var])

# Define the decoder model
latent_inputs = Input(shape=(64,), name="z")
x = Dense(128, activation="relu")(latent_inputs)
x = Dense(64, activation="relu")(x)
x = Dense(32, activation="relu")(x)
outputs = Dense(25, activation="sigmoid")(x)

# Define the VAE model
encoder = Model(inputs=input_features, outputs=[z_mean, z_log_var, z], name="encoder")
decoder = Model(inputs=latent_inputs, outputs=outputs, name="decoder")
outputs = decoder(z)
vae = Model(inputs=input_features, outputs=outputs, name="vae")

# Print the summary of the models
encoder.summary()
decoder.summary()
vae.summary()

Model: "encoder"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_features (InputLayer)    [(None, 25)]         0           []                               
                                                                                                  
 dense (Dense)                  (None, 32)           832         ['input_features[0][0]']         
                                                                                                  
 dense_1 (Dense)                (None, 64)           2112        ['dense[0][0]']                  
                                                                                                  
 dense_2 (Dense)                (None, 128)          8320        ['dense_1[0][0]']                
                                                                                            

VAE 2

In [20]:
def kl_loss(z_mean, z_log_var):
    """Calculates the KL divergence loss."""
    kl_loss = -0.5 * tf.reduce_mean(1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var), axis=-1)
    return kl_loss

def reconstruction_loss(inputs, reconstructed):
    mse = tf.keras.losses.MeanAbsoluteError()
    loss = mse(inputs, reconstructed)
    return loss

@tf.function
def train_step(inputs, vae_optimizer):
    with tf.GradientTape() as tape:
        z_mean, z_log_var, z = encoder(inputs)
        reconstructed = decoder(z)
        reconstruction_losses = reconstruction_loss(inputs, reconstructed)
        kl_losses = kl_loss(z_mean, z_log_var)
        loss = tf.reduce_mean(reconstruction_losses + kl_losses)
    gradients = tape.gradient(loss, vae.trainable_variables)
    vae_optimizer.apply_gradients(zip(gradients, vae.trainable_variables))
    return loss

def train(train_dataset, epochs, vae_optimizer):
    for epoch in range(epochs):
        epoch_loss = 0
        for batch in train_dataset:
            inputs = batch[0]
            loss = train_step(inputs, vae_optimizer)
            epoch_loss += loss
        print(f"Epoch {epoch + 1}: Loss = {epoch_loss / len(train_dataset):.4f}")



In [21]:
tracks = tracks.astype('float32')

In [22]:
train_dataset = tf.data.Dataset.from_tensor_slices(tracks.values)
train_dataset = train_dataset.shuffle(buffer_size=1024).batch(512)

train_dataset = train_dataset.map(lambda x: (tf.reshape(x, [-1, 25]), x))
vae_optimizer = tf.keras.optimizers.RMSprop(learning_rate=1e-4)

In [23]:
train(train_dataset, epochs=100, vae_optimizer=vae_optimizer)

Epoch 1: Loss = 0.7045
Epoch 2: Loss = 0.6668
Epoch 3: Loss = 0.6313
Epoch 4: Loss = 0.5971
Epoch 5: Loss = 0.5644
Epoch 6: Loss = 0.5328
Epoch 7: Loss = 0.5010
Epoch 8: Loss = 0.4734
Epoch 9: Loss = 0.4533
Epoch 10: Loss = 0.4404
Epoch 11: Loss = 0.4328
Epoch 12: Loss = 0.4288
Epoch 13: Loss = 0.4266
Epoch 14: Loss = 0.4257
Epoch 15: Loss = 0.4252
Epoch 16: Loss = 0.4248
Epoch 17: Loss = 0.4247
Epoch 18: Loss = 0.4246
Epoch 19: Loss = 0.4245
Epoch 20: Loss = 0.4244
Epoch 21: Loss = 0.4244
Epoch 22: Loss = 0.4243
Epoch 23: Loss = 0.4243
Epoch 24: Loss = 0.4242
Epoch 25: Loss = 0.4241
Epoch 26: Loss = 0.4241
Epoch 27: Loss = 0.4240
Epoch 28: Loss = 0.4239
Epoch 29: Loss = 0.4239
Epoch 30: Loss = 0.4237
Epoch 31: Loss = 0.4237
Epoch 32: Loss = 0.4234
Epoch 33: Loss = 0.4232
Epoch 34: Loss = 0.4230
Epoch 35: Loss = 0.4229
Epoch 36: Loss = 0.4226
Epoch 37: Loss = 0.4223
Epoch 38: Loss = 0.4215
Epoch 39: Loss = 0.4203
Epoch 40: Loss = 0.4200
Epoch 41: Loss = 0.4201
Epoch 42: Loss = 0.4206
E