# LSTM for Feature Extraction

## Importing Required Libraries

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import ast



## Read the CSV File with Audio Features

In [2]:
# Load the track-level table (metadata plus audio features) from disk.
dataset_path = 'Data/updated_dataset_with_audio_features.csv'
df = pd.read_csv(dataset_path)

## Exploratory Data Analysis

In [3]:
# Report how many missing entries each column contains.
missing_per_column = df.isnull().sum()
print("\nChecking for missing values:", missing_per_column, sep="\n")


Checking for missing values:
track_id            0
name                5
artist              5
genre               0
release_date        0
danceability        0
energy              0
key                 0
loudness            0
mode                0
speechiness         0
acousticness        0
instrumentalness    0
liveness            0
valence             0
tempo               0
time_signature      0
duration_ms         0
album_cover_url     8
bar_count           3
beat_count          3
tatum_count         3
avg_timbre          3
avg_pitch           3
dtype: int64


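### Remove rows with missing bar/beat/tatum counts or timbre/pitch vectors

Only rows missing one of these five columns are dropped; missing `name`, `artist` and `album_cover_url` values are left in place.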

In [4]:
# Columns that must be populated for a row to be kept.
columns_with_nans = ['bar_count', 'beat_count', 'tatum_count', 'avg_timbre', 'avg_pitch']

# Drop every row that lacks any of those values, then renumber the index
# from 0 so it is contiguous again.
df = df.dropna(subset=columns_with_nans).reset_index(drop=True)

# Re-run the missing-value report to confirm the listed columns are now complete.
print("\nAfter removing rows with missing values:", df.isnull().sum(), sep="\n")


After removing rows with missing values:
track_id            0
name                5
artist              5
genre               0
release_date        0
danceability        0
energy              0
key                 0
loudness            0
mode                0
speechiness         0
acousticness        0
instrumentalness    0
liveness            0
valence             0
tempo               0
time_signature      0
duration_ms         0
album_cover_url     8
bar_count           0
beat_count          0
tatum_count         0
avg_timbre          0
avg_pitch           0
dtype: int64


### Convert the 12-element `avg_timbre` and `avg_pitch` arrays into 12 columns each

In [5]:
def _parse_vector(value):
    """Return ``value`` as a Python object, parsing it only if it is still a string.

    The CSV stores each vector as its textual literal. Values that are already
    parsed are passed through unchanged, so this cell can safely be re-run:
    ``ast.literal_eval`` raises ``ValueError`` when handed a list.
    """
    return ast.literal_eval(value) if isinstance(value, str) else value


# Turn the string-encoded 'avg_timbre' / 'avg_pitch' cells into real lists.
for vector_column in ('avg_timbre', 'avg_pitch'):
    df[vector_column] = df[vector_column].apply(_parse_vector)

In [6]:
# Names for the 12 expanded timbre columns and the 12 expanded pitch columns.
timbre_columns = ['timbre_' + str(idx) for idx in range(12)]
pitch_columns = ['pitch_' + str(idx) for idx in range(12)]

In [7]:
# Expand each list-valued column into one column per element, reusing df's
# index so the new frames stay row-aligned with df.
timbre_rows = df['avg_timbre'].tolist()
pitch_rows = df['avg_pitch'].tolist()
timbre_df = pd.DataFrame(timbre_rows, index=df.index, columns=timbre_columns)
pitch_df = pd.DataFrame(pitch_rows, index=df.index, columns=pitch_columns)

In [8]:
# Append the expanded timbre and pitch columns to the right of the original columns.
frames_to_join = [df, timbre_df, pitch_df]
df_expanded = pd.concat(frames_to_join, axis='columns')

In [9]:
# Model inputs: scalar track descriptors followed by the expanded timbre and pitch columns.
scalar_features = [
    'tempo', 'loudness', 'key', 'time_signature',
    'mode', 'bar_count', 'beat_count', 'duration_ms',
]
features = [*scalar_features, *timbre_columns, *pitch_columns]

# Model outputs: the seven audio attributes that are predicted jointly.
targets = [
    'danceability', 'energy', 'speechiness', 'acousticness',
    'instrumentalness', 'liveness', 'valence',
]


## Train-Test Split

In [10]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the split repeatable.
X_all = df_expanded[features]
y_all = df_expanded[targets]
X_train, X_test, y_train, y_test = train_test_split(
    X_all,
    y_all,
    test_size=0.2,
    random_state=42,
)

In [11]:
# Peek at the first training target row (positional slice, original index label kept).
y_train.iloc[:1]

Unnamed: 0,danceability,energy,speechiness,acousticness,instrumentalness,liveness,valence
6365,0.46,0.497,0.0739,0.544,0.0,0.226,0.554


## Scaling the data

In [12]:
# Learn each feature's min/max from the training split only, then apply that
# same fitted scaler to both splits so no test-set statistics are used.
scaler = MinMaxScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [13]:
from joblib import dump

# Write the fitted scaler to 'scaler.joblib' in the working directory.
dump(scaler, filename='scaler.joblib')

['scaler.joblib']

In [14]:
# Display the scaled training matrix to eyeball the result of the scaling step.
X_train_scaled

array([[0.6563231 , 0.74338812, 0.18181818, ..., 0.51534161, 0.28900405,
        0.34488467],
       [0.58161231, 0.86800602, 0.36363636, ..., 0.67648396, 0.30171423,
        0.41005285],
       [0.60894586, 0.86031985, 0.09090909, ..., 0.26462613, 0.33490822,
        0.2497152 ],
       ...,
       [0.71773501, 0.85837062, 1.        , ..., 0.3023889 , 0.23673143,
        0.41798922],
       [0.4330537 , 0.59318655, 0.36363636, ..., 0.23625334, 0.18112224,
        0.37133918],
       [0.44251007, 0.66526381, 0.        , ..., 0.15796592, 0.21753203,
        0.23373458]])

## Building the Model and Hyperparameter Tuning

In [15]:
import tensorflow as tf

2024-04-20 18:55:52.395371: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-04-20 18:56:00.123797: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [16]:
# Insert a length-1 middle axis: (samples, features) -> (samples, 1, features),
# i.e. every sample becomes a one-step sequence for the LSTM.
X_train_reshaped = X_train_scaled[:, np.newaxis, :]

In [19]:
import keras_tuner as kt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout

def build_model(hp):
    """Build and compile an LSTM regressor for one KerasTuner trial.

    Args:
        hp: KerasTuner ``HyperParameters`` object used to sample the layer
            sizes, the dropout rates, and whether a second LSTM layer is
            stacked on top of the first.

    Returns:
        A compiled ``Sequential`` model that takes inputs of shape
        ``(1, n_features)`` and emits 7 regression outputs (MSE loss, MAE metric).
    """
    # Register every hyperparameter up front, regardless of the branch taken
    # below, so that best_hps.get('units_l2') and best_hps.get('dropout_2')
    # are always defined when the search results are reported.
    units_l1 = hp.Int('units', min_value=32, max_value=512, step=32)
    dropout_l1 = hp.Float('dropout_1', min_value=0.0, max_value=0.5, step=0.1)
    use_second_lstm = hp.Boolean('second_lstm_layer')
    units_l2 = hp.Int('units_l2', min_value=32, max_value=512, step=32)
    dropout_l2 = hp.Float('dropout_2', min_value=0.0, max_value=0.5, step=0.1)

    model = Sequential()

    # The first LSTM only needs to emit the full sequence when another LSTM
    # consumes it; otherwise it returns its final state straight to the head.
    model.add(LSTM(units=units_l1,
                   input_shape=(1, X_train_scaled.shape[1]),
                   return_sequences=use_second_lstm))
    model.add(Dropout(rate=dropout_l1))

    # Genuinely optional second LSTM layer. Previously an LSTM was added in
    # both branches, so the flag only toggled the trailing dropout.
    if use_second_lstm:
        model.add(LSTM(units=units_l2, return_sequences=False))
        model.add(Dropout(rate=dropout_l2))

    model.add(Dense(7))  # Output layer for the 7 target variables

    # Compile the model
    model.compile(optimizer='adam', loss='mse', metrics=['mae'])

    return model

# Create a Hyperband tuner that minimises validation loss.
# Results are persisted under my_dir/intro_to_kt; when that directory already
# exists the tuner reloads it instead of searching again.
tuner = kt.Hyperband(build_model,
                     objective='val_loss',
                     max_epochs=10,
                     directory='my_dir',
                     project_name='intro_to_kt')

# Stop a trial once val_loss has failed to improve for 5 consecutive epochs.
stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)

# Execute the hyperparameter search, holding out 20% of the training data for validation.
# NOTE(review): Hyperband schedules per-trial epochs itself (bounded by
# max_epochs above), so epochs=50 is likely overridden - confirm against the
# KerasTuner documentation.
tuner.search(X_train_reshaped, y_train, epochs=50, validation_split=0.2, callbacks=[stop_early])

# Get the optimal hyperparameters
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

print(f"""
The hyperparameter search is complete. The optimal number of units in the first LSTM layer is {best_hps.get('units')},
and the optimal dropout rate in the first LSTM layer is {best_hps.get('dropout_1')}.
""")

# Fixed: this message reports dropout_2, which belongs to the second LSTM
# layer; it was previously labelled as the first layer's dropout rate.
print(f"""
The hyperparameter search is complete. The optimal number of units in the second LSTM layer is {best_hps.get('units_l2')},
and the optimal dropout rate in the second LSTM layer is {best_hps.get('dropout_2')}.
""")

# Build the model with the optimal hyperparameters and train it on the data
model = tuner.hypermodel.build(best_hps)
history = model.fit(X_train_reshaped, y_train, epochs=100, validation_split=0.2)

Reloading Tuner from my_dir/intro_to_kt/tuner0.json

The hyperparameter search is complete. The optimal number of units in the first LSTM layer is 192,
and the optimal dropout rate in the first LSTM layer is 0.1.


The hyperparameter search is complete. The optimal number of units in the second LSTM layer is 416,
and the optimal dropout rate in the first LSTM layer is 0.0.

Epoch 1/100
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 118ms/step - loss: 0.0790 - mae: 0.2024 - val_loss: 0.0316 - val_mae: 0.1300
Epoch 2/100
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 107ms/step - loss: 0.0305 - mae: 0.1261 - val_loss: 0.0248 - val_mae: 0.1141
Epoch 3/100
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 109ms/step - loss: 0.0251 - mae: 0.1136 - val_loss: 0.0243 - val_mae: 0.1132
Epoch 4/100
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 107ms/step - loss: 0.0241 - mae: 0.1111 - val_loss: 0.0231 - val_mae: 0.1043

## Making Predictions on Test Data and Calculating Performance

In [20]:
# Insert a length-1 middle axis into the test matrix:
# (samples, features) -> (samples, 1, features).
X_test_reshaped = X_test_scaled[:, np.newaxis, :]


In [21]:
# Run the trained network on the held-out, reshaped test inputs.
predictions = model.predict(x=X_test_reshaped)


[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step


In [22]:
from sklearn.metrics import mean_squared_error

# Wrap the raw prediction array in a DataFrame that shares y_test's column
# names AND row index. Without index=..., the frame would get a fresh 0..n-1
# index while y_test keeps its shuffled original labels, so any label-based
# comparison between the two would silently misalign rows.
predictions_df = pd.DataFrame(predictions, columns=y_test.columns, index=y_test.index)

# Report the test-set mean squared error separately for each target variable.
for column in y_test.columns:
    mse = mean_squared_error(y_test[column], predictions_df[column])
    print(f'MSE for {column}: {mse}')


MSE for danceability: 0.007122517971619974
MSE for energy: 0.004722989208851494
MSE for speechiness: 0.0029597603693015443
MSE for acousticness: 0.007257743931439267
MSE for instrumentalness: 0.03806349811535897
MSE for liveness: 0.02460997063143774
MSE for valence: 0.02603261554741383


## Saving the Trained Model

In [36]:
from tensorflow.keras.models import load_model

In [44]:
# Write the trained model to a .keras file in the working directory.
model.save(filepath='RNN_LSTM_audiofeatures.keras')