In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import sklearn.model_selection as sk
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from imblearn.over_sampling import SMOTE
import tensorflow.keras as keras
from tensorflow.keras import Sequential
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping
from keras import backend as K

## 1. Load & Prepare Data

In [3]:
# merge features and label
#df= pd.read_csv('25K_tracks_features_and_labels_all.csv')
df= pd.read_csv('../../preprocessing/datasets/ohe_25K_tracks_features_and_labels_all.csv')
df

Unnamed: 0.1,Unnamed: 0,track_id,track_genre_top,track_title,artist_name,set_split,set_subset,tempo,rms_harmonic_mean,rms_harmonic_var,...,key_C,key_C#,key_D,key_E,key_Eb,key_F,key_F#,key_G,scale_major,scale_minor
0,0,2,Hip-Hop,Food,AWOL,training,small,161.499023,0.060801,0.000606,...,1,0,0,0,0,0,0,0,0,1
1,1,3,Hip-Hop,Electric Ave,AWOL,training,medium,86.132812,0.068484,0.001239,...,0,0,0,0,0,0,0,1,0,1
2,2,5,Hip-Hop,This World,AWOL,training,small,99.384014,0.079553,0.002230,...,0,1,0,0,0,0,0,0,0,1
3,3,10,Pop,Freeway,Kurt Vile,training,small,112.347147,0.135624,0.000600,...,0,0,0,0,0,0,1,0,1,0
4,4,134,Hip-Hop,Street Music,AWOL,training,medium,135.999178,0.088261,0.001123,...,0,0,0,0,0,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24767,24995,155297,Instrumental,Nebula Reborn,Alex Mason/BlackSunAeon Music,training,medium,123.046875,0.038447,0.000094,...,0,0,1,0,0,0,0,0,1,0
24768,24996,155298,Folk,An Idiot Abroad,Greg Atkinson,training,medium,129.199219,0.139233,0.003167,...,0,0,0,0,0,0,0,1,1,0
24769,24997,155306,Folk,Tiny Man,Greg Atkinson,training,medium,103.359375,0.116513,0.003872,...,0,0,0,0,0,0,0,0,1,0
24770,24998,155307,Experimental,Kolka,AWOTT,training,medium,117.453835,0.085435,0.001713,...,1,0,0,0,0,0,0,0,0,1


In [4]:
#convert categorical variables/target class to code
df_train = df.copy()
# df_train['key'] = LabelEncoder().fit_transform(df_train['key'])
# df_train['scale'] = LabelEncoder().fit_transform(df_train['scale'])
df_train['track_genre_top'] = LabelEncoder().fit_transform(df_train['track_genre_top'])

feature_cols = list(df_train.columns[7:])
print(f'There are {len(feature_cols)} features')
print(feature_cols)
target_col = 'track_genre_top'
classes = df[target_col].unique()
num_class = len(classes)
print(f'There are {num_class} classes, they are:', classes)

There are 88 features
['tempo', 'rms_harmonic_mean', 'rms_harmonic_var', 'rms_percussive_mean', 'rms_percussive_var', 'spectral_centroid_mean', 'spectral_centroid_var', 'spectral_rolloff_mean', 'spectral_rolloff_var', 'zero_crossings', 'chroma_stft_0_mean', 'chroma_stft_1_mean', 'chroma_stft_2_mean', 'chroma_stft_3_mean', 'chroma_stft_4_mean', 'chroma_stft_5_mean', 'chroma_stft_6_mean', 'chroma_stft_7_mean', 'chroma_stft_8_mean', 'chroma_stft_9_mean', 'chroma_stft_10_mean', 'chroma_stft_11_mean', 'chroma_stft_0_var', 'chroma_stft_1_var', 'chroma_stft_2_var', 'chroma_stft_3_var', 'chroma_stft_4_var', 'chroma_stft_5_var', 'chroma_stft_6_var', 'chroma_stft_7_var', 'chroma_stft_8_var', 'chroma_stft_9_var', 'chroma_stft_10_var', 'chroma_stft_11_var', 'MFCC_1_mean', 'MFCC_2_mean', 'MFCC_3_mean', 'MFCC_4_mean', 'MFCC_5_mean', 'MFCC_6_mean', 'MFCC_7_mean', 'MFCC_8_mean', 'MFCC_9_mean', 'MFCC_10_mean', 'MFCC_11_mean', 'MFCC_12_mean', 'MFCC_13_mean', 'MFCC_14_mean', 'MFCC_15_mean', 'MFCC_16_mean

In [5]:
df[target_col].value_counts()

Rock                   7098
Electronic             6311
Experimental           2250
Hip-Hop                2197
Folk                   1518
Instrumental           1349
Pop                    1186
International          1018
Classical               619
Old-Time / Historic     510
Jazz                    384
Country                 178
Soul-RnB                154
Name: track_genre_top, dtype: int64

In [6]:
#Define train/test/validation & apply scaling
df_training = df_train.loc[df_train['set_split']=='training'].dropna()
df_testing = df_train.loc[df_train['set_split']=='test'].dropna()
df_validation = df_train.loc[df_train['set_split']=='validation'].dropna()

scaler = StandardScaler()

X_train = scaler.fit_transform(df_training[feature_cols])
y_train =  df_training[target_col].values
X_test = scaler.fit_transform(df_testing[feature_cols])
y_test = df_testing[target_col].values
X_val = scaler.fit_transform(df_validation[feature_cols])
y_val = df_validation[target_col].values
print('Training data:',X_train.shape)
print('Testing data:',X_test.shape)
print('Validation data:',X_val.shape)

Training data: (19744, 88)
Testing data: (2546, 88)
Validation data: (2482, 88)


In [7]:
#Oversample minority classes
##In the end we did not use oversampled because it performed worse than un-sampled data
smote = SMOTE(random_state=42)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)
print('Oversampled X_train:', X_train_resampled.shape)

Oversampled X_train: (73801, 88)


## 2. Build Models & Tune Hyperparameters
#### *You can skip this part and go straight to running the final model

In [8]:


# Define hyperparameters to tune
learning_rates = [0.0001, 0.001, 0.01, 0.1]
batch_sizes = [32, 64, 128]
hidden_layer_sizes = [[128,64,32], [256,128,64,32], [512,256,128,64,32]]
dropout_rates = [0.2, 0.5]


best_accuracy = 0
best_hyperparameters = None

# Grid search over hyperparameters
for lr in learning_rates:
    for batch_size in batch_sizes:
          for dropout_rate in dropout_rates:
              K.clear_session()  # Clear previous model
              model = Sequential()

              for hidden_size in hidden_layer_sizes:
                  for size in hidden_size:
                      model.add(Dense(size, activation='relu'))
                      model.add(Dropout(dropout_rate))
              model.add(Dense(num_class, activation='softmax'))

              optimizer = Adam(learning_rate=lr)
              model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

              # Early stopping to prevent overfitting
              early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

              history = model.fit(X_train, y_train, validation_data=(X_val, y_val),
                                    epochs=20, batch_size=batch_size, callbacks=[early_stopping])

              # Evaluate model on validation set
              _, accuracy = model.evaluate(X_val, y_val, verbose=0)

              # Check if this set of hyperparameters improved accuracy
              if accuracy > best_accuracy:
                    best_accuracy = accuracy
                    best_hyperparameters = {'lr': lr, 'batch_size': batch_size, 'hidden_layer_size': hidden_size, 'dropout_rate': dropout_rate}
                    best_model = model
# Print the best hyperparameters
print("Best Hyperparameters:", best_hyperparameters)



Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20




Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20




Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20




Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20




Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20




Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20




Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20




Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20




Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20




Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20




Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20




Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20




Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20




Epoch 1/20
 69/617 [==>...........................] - ETA: 1s - loss: 2.2372 - accuracy: 0.2545

InvalidArgumentError: Graph execution error:

Detected at node Adam/truediv_15 defined at (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/runpy.py", line 197, in _run_module_as_main

  File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/runpy.py", line 87, in _run_code

  File "/Users/1108232/PycharmProjects/MADS-699-Capstone-fa23-team12/venv/lib/python3.9/site-packages/ipykernel_launcher.py", line 17, in <module>

  File "/Users/1108232/PycharmProjects/MADS-699-Capstone-fa23-team12/venv/lib/python3.9/site-packages/traitlets/config/application.py", line 1053, in launch_instance

  File "/Users/1108232/PycharmProjects/MADS-699-Capstone-fa23-team12/venv/lib/python3.9/site-packages/ipykernel/kernelapp.py", line 736, in start

  File "/Users/1108232/PycharmProjects/MADS-699-Capstone-fa23-team12/venv/lib/python3.9/site-packages/tornado/platform/asyncio.py", line 195, in start

  File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/asyncio/base_events.py", line 601, in run_forever

  File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/asyncio/base_events.py", line 1905, in _run_once

  File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/asyncio/events.py", line 80, in _run

  File "/Users/1108232/PycharmProjects/MADS-699-Capstone-fa23-team12/venv/lib/python3.9/site-packages/ipykernel/kernelbase.py", line 516, in dispatch_queue

  File "/Users/1108232/PycharmProjects/MADS-699-Capstone-fa23-team12/venv/lib/python3.9/site-packages/ipykernel/kernelbase.py", line 505, in process_one

  File "/Users/1108232/PycharmProjects/MADS-699-Capstone-fa23-team12/venv/lib/python3.9/site-packages/ipykernel/kernelbase.py", line 412, in dispatch_shell

  File "/Users/1108232/PycharmProjects/MADS-699-Capstone-fa23-team12/venv/lib/python3.9/site-packages/ipykernel/kernelbase.py", line 740, in execute_request

  File "/Users/1108232/PycharmProjects/MADS-699-Capstone-fa23-team12/venv/lib/python3.9/site-packages/ipykernel/ipkernel.py", line 422, in do_execute

  File "/Users/1108232/PycharmProjects/MADS-699-Capstone-fa23-team12/venv/lib/python3.9/site-packages/ipykernel/zmqshell.py", line 546, in run_cell

  File "/Users/1108232/PycharmProjects/MADS-699-Capstone-fa23-team12/venv/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 3024, in run_cell

  File "/Users/1108232/PycharmProjects/MADS-699-Capstone-fa23-team12/venv/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 3079, in _run_cell

  File "/Users/1108232/PycharmProjects/MADS-699-Capstone-fa23-team12/venv/lib/python3.9/site-packages/IPython/core/async_helpers.py", line 129, in _pseudo_sync_runner

  File "/Users/1108232/PycharmProjects/MADS-699-Capstone-fa23-team12/venv/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 3284, in run_cell_async

  File "/Users/1108232/PycharmProjects/MADS-699-Capstone-fa23-team12/venv/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 3466, in run_ast_nodes

  File "/Users/1108232/PycharmProjects/MADS-699-Capstone-fa23-team12/venv/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 3526, in run_code

  File "/var/folders/np/f11s751j4dj_y_1cj4yxydpwpsr474/T/ipykernel_62120/2201503592.py", line 30, in <module>

  File "/Users/1108232/PycharmProjects/MADS-699-Capstone-fa23-team12/venv/lib/python3.9/site-packages/keras/src/utils/traceback_utils.py", line 65, in error_handler

  File "/Users/1108232/PycharmProjects/MADS-699-Capstone-fa23-team12/venv/lib/python3.9/site-packages/keras/src/engine/training.py", line 1807, in fit

  File "/Users/1108232/PycharmProjects/MADS-699-Capstone-fa23-team12/venv/lib/python3.9/site-packages/keras/src/engine/training.py", line 1401, in train_function

  File "/Users/1108232/PycharmProjects/MADS-699-Capstone-fa23-team12/venv/lib/python3.9/site-packages/keras/src/engine/training.py", line 1384, in step_function

  File "/Users/1108232/PycharmProjects/MADS-699-Capstone-fa23-team12/venv/lib/python3.9/site-packages/keras/src/engine/training.py", line 1373, in run_step

  File "/Users/1108232/PycharmProjects/MADS-699-Capstone-fa23-team12/venv/lib/python3.9/site-packages/keras/src/engine/training.py", line 1154, in train_step

  File "/Users/1108232/PycharmProjects/MADS-699-Capstone-fa23-team12/venv/lib/python3.9/site-packages/keras/src/optimizers/optimizer.py", line 544, in minimize

  File "/Users/1108232/PycharmProjects/MADS-699-Capstone-fa23-team12/venv/lib/python3.9/site-packages/keras/src/optimizers/optimizer.py", line 1223, in apply_gradients

  File "/Users/1108232/PycharmProjects/MADS-699-Capstone-fa23-team12/venv/lib/python3.9/site-packages/keras/src/optimizers/optimizer.py", line 652, in apply_gradients

  File "/Users/1108232/PycharmProjects/MADS-699-Capstone-fa23-team12/venv/lib/python3.9/site-packages/keras/src/optimizers/optimizer.py", line 1253, in _internal_apply_gradients

  File "/Users/1108232/PycharmProjects/MADS-699-Capstone-fa23-team12/venv/lib/python3.9/site-packages/keras/src/optimizers/optimizer.py", line 1345, in _distributed_apply_gradients_fn

  File "/Users/1108232/PycharmProjects/MADS-699-Capstone-fa23-team12/venv/lib/python3.9/site-packages/keras/src/optimizers/optimizer.py", line 1342, in apply_grad_to_update_var

  File "/Users/1108232/PycharmProjects/MADS-699-Capstone-fa23-team12/venv/lib/python3.9/site-packages/keras/src/optimizers/optimizer.py", line 241, in _update_step

  File "/Users/1108232/PycharmProjects/MADS-699-Capstone-fa23-team12/venv/lib/python3.9/site-packages/keras/src/optimizers/adam.py", line 204, in update_step

Incompatible shapes: [0] vs. [256]
	 [[{{node Adam/truediv_15}}]] [Op:__inference_train_function_427058]

In [9]:
best_hyperparameters

{'lr': 0.001,
 'batch_size': 128,
 'hidden_layer_size': [512, 256, 128, 64, 32],
 'dropout_rate': 0.2}

In [10]:
test_loss, test_accuracy = best_model.evaluate(X_test, y_test, batch_size=128)
print("The test loss is :",test_loss)
print("\nThe test Accuracy is :",test_accuracy)

The test loss is : 1.4869064092636108

The test Accuracy is : 0.5416339635848999


## 3. Retrain final model using the best model architecture & hyperparameters

In [11]:
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import Adam

model = keras.models.Sequential([
    keras.layers.Dense(512, activation="relu", input_shape=(X_train.shape[1],)),
    keras.layers.Dropout(0.2),

    keras.layers.Dense(256,activation="relu"),
    keras.layers.Dropout(0.2),

    keras.layers.Dense(128,activation="relu"),
    keras.layers.Dropout(0.2),

    keras.layers.Dense(64,activation="relu"),
    keras.layers.Dropout(0.2),

    keras.layers.Dense(32,activation="relu"),
    keras.layers.Dropout(0.2),

    keras.layers.Dense(num_class, activation="softmax"),

])
print(model.summary())
model.compile(optimizer=Adam(lr=0.001),
                  loss='sparse_categorical_crossentropy',
                   metrics='accuracy')

model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=30,
                     batch_size=128)

test_loss, test_accuracy = model.evaluate(X_test, y_test, batch_size=128)
print("The test loss is :",test_loss)
print("\nThe test Accuracy is :",test_accuracy)


Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_13 (Dense)            (None, 512)               45568     
                                                                 
 dropout_12 (Dropout)        (None, 512)               0         
                                                                 
 dense_14 (Dense)            (None, 256)               131328    
                                                                 
 dropout_13 (Dropout)        (None, 256)               0         
                                                                 
 dense_15 (Dense)            (None, 128)               32896     
                                                                 
 dropout_14 (Dropout)        (None, 128)               0         
                                                                 
 dense_16 (Dense)            (None, 64)               



None
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
The test loss is : 1.5868606567382812

The test Accuracy is : 0.5718774795532227


In [18]:
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, f1_score, precision_score, recall_score

# Evaluate the model on the test set
y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)  # Use argmax for categorical predictions


# Print classification report
print("Classification Report:")
print(classification_report(y_test, y_pred_classes))

# Print confusion matrix
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred_classes))

# Print accuracy
accuracy = accuracy_score(y_test, y_pred_classes)
print("Accuracy:", accuracy)

# Print precision
precision = precision_score(y_test, y_pred_classes, average='weighted')
print("Precision:", precision)

# Print recall
recall = recall_score(y_test, y_pred_classes, average='weighted')
print("Recall:", recall)

# Print F1 score
f1 = f1_score(y_test, y_pred_classes, average='weighted')
print("F1 Score:", f1)

Classification Report:
              precision    recall  f1-score   support

           0       0.74      0.81      0.77        62
           1       0.00      0.00      0.00        18
           2       0.64      0.71      0.67       632
           3       0.33      0.36      0.34       225
           4       0.22      0.28      0.24       152
           5       0.68      0.65      0.66       220
           6       0.37      0.22      0.28       174
           7       0.48      0.28      0.36       102
           8       0.56      0.36      0.44        39
           9       0.91      0.98      0.94        51
          10       0.14      0.08      0.10       119
          11       0.68      0.78      0.72       710
          12       0.00      0.00      0.00        42

    accuracy                           0.57      2546
   macro avg       0.44      0.42      0.43      2546
weighted avg       0.54      0.57      0.55      2546

Confusion Matrix:
[[ 50   0   1   4   0   0   6   0   1 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [13]:
confusion_matrix(y_test, y_pred_classes)

array([[ 50,   0,   1,   4,   0,   0,   6,   0,   1,   0,   0,   0,   0],
       [  0,   0,   4,   0,   0,   1,   0,   0,   0,   0,   2,  11,   0],
       [  5,   0, 449,  45,  16,  29,  15,   9,   5,   1,  10,  48,   0],
       [  4,   0,  48,  80,  18,   5,  16,   1,   0,   0,   6,  47,   0],
       [  0,   0,  11,  20,  42,   1,   6,   7,   2,   3,  10,  50,   0],
       [  0,   0,  54,   9,   1, 142,   1,   6,   0,   0,   2,   5,   0],
       [  4,   0,  21,  27,  53,   1,  38,   2,   0,   0,   8,  20,   0],
       [  0,   0,  15,   2,  24,   5,   6,  29,   0,   0,   2,  19,   0],
       [  2,   0,   4,   2,   6,   0,   7,   0,  14,   0,   1,   3,   0],
       [  0,   0,   0,   1,   0,   0,   0,   0,   0,  50,   0,   0,   0],
       [  0,   0,  43,   5,  13,  10,   2,   1,   0,   0,  10,  35,   0],
       [  3,   0,  41,  46,  20,  13,   5,   4,   3,   1,  22, 552,   0],
       [  0,   0,  12,   0,   0,   2,   0,   2,   0,   0,   1,  25,   0]])

In [19]:
model.save('cnn_genre_prediction.h5')

  saving_api.save_model(


In [105]:
type(X_train)

numpy.ndarray

In [57]:
# to get feature importances

import shap

background = X_train[np.random.choice(X_train.shape[0], 10000, replace=False)]

# Create a SHAP Explainer
explainer = shap.Explainer(model, background)

# Select a sample (or multiple samples) for explanation
sample = X_test[np.random.choice(X_test.shape[0], 1000, replace=False)]

# Calculate SHAP values
shap_values = explainer(sample)


PermutationExplainer explainer: 1001it [01:25, 10.59it/s]                         


In [88]:
feature_importances_per_class = {}

# Iterate over each class (label)
for label_index in range(shap_values.shape[2]):
    # Calculate mean absolute SHAP values for each feature for this label
    mean_abs_shap_per_feature = np.abs(shap_values.values[:, :, label_index]).mean(axis=0)
    
    # Assuming 'feature_cols' is a list of your feature names
    feature_importances_per_class[f'Label_{label_index}'] = dict(zip(feature_cols, mean_abs_shap_per_feature))

# Convert the dictionary to a DataFrame for easier handling
df_feature_importances = pd.DataFrame(feature_importances_per_class)



In [99]:
cnn_fi_df = pd.DataFrame(df_feature_importances.T.mean()).T

In [101]:
cnn_fi_df.to_csv('cnn_feature_importances.csv')