In [1]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn import preprocessing as prp
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix

In [2]:
import warnings
warnings.filterwarnings("ignore")

In [3]:
target = pd.DataFrame()

In [4]:
df = pd.read_csv("features_3_sec.csv")

In [5]:
df.shape

(9990, 60)

In [6]:
df.columns

Index(['filename', 'length', 'chroma_stft_mean', 'chroma_stft_var', 'rms_mean',
       'rms_var', 'spectral_centroid_mean', 'spectral_centroid_var',
       'spectral_bandwidth_mean', 'spectral_bandwidth_var', 'rolloff_mean',
       'rolloff_var', 'zero_crossing_rate_mean', 'zero_crossing_rate_var',
       'harmony_mean', 'harmony_var', 'perceptr_mean', 'perceptr_var', 'tempo',
       'mfcc1_mean', 'mfcc1_var', 'mfcc2_mean', 'mfcc2_var', 'mfcc3_mean',
       'mfcc3_var', 'mfcc4_mean', 'mfcc4_var', 'mfcc5_mean', 'mfcc5_var',
       'mfcc6_mean', 'mfcc6_var', 'mfcc7_mean', 'mfcc7_var', 'mfcc8_mean',
       'mfcc8_var', 'mfcc9_mean', 'mfcc9_var', 'mfcc10_mean', 'mfcc10_var',
       'mfcc11_mean', 'mfcc11_var', 'mfcc12_mean', 'mfcc12_var', 'mfcc13_mean',
       'mfcc13_var', 'mfcc14_mean', 'mfcc14_var', 'mfcc15_mean', 'mfcc15_var',
       'mfcc16_mean', 'mfcc16_var', 'mfcc17_mean', 'mfcc17_var', 'mfcc18_mean',
       'mfcc18_var', 'mfcc19_mean', 'mfcc19_var', 'mfcc20_mean', 'mfcc20_var',
  

In [7]:
len(df["filename"].unique())

9990

In [8]:
df.head()

Unnamed: 0,filename,length,chroma_stft_mean,chroma_stft_var,rms_mean,rms_var,spectral_centroid_mean,spectral_centroid_var,spectral_bandwidth_mean,spectral_bandwidth_var,...,mfcc16_var,mfcc17_mean,mfcc17_var,mfcc18_mean,mfcc18_var,mfcc19_mean,mfcc19_var,mfcc20_mean,mfcc20_var,label
0,blues.00000.0.wav,66149,0.335406,0.091048,0.130405,0.003521,1773.065032,167541.630869,1972.744388,117335.771563,...,39.687145,-3.24128,36.488243,0.722209,38.099152,-5.050335,33.618073,-0.243027,43.771767,blues
1,blues.00000.1.wav,66149,0.343065,0.086147,0.112699,0.00145,1816.693777,90525.690866,2010.051501,65671.875673,...,64.748276,-6.055294,40.677654,0.159015,51.264091,-2.837699,97.03083,5.784063,59.943081,blues
2,blues.00000.2.wav,66149,0.346815,0.092243,0.132003,0.00462,1788.539719,111407.437613,2084.565132,75124.921716,...,67.336563,-1.76861,28.348579,2.378768,45.717648,-1.938424,53.050835,2.517375,33.105122,blues
3,blues.00000.3.wav,66149,0.363639,0.086856,0.132565,0.002448,1655.289045,111952.284517,1960.039988,82913.639269,...,47.739452,-3.841155,28.337118,1.218588,34.770935,-3.580352,50.836224,3.630866,32.023678,blues
4,blues.00000.4.wav,66149,0.335579,0.088129,0.143289,0.001701,1630.656199,79667.267654,1948.503884,60204.020268,...,30.336359,0.664582,45.880913,1.689446,51.363583,-3.392489,26.738789,0.536961,29.146694,blues


In [9]:
df.drop("filename", axis = 1, inplace=True)

In [10]:
encoder = prp.LabelEncoder()
X = df.drop("label", axis = 1)
y = encoder.fit_transform(df["label"])

In [11]:
standard_scaler = prp.StandardScaler()
max_abs_scaler = prp.MaxAbsScaler()
min_max_scaler = prp.MinMaxScaler()

In [12]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify = y)

In [13]:
X_train_std_scaled = standard_scaler.fit_transform(X_train)
X_test_std_scaled = standard_scaler.transform(X_test)

In [14]:
X_train_max_abs_scaled = max_abs_scaler.fit_transform(X_train)
X_test_max_abs_scaled = max_abs_scaler.transform(X_test)

In [15]:
X_train_min_max_scaled = min_max_scaler.fit_transform(X_train)
X_test_min_max_scaled = min_max_scaler.transform(X_test)

In [16]:
def model_assess(name, model, XTrain, XTest):
    model.fit(XTrain, y_train)
    preds = model.predict(XTest)
    print(f'{name} Accuracy:', round(accuracy_score(y_test, preds), 5), '\n')
    target.loc[name, "Accuracy"] = round(accuracy_score(y_test, preds), 5)

# Gradient boost with 1000 estimators had best result

In [17]:
from xgboost import XGBClassifier

In [18]:
model  = XGBClassifier(n_estimators = 1000, learning_rate = 0.01)

In [19]:
model_assess("xgb_no_scale", model, X_train, X_test)

xgb_no_scale Accuracy: 0.8642 



In [20]:
model_assess("xgb_std_scale", model, X_train_std_scaled , X_test_std_scaled)

xgb_std_scale Accuracy: 0.8642 



In [21]:
model_assess("xgb_max_abs_scale", model, X_train_max_abs_scaled, X_test_max_abs_scaled)

xgb_max_abs_scale Accuracy: 0.8642 



In [22]:
model_assess("xgb_min_max_scale", model, X_train_min_max_scaled, X_test_min_max_scaled)

xgb_min_max_scale Accuracy: 0.86453 



In [23]:
target

Unnamed: 0,Accuracy
xgb_no_scale,0.8642
xgb_std_scale,0.8642
xgb_max_abs_scale,0.8642
xgb_min_max_scale,0.86453


# The problms there's a chance that train and test set will have different part of the same song! thus probably we are getting this very high accuracy?

In [24]:
df = pd.read_csv("features_3_sec.csv")

In [25]:
# no idea why!!
df.drop("length", axis = 1, inplace = True)

In [26]:
df

Unnamed: 0,filename,chroma_stft_mean,chroma_stft_var,rms_mean,rms_var,spectral_centroid_mean,spectral_centroid_var,spectral_bandwidth_mean,spectral_bandwidth_var,rolloff_mean,...,mfcc16_var,mfcc17_mean,mfcc17_var,mfcc18_mean,mfcc18_var,mfcc19_mean,mfcc19_var,mfcc20_mean,mfcc20_var,label
0,blues.00000.0.wav,0.335406,0.091048,0.130405,0.003521,1773.065032,167541.630869,1972.744388,117335.771563,3714.560359,...,39.687145,-3.241280,36.488243,0.722209,38.099152,-5.050335,33.618073,-0.243027,43.771767,blues
1,blues.00000.1.wav,0.343065,0.086147,0.112699,0.001450,1816.693777,90525.690866,2010.051501,65671.875673,3869.682242,...,64.748276,-6.055294,40.677654,0.159015,51.264091,-2.837699,97.030830,5.784063,59.943081,blues
2,blues.00000.2.wav,0.346815,0.092243,0.132003,0.004620,1788.539719,111407.437613,2084.565132,75124.921716,3997.639160,...,67.336563,-1.768610,28.348579,2.378768,45.717648,-1.938424,53.050835,2.517375,33.105122,blues
3,blues.00000.3.wav,0.363639,0.086856,0.132565,0.002448,1655.289045,111952.284517,1960.039988,82913.639269,3568.300218,...,47.739452,-3.841155,28.337118,1.218588,34.770935,-3.580352,50.836224,3.630866,32.023678,blues
4,blues.00000.4.wav,0.335579,0.088129,0.143289,0.001701,1630.656199,79667.267654,1948.503884,60204.020268,3469.992864,...,30.336359,0.664582,45.880913,1.689446,51.363583,-3.392489,26.738789,0.536961,29.146694,blues
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9985,rock.00099.5.wav,0.349126,0.080515,0.050019,0.000097,1499.083005,164266.886443,1718.707215,85931.574523,3015.559458,...,42.485981,-9.094270,38.326839,-4.246976,31.049839,-5.625813,48.804092,1.818823,38.966969,rock
9986,rock.00099.6.wav,0.372564,0.082626,0.057897,0.000088,1847.965128,281054.935973,1906.468492,99727.037054,3746.694524,...,32.415203,-12.375726,66.418587,-3.081278,54.414265,-11.960546,63.452255,0.428857,18.697033,rock
9987,rock.00099.7.wav,0.347481,0.089019,0.052403,0.000701,1346.157659,662956.246325,1561.859087,138762.841945,2442.362154,...,78.228149,-2.524483,21.778994,4.809936,25.980829,1.775686,48.582378,-0.299545,41.586990,rock
9988,rock.00099.8.wav,0.387527,0.084815,0.066430,0.000320,2084.515327,203891.039161,2018.366254,22860.992562,4313.266226,...,28.323744,-5.363541,17.209942,6.462601,21.442928,2.354765,24.843613,0.675824,12.787750,rock


In [27]:
df["filename"][0]

'blues.00000.0.wav'

In [28]:
df["filename"] = df["filename"].str[:-6]

In [29]:
len(df["filename"].unique())

1000

In [30]:
temp = pd.DataFrame(df["filename"].value_counts())

In [31]:
temp[temp["count"] != 10]

Unnamed: 0_level_0,count
filename,Unnamed: 1_level_1
hiphop.00031,9
rock.00038,9
country.00004,9
rock.00027,9
country.00003,9
classical.00051,9
country.00007,9
classical.00049,9
hiphop.00032,9
disco.00014,9


In [32]:
df.label.value_counts()

label
blues        1000
jazz         1000
metal        1000
pop          1000
reggae       1000
disco         999
classical     998
hiphop        998
rock          998
country       997
Name: count, dtype: int64

In [33]:
df["filename"][9989]

'rock.00099'

In [34]:
test = pd.DataFrame(columns=df.columns)

# Iterate through unique labels and filter rows for the 98th and 99th files
for label in df["label"].unique():
    label_df = df[df['label'] == label].iloc[-200:]
    test = pd.concat([test, label_df], ignore_index=True)

test.shape


(2000, 59)

In [35]:
len(test["filename"].unique())

200

In [36]:
test_set_value_count_df = pd.DataFrame(test["filename"].value_counts())

In [37]:
test_set_value_count_df

Unnamed: 0_level_0,count
filename,Unnamed: 1_level_1
blues.00080,10
metal.00097,10
metal.00087,10
metal.00088,10
metal.00089,10
...,...
disco.00089,10
disco.00090,10
disco.00091,10
disco.00092,10


In [38]:
test_set_value_count_df["count"].value_counts()

count
10    200
Name: count, dtype: int64

In [39]:
test["label"].value_counts()

label
blues        200
classical    200
country      200
disco        200
hiphop       200
jazz         200
metal        200
pop          200
reggae       200
rock         200
Name: count, dtype: int64

In [40]:
import pandas as pd

# Assuming you have the original 'df' and 'test_df' DataFrames

# Merge the 'df' and 'test_df' DataFrames with the '_merge' indicator
merged_df = df.merge(test, on=df.columns.to_list(), how='left', indicator=True)

# Select rows that are only in the 'df' by filtering on the '_merge' column
train_df = merged_df[merged_df['_merge'] == 'left_only'].drop(columns=['_merge'])

# Reset the index of the 'train_df' if needed
train_df.reset_index(drop=True, inplace=True)

train_df.shape


(7990, 59)

In [41]:
encoder = prp.LabelEncoder()
X_train = train_df.drop(["filename", "label"], axis = 1)
y_train = encoder.fit_transform(train_df["label"])

In [42]:
X_test = test.drop(["filename", "label"], axis = 1)
y_test = encoder.fit_transform(test["label"])

In [43]:
min_max_scaler = prp.MinMaxScaler()
X_train_min_max_scaled = min_max_scaler.fit_transform(X_train)
X_test_min_max_scaled = min_max_scaler.transform(X_test)

In [44]:
y_test

array([0, 0, 0, ..., 9, 9, 9])

In [45]:
model_assess("xgb_no_scale_no_leak", model, X_train, X_test)

xgb_no_scale_no_leak Accuracy: 0.4725 



In [46]:
model_assess("xgb_min_max_scale_no_leak", model, X_train_min_max_scaled, X_test_min_max_scaled)

xgb_min_max_scale_no_leak Accuracy: 0.471 



In [57]:
target

Unnamed: 0,Accuracy
xgb_no_scale,0.8642
xgb_std_scale,0.8642
xgb_max_abs_scale,0.8642
xgb_min_max_scale,0.86453
xgb_no_scale_no_leak,0.4725
xgb_min_max_scale_no_leak,0.471
ANN_min_max_scaled_no_leak,0.4575


In [48]:
from tensorflow import keras
from keras.models import Sequential
model_cnn = keras.models.Sequential([
    keras.layers.Dense(2048, activation="relu", input_shape=(X_train.shape[1],)),
    keras.layers.Dropout(0.2),

    keras.layers.Dense(1024,activation="relu"),
    keras.layers.Dropout(0.2),

    keras.layers.Dense(512,activation="relu"),
    keras.layers.Dropout(0.2),
    
    keras.layers.Dense(256,activation="relu"),
    keras.layers.Dropout(0.2),
    
    keras.layers.Dense(128,activation="relu"),
    keras.layers.Dropout(0.2),
    
    keras.layers.Dense(64,activation="relu"),
    keras.layers.Dropout(0.2),
    
    keras.layers.Dense(10, activation="softmax"),
    
])

2023-10-25 11:11:54.954063: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-10-25 11:11:57.081271: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-10-25 11:11:57.081326: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-10-25 11:11:57.084309: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-10-25 11:11:57.926967: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-10-25 11:11:57.928049: I tensorflow/core/platform/cpu_feature_guard.cc:182] This Tens

In [53]:
model_cnn.compile(optimizer="adam", loss='sparse_categorical_crossentropy', metrics='accuracy')
model_cnn.fit(X_train_min_max_scaled, y_train, validation_split=0.2, epochs=100, batch_size=32, verbose = 0)

<keras.src.callbacks.History at 0x7efc066939a0>

In [54]:
test_loss, test_accuracy = model_cnn.evaluate(X_test_min_max_scaled, y_test, batch_size=128)
print("The test loss is :",test_loss)
print("\nThe test Accuracy is :",test_accuracy*100)

The test loss is : 17.3511905670166

The test Accuracy is : 45.750001072883606


In [55]:
target.loc["ANN_min_max_scaled_no_leak", "Accuracy"] = test_accuracy

In [56]:
target

Unnamed: 0,Accuracy
xgb_no_scale,0.8642
xgb_std_scale,0.8642
xgb_max_abs_scale,0.8642
xgb_min_max_scale,0.86453
xgb_no_scale_no_leak,0.4725
xgb_min_max_scale_no_leak,0.471
ANN_min_max_scaled_no_leak,0.4575
