In [1]:
!pip install xgboost
!pip install nbformat

Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable


In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import AdaBoostClassifier
import librosa
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

In [3]:
import os
import pandas as pd
import numpy as np
from sklearn.decomposition import PCA
from scipy.stats import skew, kurtosis

INDEX = 13
DATA_DIR = './new-test-data/test-mfcc-v2'
DATA2_DIR = './new-test-data/test-mfcc-v2-copy'

def aggregate_mfcc_selective(mfcc_data):
    mfcc_selected = mfcc_data[:20, :] # Select first 20 MFCCs
    
    mfcc_mean = np.mean(mfcc_selected, axis=1)
    mfcc_std = np.std(mfcc_selected, axis=1)
    mfcc_max = np.max(mfcc_selected, axis=1)
    mfcc_min = np.min(mfcc_selected, axis=1)
    
    features = np.concatenate([mfcc_mean, mfcc_std, mfcc_max, mfcc_min])
    return features

def aggregate_mfcc_selective2(mfcc_data):
    mfcc_selected = mfcc_data[:3, :] # Select first 20 MFCCs
    
    mfcc_mean = np.mean(mfcc_selected, axis=1)
    mfcc_std = np.std(mfcc_selected, axis=1)
    mfcc_max = np.max(mfcc_selected, axis=1)
    mfcc_min = np.min(mfcc_selected, axis=1)
    
    features = np.concatenate([mfcc_mean, mfcc_std, mfcc_max, mfcc_min])
    return features

# Initialize lists to store features, file names, and labels
mfcc_all_songs = []
file_names = []
generated_features = []
labels = []

# total number of files in the directory
total_files = len(os.listdir(DATA_DIR))
index = 1

# Iterate over all files in the specified directory
for file_name in os.listdir(DATA_DIR):
    file_path = os.path.join(DATA_DIR, file_name)
    file_path2 = os.path.join(DATA2_DIR, file_name)
    print(file_path, file_path2)
    mfcc_data = pd.read_csv(file_path, header=None).values
    mfcc_data = mfcc_data[:20] # Select first 20 MFCCs
    mfcc_data2 = pd.read_csv(file_path2, header=None).values
    # Compute aggregated MFCC features
    aggregated_features = aggregate_mfcc_selective(mfcc_data)
    aggregated_features2 = aggregate_mfcc_selective2(mfcc_data2)

    skewness = skew(mfcc_data, axis=1)
    kurt = kurtosis(mfcc_data, axis=1)
    range_max_min = np.ptp(mfcc_data, axis=1)

    delta_mfcc = librosa.feature.delta(mfcc_data, order=1)
    delta_delta_mfcc = librosa.feature.delta(mfcc_data, order=2)

    delta_mean = np.mean(delta_mfcc, axis=1)
    delta_std = np.std(delta_mfcc, axis=1)
    delta_max = np.max(delta_mfcc, axis=1)
    delta_min = np.min(delta_mfcc, axis=1)

    delta_delta_mean = np.mean(delta_mfcc, axis=1)
    delta_delta_std = np.std(delta_mfcc, axis=1)
    delta_delta_max = np.max(delta_mfcc, axis=1)
    delta_delta_min = np.min(delta_mfcc, axis=1)

    delta_mfcc2 = librosa.feature.delta(mfcc_data2, order=1)
    delta_delta_mfcc2 = librosa.feature.delta(mfcc_data2, order=2)

    delta_mean2 = np.mean(delta_mfcc2, axis=1)
    delta_std2 = np.std(delta_mfcc2, axis=1)
    delta_max2 = np.max(delta_mfcc2, axis=1)
    delta_min2 = np.min(delta_mfcc2, axis=1)

    delta_delta_mean2 = np.mean(delta_mfcc2, axis=1)
    delta_delta_std2 = np.std(delta_mfcc2, axis=1)
    delta_delta_max2 = np.max(delta_mfcc2, axis=1)
    delta_delta_min2 = np.min(delta_mfcc2, axis=1)

    PCA_mfcc = PCA(n_components=5)
    PCA_mfcc.fit(mfcc_data.T)
    PCA_mfcc_features = PCA_mfcc.components_.flatten()

    # Compile all features into a single vector
    features = np.concatenate([
        aggregated_features.flatten(),
        range_max_min.flatten(),
        delta_mean.flatten(),
        delta_std.flatten(),
        delta_max.flatten(),
        delta_min.flatten(),
        delta_mean2.flatten(),
        delta_std2.flatten(),
        delta_max2.flatten(),
        delta_min2.flatten(),
        PCA_mfcc_features.flatten(),
    ])

    # Determine label based on filename pattern
    if file_name.startswith('Asha-Bhosle'):
        label = 'Asha Bhosale'
    elif file_name.startswith('Kishore-Kumar'):
        label = 'Kishore Kumar'
    elif file_name.startswith('Lavni'):
        label = 'Lavni'
    elif file_name.startswith('Michael-Jackson'):
        label = 'Michael Jackson'
    elif file_name.startswith('Jana-Gana-Mana'):
        label = 'Jana-Gana-Mana'
    elif file_name.startswith('Bhavgeet'):
        label = 'Bhavgeet'
    else:
        print(f'Unrecognized label for file {file_name}')
        
    print(f'Processed {file_name}, {index}/{total_files}')
    # print(features.shape)
    generated_features.append(features)
    file_names.append(file_name)
    labels.append(label)
    index += 1

# Create DataFrame and save to CSV
generated_features = np.vstack(generated_features)
total_features = generated_features.shape[1]
feature_columns = [f'feature_{i}' for i in range(total_features)]

total_data_df = pd.DataFrame(generated_features, columns=feature_columns)
total_data_df.insert(0, 'File', file_names)
total_data_df.insert(1, 'Label', labels)

# Save generated features to 'features_generated.csv'
total_data_df.to_csv(f'features_generated_{INDEX}.csv', index=False)


./new-test-data/test-mfcc-v2/Asha-Bhosle-23.csv ./new-test-data/test-mfcc-v2-copy/Asha-Bhosle-23.csv
Processed Asha-Bhosle-23.csv, 1/495
./new-test-data/test-mfcc-v2/Michael-Jackson-55.csv ./new-test-data/test-mfcc-v2-copy/Michael-Jackson-55.csv
Processed Michael-Jackson-55.csv, 2/495
./new-test-data/test-mfcc-v2/Kishore-Kumar-63.csv ./new-test-data/test-mfcc-v2-copy/Kishore-Kumar-63.csv
Processed Kishore-Kumar-63.csv, 3/495
./new-test-data/test-mfcc-v2/Asha-Bhosle-32.csv ./new-test-data/test-mfcc-v2-copy/Asha-Bhosle-32.csv
Processed Asha-Bhosle-32.csv, 4/495
./new-test-data/test-mfcc-v2/Jana-Gana-Mana-8-MFCC.csv ./new-test-data/test-mfcc-v2-copy/Jana-Gana-Mana-8-MFCC.csv
Processed Jana-Gana-Mana-8-MFCC.csv, 5/495
./new-test-data/test-mfcc-v2/Bhavgeet-47.csv ./new-test-data/test-mfcc-v2-copy/Bhavgeet-47.csv
Processed Bhavgeet-47.csv, 6/495
./new-test-data/test-mfcc-v2/Lavni-20.csv ./new-test-data/test-mfcc-v2-copy/Lavni-20.csv
Processed Lavni-20.csv, 7/495
./new-test-data/test-mfcc-v2/

In [4]:

#perform EDA here
#Correlation HeatMap
#Scatterplots and Boxplots


In [5]:

# Splitting the data into features (X) and labels (y)
X = total_data_df.drop(columns=['File', 'Label'])
y = total_data_df['Label']


In [16]:

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [17]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
import xgboost as xgb
from sklearn.preprocessing import LabelEncoder
from sklearn.neural_network import MLPClassifier

le = LabelEncoder()
y_train_encoded = le.fit_transform(y_train)
y_test_encoded = le.transform(y_test)

# Dictionary to store models and results
models = {
    "Logistic Regression": LogisticRegression(max_iter=1000),
    #'SVM': SVC(kernel='rbf', C=1.0, probability=True), # SVC with probability=True for probability estimates
    'K-Nearest Neighbors': KNeighborsClassifier(n_neighbors=5),
    'Random Forest': RandomForestClassifier(n_estimators=100, random_state=42),
    'Gradient Boosting': GradientBoostingClassifier(n_estimators=100, learning_rate=0.1),
    'AdaBoost': AdaBoostClassifier(n_estimators=50),
    'Naive Bayes': GaussianNB(),
    'Decision Tree': DecisionTreeClassifier(max_depth=10),
    'XGBoost': xgb.XGBClassifier(n_estimators=100, learning_rate=0.1),
    'SVC_linear': SVC(kernel='linear', probability=True),
    'SVC_rbf': SVC(kernel='rbf', probability=True),
    'RandomForest_1': RandomForestClassifier(min_samples_leaf=1),
    'RandomForest_3': RandomForestClassifier(min_samples_leaf=3),
    'RandomForest_5': RandomForestClassifier(min_samples_leaf=5),
    'NeuralNetwork_1': MLPClassifier(hidden_layer_sizes=(5)),
    'NeuralNetwork_2': MLPClassifier(hidden_layer_sizes=(5, 5)),
    'NeuralNetwork_3': MLPClassifier(hidden_layer_sizes=(5, 5, 5)),
    'NeuralNetwork_4': MLPClassifier(hidden_layer_sizes=(10))

}


In [18]:
import plotly.graph_objects as go

results = {}

# Training, prediction, and metric calculations
for model_name, model in models.items():
    print(f"\nTraining {model_name}...")
    model.fit(X_train, y_train_encoded)  # Train the model
    y_pred = model.predict(X_test)  # Predict on the test set
    
    # Storing metrics
    results[model_name] = {
        "Accuracy": accuracy_score(y_test_encoded, y_pred),
        "Precision": precision_score(y_test_encoded, y_pred, average='weighted'),
        "Recall": recall_score(y_test_encoded, y_pred, average='weighted'),
        "F1 Score": f1_score(y_test_encoded, y_pred, average='weighted'),
        "Confusion Matrix": confusion_matrix(y_test_encoded, y_pred),
        "Classification Report": classification_report(y_test_encoded, y_pred)
    }

    # Plotting the confusion matrix using Plotly with numbers displayed and square grid
    confusion_mat = results[model_name]["Confusion Matrix"]
    fig = go.Figure(data=go.Heatmap(
        z=confusion_mat,
        x=[f"Predicted {i}" for i in range(len(confusion_mat))],
        y=[f"Actual {i}" for i in range(len(confusion_mat))],
        colorscale="Blues",
        text=confusion_mat,  # Display numbers in each cell
        texttemplate="%{text}",  # Format to show text values
        showscale=True
    ))
    fig.update_layout(
        title=f"Confusion Matrix for {model_name}",
        xaxis_title="Predicted Label",
        yaxis_title="Actual Label",
        xaxis=dict(scaleanchor="y", scaleratio=1)  # Ensures square cells
    )
    fig.show()



Training Logistic Regression...



lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression




Training K-Nearest Neighbors...



Training Random Forest...



Training Gradient Boosting...



Training AdaBoost...







Training Naive Bayes...



Training Decision Tree...



Training XGBoost...



Training SVC_linear...



Training SVC_rbf...



Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.




Training RandomForest_1...



Training RandomForest_3...



Training RandomForest_5...



Training NeuralNetwork_1...



Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.




Training NeuralNetwork_2...



Stochastic Optimizer: Maximum iterations (200) reached and the optimization hasn't converged yet.


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.




Training NeuralNetwork_3...



Stochastic Optimizer: Maximum iterations (200) reached and the optimization hasn't converged yet.


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.




Training NeuralNetwork_4...


In [19]:

# Display the results
for model_name, metrics in results.items():
    print(f"\nMetrics for {model_name}:\n")
    for metric, value in metrics.items():
        print(f"{metric}:\n{value}\n")



Metrics for Logistic Regression:

Accuracy:
0.6767676767676768

Precision:
0.6870679924155859

Recall:
0.6767676767676768

F1 Score:
0.6805571295685288

Confusion Matrix:
[[11  3  0  1  6  0]
 [ 1  9  0  2  3  0]
 [ 0  0 11  1  0  1]
 [ 2  0  0 17  1  1]
 [ 5  1  1  1  6  0]
 [ 1  0  0  0  1 13]]

Classification Report:
              precision    recall  f1-score   support

           0       0.55      0.52      0.54        21
           1       0.69      0.60      0.64        15
           2       0.92      0.85      0.88        13
           3       0.77      0.81      0.79        21
           4       0.35      0.43      0.39        14
           5       0.87      0.87      0.87        15

    accuracy                           0.68        99
   macro avg       0.69      0.68      0.68        99
weighted avg       0.69      0.68      0.68        99



Metrics for K-Nearest Neighbors:

Accuracy:
0.43434343434343436

Precision:
0.43716986668238317

Recall:
0.43434343434343436

F1 Sco

In [20]:
print("\nAll Models Accuracy:")
for model_name, metrics in results.items():
    print(f"{model_name}: {metrics['Accuracy']:.4f}")


All Models Accuracy:
Logistic Regression: 0.6768
K-Nearest Neighbors: 0.4343
Random Forest: 0.8283
Gradient Boosting: 0.7778
AdaBoost: 0.6061
Naive Bayes: 0.7172
Decision Tree: 0.7071
XGBoost: 0.7980
SVC_linear: 0.7273
SVC_rbf: 0.3232
RandomForest_1: 0.8283
RandomForest_3: 0.8283
RandomForest_5: 0.8182
NeuralNetwork_1: 0.1313
NeuralNetwork_2: 0.1515
NeuralNetwork_3: 0.2121
NeuralNetwork_4: 0.3434


In [11]:
print("\nAll Models Precison:")
for model_name, metrics in results.items():
    print(f"{model_name}: {metrics['Precision']:.4f}")


All Models Precison:
Logistic Regression: 0.6764
K-Nearest Neighbors: 0.3983
Random Forest: 0.8399
Gradient Boosting: 0.8183
AdaBoost: 0.4367
Naive Bayes: 0.7173
Decision Tree: 0.7142
XGBoost: 0.8483
SVC_linear: 0.7629
SVC_rbf: 0.2809
RandomForest_1: 0.8707
RandomForest_3: 0.8206
RandomForest_5: 0.8132
NeuralNetwork_1: 0.2808
NeuralNetwork_2: 0.0240
NeuralNetwork_3: 0.0395
NeuralNetwork_4: 0.1664


In [12]:
print("\nAll Models Recall:")
for model_name, metrics in results.items():
    print(f"{model_name}: {metrics['Recall']:.4f}")


All Models Recall:
Logistic Regression: 0.6644
K-Nearest Neighbors: 0.3960
Random Forest: 0.8322
Gradient Boosting: 0.8121
AdaBoost: 0.4899
Naive Bayes: 0.7114
Decision Tree: 0.7047
XGBoost: 0.8456
SVC_linear: 0.7383
SVC_rbf: 0.2550
RandomForest_1: 0.8591
RandomForest_3: 0.7987
RandomForest_5: 0.7919
NeuralNetwork_1: 0.1946
NeuralNetwork_2: 0.1544
NeuralNetwork_3: 0.1208
NeuralNetwork_4: 0.1812


In [13]:
print("\nAll Models f1-Score:")
for model_name, metrics in results.items():
    print(f"{model_name}: {metrics['F1 Score']:.4f}")


All Models f1-Score:
Logistic Regression: 0.6696
K-Nearest Neighbors: 0.3798
Random Forest: 0.8333
Gradient Boosting: 0.8116
AdaBoost: 0.4031
Naive Bayes: 0.7101
Decision Tree: 0.7014
XGBoost: 0.8446
SVC_linear: 0.7458
SVC_rbf: 0.1676
RandomForest_1: 0.8600
RandomForest_3: 0.8030
RandomForest_5: 0.7948
NeuralNetwork_1: 0.0988
NeuralNetwork_2: 0.0415
NeuralNetwork_3: 0.0480
NeuralNetwork_4: 0.1583


In [14]:
#show decision boundary
#ROC Curve

In [15]:
#identify the three songs of each category
#showcase the waveform for each of the three songs found for each category