In [1]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
import librosa
import librosa.display
import IPython.display as ipd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
import joblib
from sklearn.metrics import f1_score, confusion_matrix
import seaborn as sns

In [2]:
# Directory that holds the labelled .wav recordings used for training.
folder_path = 'E:/MSc Research/MSc project/projectMSc/allRagas'

# os.listdir returns entries in arbitrary, platform-dependent order. Sorting
# keeps the row order of the feature table stable, so a seeded
# train/test split over these files selects the same files on every run.
file_names = sorted(file for file in os.listdir(folder_path) if file.endswith('.wav'))


FileNotFoundError: [WinError 3] The system cannot find the path specified: 'D:/projectMSc/allRagas'

In [None]:
# Class label for each recording: the file name with its last 9 characters
# removed (the slice width is fixed, so it assumes a uniform trailing pattern
# on every file name -- TODO confirm against the dataset naming scheme).
Instrument_names = list(map(lambda wav_name: wav_name[:-9], file_names))

# Quick sanity check of the first few file names and their derived labels.
print(file_names[:5])
print(Instrument_names[:5])

In [None]:
def features_extractor(file_or_audio, sample_rate=None):
    """Return a fixed-length MFCC summary vector for one recording.

    Parameters
    ----------
    file_or_audio : str, os.PathLike, or array-like
        Either a path to an audio file (decoded with ``librosa.load`` using
        its default sampling rate) or an already decoded audio signal.
    sample_rate : int or None, optional
        Sampling rate of ``file_or_audio`` when it is a decoded signal.
        If omitted, 22050 Hz is assumed, which is the default rate librosa
        uses for both ``load`` and ``feature.mfcc``. Ignored when a path is
        given, because the rate returned by ``librosa.load`` is used instead.

    Returns
    -------
    numpy.ndarray
        The 40 MFCC coefficients, each averaged over all frames.
    """
    default_sr = 22050

    # Accept pathlib.Path (and other PathLike objects) as well as plain
    # strings; previously a Path fell through to the raw-audio branch.
    if isinstance(file_or_audio, (str, os.PathLike)):
        audio, sample_rate = librosa.load(file_or_audio)
    else:
        audio = file_or_audio
        # Without this fallback, sr=None was forwarded to librosa.feature.mfcc
        # whenever a decoded signal was passed without an explicit rate.
        if sample_rate is None:
            sample_rate = default_sr

    mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
    # Transpose to (frames, coefficients) and average over the frame axis.
    mfccs_scaled_features = np.mean(mfccs.T, axis=0)
    return mfccs_scaled_features

# Accumulator for [feature_vector, label] pairs filled by the extraction loop.
extracted_features = []

In [None]:
# Start from an empty list every time this cell runs. The accumulator used to
# be initialised only in a different cell, so re-running this cell appended
# every recording again and silently duplicated rows in the training data.
extracted_features = []
for file_name in file_names:
    file_path = os.path.join(folder_path, file_name)
    features = features_extractor(file_path)
    # The label is the file name without its last 9 characters.
    extracted_features.append([features, file_name[:-9]])

features_df = pd.DataFrame(extracted_features, columns=['feature', 'class'])

# Feature matrix (one row of MFCC means per recording) and label vector.
X = np.array(features_df['feature'].tolist())
y = np.array(features_df['class'].tolist())

# Kept as the last expression so the notebook actually renders the preview;
# in the middle of the cell its result was discarded.
features_df.head()


In [None]:
# Fit the encoder on the string labels, then map them to integer codes.
le = LabelEncoder().fit(y)
y_encoded = le.transform(y)

# Hold out 20% of the recordings for evaluation; the seed fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_encoded,
    test_size=0.2,
    random_state=127,
)



In [None]:
# k-nearest-neighbours classifier (k=5) trained on the MFCC feature vectors.
# fit() returns the estimator itself, so construction and training chain.
knn = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)
# Echo the fitted estimator as the cell output.
knn

In [None]:
# Persist the trained classifier.
joblib_file = "knn_model.pkl"
joblib.dump(knn, joblib_file)
print(f"Model saved to {joblib_file}")

# The classifier predicts integer codes; without the fitted LabelEncoder a
# fresh session cannot translate them back into class names, so persist the
# encoder alongside the model.
encoder_file = "label_encoder.pkl"
joblib.dump(le, encoder_file)
print(f"Label encoder saved to {encoder_file}")

In [None]:
# Predict the held-out recordings and report the share classified correctly.
y_pred = knn.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Test set accuracy: {accuracy * 100}%")



In [None]:
# Weighted F1: per-class F1 averaged with weights proportional to class support.
f1 = f1_score(y_test, y_pred, average='weighted')
print(f"Test set F1 score: {f1}")

# Pin the label set to every class the encoder knows. Otherwise a class that
# is absent from both y_test and y_pred is dropped from the matrix and the
# remaining rows/columns no longer line up with the encoder's integer codes.
class_ids = np.arange(len(le.classes_))
conf_matrix = confusion_matrix(y_test, y_pred, labels=class_ids)
print("Confusion Matrix:")
print(conf_matrix)

plt.figure(figsize=(10, 7))
# Show the original class names on the axes instead of bare integer codes.
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=le.classes_,
    yticklabels=le.classes_,
)
plt.ylabel('Actual')
plt.xlabel('Predicted')
plt.title('Confusion Matrix')
plt.show()



In [None]:
# Read the pickled classifier back from disk to confirm it can be restored.
knn_loaded = joblib.load(filename=joblib_file)
print("Model loaded successfully")


In [None]:
# Recording to classify; the path is resolved relative to the notebook's
# working directory.
file = "Hues of _ Abinaya Shenbagaraj _ Raghavasimhan Sa _  Vox Et Violon _ Thiruppugazh _.wav"

# Decode the clip and embed an audio player so it can be auditioned inline.
x, sr1 = librosa.load(file)
ipd.Audio(data=x, rate=sr1)

In [None]:
# Extract the same MFCC summary used for training and shape it as a single
# sample (1, n_features), which is what predict() expects.
prediction_feature = features_extractor(file)
prediction_feature = prediction_feature.reshape(1, -1)

# Predict with the model that was reloaded from disk rather than the
# in-memory one, so the saved artifact is what actually gets exercised;
# previously knn_loaded was created but never used.
predicted_class_label = knn_loaded.predict(prediction_feature)

# Map the integer code back to the original class name.
prediction_class = le.inverse_transform(predicted_class_label)
print("Predicted class:", prediction_class[0])