In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
import numpy as np
import time
import random
import joblib
import matplotlib.pyplot as plt

np.random.seed(42)
random.seed(42)


features_df = pd.read_csv('features_extracted.csv')

featurecol = features_df.columns[:-2]
X = features_df[featurecol].values
y = features_df['sound_type'].values

scaler = StandardScaler()
X = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

n_components = 25 
pca = PCA(n_components=n_components)

start_time = time.time()

X_train_pca = pca.fit_transform(X_train)
X_test_pca = pca.transform(X_test)

end_time = time.time() 


exec_time = end_time - start_time
print(f"Execution Time: {exec_time:.4f} seconds, for {n_components} components")  # Print execution time

#SVM
classifier = SVC(kernel='rbf', C=100, gamma='auto', random_state=42, probability=True)
classifier.fit(X_train_pca, y_train)


y_pred = classifier.predict(X_test_pca)

print("Classification Report:")
print(classification_report(y_test, y_pred))


unique_classes = np.unique(y) 
cm = confusion_matrix(y_test, y_pred, labels=unique_classes)
cm_df = pd.DataFrame(cm, index=unique_classes, columns=unique_classes)
print("\nConfusion Matrix:")
print(cm_df)

joblib.dump(classifier, 'svm_model.pkl') 
joblib.dump(scaler, 'scaler.pkl')  
joblib.dump(pca, 'pca_model.pkl')  

Execution Time: 0.6713 seconds, for 25 components
Classification Report:
              precision    recall  f1-score   support

  Sound_Drum       0.97      0.95      0.96       274
Sound_Guitar       0.94      0.96      0.95       293
 Sound_Piano       0.98      0.93      0.96       210
Sound_Violin       0.96      0.99      0.98       527
     Unknown       0.00      0.00      0.00         9

    accuracy                           0.96      1313
   macro avg       0.77      0.77      0.77      1313
weighted avg       0.95      0.96      0.96      1313


Confusion Matrix:
              Sound_Drum  Sound_Guitar  Sound_Piano  Sound_Violin  Unknown
Sound_Drum           259             7            2             6        0
Sound_Guitar           0           282            0            11        0
Sound_Piano            0             9          196             4        1
Sound_Violin           2             0            1           524        0
Unknown                6             1      

['pca_model.pkl']

In [9]:
import numpy as np
import joblib
import librosa

# Function to extract features from a WAV file
def extract_features(file_path):
    audio, sample_rate = librosa.load(file_path, res_type='kaiser_fast')
    
    # MFCCs
    mfccs = np.mean(librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40).T, axis=0)
    
    # Chroma
    chroma = np.mean(librosa.feature.chroma_stft(y=audio, sr=sample_rate).T, axis=0)
    
    # Spectral Contrast
    spectral_contrast = np.mean(librosa.feature.spectral_contrast(y=audio, sr=sample_rate).T, axis=0)
    
    # Zero Crossing Rate
    zcr = np.mean(librosa.feature.zero_crossing_rate(y=audio).T, axis=0)
    
    # Root Mean Square Energy (RMSE)
    rmse = np.mean(librosa.feature.rms(y=audio).T, axis=0)
    
    # Harmonic-to-Noise Ratio (HNR)
    hnr = np.mean(librosa.effects.hpss(audio)[1])  # Harmonic part for HNR
    
    # Spectral Centroid
    spectral_centroid = np.mean(librosa.feature.spectral_centroid(y=audio, sr=sample_rate).T, axis=0)
    
    # Spectral Bandwidth
    spectral_bandwidth = np.mean(librosa.feature.spectral_bandwidth(y=audio, sr=sample_rate).T, axis=0)
    
    # Spectral Flatness
    spectral_flatness = np.mean(librosa.feature.spectral_flatness(y=audio).T, axis=0)
    
    # Spectral Rolloff
    spectral_rolloff = np.mean(librosa.feature.spectral_rolloff(y=audio, roll_percent=0.85).T, axis=0)
    
    # Short-time Fourier Transform (STFT) for spectral flux
    stft = np.abs(librosa.stft(audio))
    spectral_flux = np.mean(np.sqrt(np.sum(np.diff(stft, axis=1)**2, axis=0)))  # Mean spectral flux
    
    # Combine all features into a single array
    return np.hstack((mfccs, chroma, spectral_contrast, zcr, rmse, [hnr], spectral_centroid, spectral_bandwidth, spectral_flatness, spectral_rolloff, spectral_flux))

# Load the trained model and PCA model
model = joblib.load('svm_model.pkl')
pca = joblib.load('pca_model.pkl')  # Load your PCA model

# Path to the WAV file you want to predict
file_path = "C:\\Users\\nandh\\Downloads\\un_padham.wav"

# Extract features from the WAV file
features = extract_features(file_path)

# Reshape the features for prediction (model expects 2D input)
features = features.reshape(1, -1)

# Transform features using PCA
features_pca = pca.transform(features)

# Make prediction
prediction = model.predict(features_pca)

# Print the predicted class
print(f'The predicted class is: {prediction[0]}')

The predicted class is: Sound_Violin
