In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler  # Import StandardScaler
from sklearn.decomposition import PCA
import numpy as np
import time
import random
import joblib
np.random.seed(42)
random.seed(42)

# Step 1: Load the Data
features_df = pd.read_csv('Extended_features_extracted.csv')

# Step 2: Prepare the Data
featurecol = features_df.columns[:-2]
X = features_df[featurecol].values
y = features_df['sound_type'].values 

# Step 3: Standardize the features
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Split the data into training and testing sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 4: Apply PCA for Dimensionality Reduction
n_components = 25  # Adjust this based on your needs (e.g., 10, 15, or 20)
pca = PCA(n_components=n_components)

# Start time measurement
start_time = time.time()  # Record start time

X_train_pca = pca.fit_transform(X_train)
X_test_pca = pca.transform(X_test)

# End time measurement
end_time = time.time()  # Record end time

# Calculate PCA execution time
exec_time = end_time - start_time
print(f"Execution Time: {exec_time:.4f} seconds, for {n_components} components")  # Print execution time

# Step 5: Train an SVM Classifier on the PCA-transformed data
classifier = SVC(kernel='rbf', C=100, gamma='auto', random_state=42,probability=True)
classifier.fit(X_train_pca, y_train)

# Step 6: Evaluate the Classifier
y_pred = classifier.predict(X_test_pca)

labels = ["Sound_Violin", "Sound_Piano", "Sound_Guitar", "Sound_Drum", "Unknown"]

print("Classification Report:")
print(classification_report(y_test, y_pred, target_names=labels))

cm = confusion_matrix(y_test, y_pred, labels=labels)
cm_df = pd.DataFrame(cm, index=labels, columns=labels)
print("\nConfusion Matrix:")
print(cm_df)
joblib.dump(classifier, 'exsvm_model.pkl') 
joblib.dump(scaler, 'exscaler.pkl')  # Save the scaler
joblib.dump(pca, 'expca_model.pkl')  

Execution Time: 0.0329 seconds, for 25 components
Classification Report:
              precision    recall  f1-score   support

Sound_Violin       0.96      0.96      0.96       131
 Sound_Piano       0.89      0.77      0.83        53
Sound_Guitar       0.91      0.96      0.93       186
  Sound_Drum       0.99      0.99      0.99       281
     Unknown       1.00      0.17      0.29         6

    accuracy                           0.95       657
   macro avg       0.95      0.77      0.80       657
weighted avg       0.95      0.95      0.95       657


Confusion Matrix:
              Sound_Violin  Sound_Piano  Sound_Guitar  Sound_Drum  Unknown
Sound_Violin           278            2             0           1        0
Sound_Piano              2          179             5           0        0
Sound_Guitar             0           12            41           0        0
Sound_Drum               1            4             0         126        0
Unknown                  1            0     

['expca_model.pkl']

In [6]:
import numpy as np
import sounddevice as sd
import librosa
import joblib
from sklearn.metrics.pairwise import cosine_similarity
import time

# Load your trained SVM model, scaler, and PCA model
model = joblib.load('exsvm_model.pkl')
scaler = joblib.load('exscaler.pkl')
pca = joblib.load('expca_model.pkl')

# Amplitude threshold for detecting sound (adjust based on your environment)
AMPLITUDE_THRESHOLD = 0.00 # Experiment with this value to filter out quiet sounds

# Function to check if the audio amplitude exceeds the threshold
def is_sound_present(audio):
    return np.max(np.abs(audio)) > AMPLITUDE_THRESHOLD

# Function to extract features from audio
def extract_features(audio, sample_rate=44100):
    # MFCCs
    mfccs = np.mean(librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40).T, axis=0)
    # Chroma
    chroma = np.mean(librosa.feature.chroma_stft(y=audio, sr=sample_rate).T, axis=0)
    # Spectral Contrast
    spectral_contrast = np.mean(librosa.feature.spectral_contrast(y=audio, sr=sample_rate).T, axis=0)
    # Zero Crossing Rate
    zcr = np.mean(librosa.feature.zero_crossing_rate(y=audio).T, axis=0)
    # Root Mean Square Energy (RMSE)
    rmse = np.mean(librosa.feature.rms(y=audio).T, axis=0)
    # Harmonic-to-Noise Ratio (HNR)
    hnr = np.mean(librosa.effects.hpss(audio)[1])
    # Spectral Centroid
    spectral_centroid = np.mean(librosa.feature.spectral_centroid(y=audio, sr=sample_rate).T, axis=0)
    # Spectral Bandwidth
    spectral_bandwidth = np.mean(librosa.feature.spectral_bandwidth(y=audio, sr=sample_rate).T, axis=0)
    # Spectral Flatness
    spectral_flatness = np.mean(librosa.feature.spectral_flatness(y=audio).T, axis=0)
    # Spectral Rolloff
    spectral_rolloff = np.mean(librosa.feature.spectral_rolloff(y=audio, roll_percent=0.85).T, axis=0)
    stft = np.abs(librosa.stft(audio))
    spectral_flux = np.mean(np.sqrt(np.sum(np.diff(stft, axis=1) ** 2, axis=0)))
    return np.hstack((mfccs, chroma, spectral_contrast, zcr, rmse, [hnr], spectral_centroid, spectral_bandwidth, spectral_flatness, spectral_rolloff, spectral_flux))

def record_audio_continuous(sample_rate=44100, chunk_duration=3):
    print("Recording continuously...")
    while True:
        chunk = sd.rec(int(chunk_duration * sample_rate), samplerate=sample_rate, channels=1, dtype='float32')
        sd.wait()  # Wait for the chunk to be recorded
        yield chunk.flatten()

# Continuous recording and classification
audio_generator = record_audio_continuous()

print("Start playing your instrument...")

for audio in audio_generator:
    if is_sound_present(audio):
        # Extract features and preprocess
        features = extract_features(audio)
        features_scaled = scaler.transform([features])
        features_pca = pca.transform(features_scaled)

        # Make a prediction
        prediction = model.predict(features_pca)
        instrument = prediction[0]
        
        # Check the probability/confidence of the prediction
        probabilities = model.predict_proba(features_pca)
        confidence = np.max(probabilities)

        if confidence > 0.7:  # Adjust the confidence threshold as needed
            print(f"Detected Instrument: {instrument} (Confidence: {confidence * 100:.2f}%)")
        else:
            print("Ambient noise detected (Confidence too low)")
    else:
        print("No music detected")

    time.sleep(1)  # Pause before the next prediction

Start playing your instrument...
Recording continuously...
Detected Instrument: Sound_Drum (Confidence: 95.56%)
Detected Instrument: Sound_Drum (Confidence: 97.63%)
Detected Instrument: Sound_Drum (Confidence: 98.70%)
Detected Instrument: Sound_Drum (Confidence: 97.20%)
Detected Instrument: Sound_Drum (Confidence: 97.92%)
Detected Instrument: Sound_Drum (Confidence: 75.99%)


KeyboardInterrupt: 