In [22]:
import os
import librosa
import numpy as np
import soundfile as sf
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import sounddevice as sd



In [216]:
# Directory that holds the free-spoken-digit-dataset recordings.
ds = "C:/Users/abamr/free-spoken-digit-dataset/recordings"
# Containers for the feature vectors (x) and their labels (y); the
# dataset-loading cell is responsible for populating them.
x = []
y = []


In [215]:
def extract_features(path, sr=4500, n_mfcc=20, n_fft=2044):
    """Turn one audio file into a fixed-length feature vector.

    The clip is loaded with librosa (resampled to ``sr``), peak-normalised,
    and summarised by the time-average of every MFCC coefficient followed by
    the mean zero-crossing rate.

    Parameters
    ----------
    path : str
        Audio file to read; passed straight to ``librosa.load``.
    sr : int, optional
        Sample rate the audio is resampled to while loading.
    n_mfcc : int, optional
        Number of MFCC coefficients to compute.
    n_fft : int, optional
        FFT window length used for the MFCC computation.

    Returns
    -------
    numpy.ndarray
        1-D vector of length ``n_mfcc + 1`` (21 with the defaults): the
        per-coefficient MFCC means, then the mean zero-crossing rate.

    Raises
    ------
    ValueError
        If the file contains no audio samples.
    """
    audio, sr = librosa.load(path, sr=sr)  # Load audio
    if audio.size == 0:
        raise ValueError(f"no audio samples found in {path!r}")

    # Peak-normalise. A completely silent clip has a peak of 0; dividing by it
    # would turn every sample (and therefore every feature) into NaN, so such
    # clips are left as all zeros instead.
    peak = np.max(np.abs(audio))
    if peak > 0:
        audio = audio / peak

    # MFCCs, averaged over time (axis 1 is the frame axis)
    mfcc = librosa.feature.mfcc(y=audio, n_fft=n_fft, sr=sr, n_mfcc=n_mfcc)
    mfccmean = np.mean(mfcc, axis=1)

    # Zero-Crossing Rate, averaged over all frames
    zcr = librosa.feature.zero_crossing_rate(audio)
    zcrmean = np.mean(zcr)

    # Combine into a single vector
    return np.concatenate((mfccmean, [zcrmean]))

In [217]:



# Build the dataset from fresh local lists so this cell can be re-run on its
# own: appending to x / y directly fails on a second run because they have
# already been converted to NumPy arrays by then.
feature_rows, labels = [], []

# os.listdir returns entries in arbitrary order; sorting fixes the sample
# order so the seeded train/test split is reproducible across machines.
for filename in sorted(os.listdir(ds)):
    prefix = filename.split("_")[0]
    # The label is read from the text before the first underscore. Skip
    # entries without a numeric prefix (e.g. hidden or system files) instead
    # of crashing in int().
    if not prefix.isdigit():
        continue

    label = int(prefix)  # Extract label
    if label in (0, 1):  # Keep only 0 and 1
        file_path = os.path.join(ds, filename)
        feature_rows.append(extract_features(file_path))
        labels.append(label)

x = np.array(feature_rows)
y = np.array(labels)



In [193]:

# Quick sanity check of the assembled dataset.
print("x shape:",x.shape)
print("y shape:", y.shape)

# Show one slice from the start and one from rows 300-309; the labels say
# which slice is printed (both used to be titled "First 10").
print("First 10 X values:\n", x[:10])
print("X values 300-309:\n", x[300:310])


print("\nFirst 10 Y values:\n", y[:10])
print("\nY values 300-309:\n", y[300:310])

x shape: (600, 21)
y shape: (600,)
First 10 X values:
 [[-1.20565201e+02 -1.86906776e+01  2.83344936e+01 -5.17800293e+01
  -6.00223770e+01 -5.56241875e+01 -5.29638176e+01 -1.85845242e+01
  -4.24361954e+01 -6.32800484e+00 -2.21740627e+01  5.86206722e+00
  -2.33267117e+01 -1.89332259e+00 -1.68355980e+01  3.84891689e-01
  -5.88402939e+00  5.58333778e+00 -7.52498770e+00  1.01321745e+01
   2.72167969e-01]
 [-1.00610092e+02 -6.41795635e+00  1.94314079e+01 -5.31235161e+01
  -5.91527824e+01 -4.53861771e+01 -6.03139534e+01 -8.17029667e+00
  -4.29296494e+01 -3.97482800e+00 -2.00125103e+01 -8.13723755e+00
  -1.72807159e+01  4.88735247e+00 -1.73907909e+01  5.42789316e+00
  -7.25167322e+00  9.12075901e+00 -6.67035818e+00  1.37030897e+01
   2.72753906e-01]
 [-8.60442810e+01  3.88521552e+00  3.35237961e+01 -6.67588501e+01
  -5.57901764e+01 -4.74691963e+01 -6.21482391e+01 -2.67475243e+01
  -3.34148941e+01 -3.86017656e+00 -2.62303543e+01 -1.86420512e+00
  -2.73388405e+01 -6.04923439e+00 -2.00507584e+01

In [218]:
#3: Naive Bayes Classifier 
# Model state shared by fit(), likelihood() and predict(); each dict is keyed
# by class label:
#   priors    -> fraction of training samples in the class
#   means     -> per-feature mean of the class
#   variances -> per-feature variance of the class
priors, means, variances = {}, {}, {}

def fit(x, y):
    """Estimate the Gaussian Naive Bayes parameters from training data.

    The module-level ``priors``, ``means`` and ``variances`` dictionaries are
    emptied and refilled in place, one entry per class found in ``y``.

    Parameters
    ----------
    x : array-like
        Training samples, one row per sample.
    y : array-like
        Class label of each row of ``x``.

    Raises
    ------
    ValueError
        If there are no samples or ``x`` and ``y`` disagree on their number.
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y)
    if y.size == 0 or x.shape[0] != y.shape[0]:
        raise ValueError("x and y must be non-empty and contain the same number of samples")

    # Forget the previous model first. fit() is called repeatedly (e.g. on
    # bootstrap samples); without this, a class absent from the new data would
    # keep its stale parameters and still be considered by predict().
    priors.clear()
    means.clear()
    variances.clear()

    # A feature that is constant within a class has zero variance, which makes
    # the Gaussian density divide by zero. Add a tiny floor relative to the
    # largest overall feature variance (the idea behind scikit-learn's
    # ``var_smoothing``); it is negligible for well-behaved features.
    epsilon = 1e-9 * np.var(x, axis=0).max()
    if not epsilon > 0:
        epsilon = 1e-9

    for c in np.unique(y):
        members = x[y == c]
        # Class prior: share of the training samples that belong to c
        priors[c] = len(members) / len(y)
        # Mean and (smoothed) variance of each feature within the class
        means[c] = members.mean(axis=0)
        variances[c] = members.var(axis=0) + epsilon


def likelihood(x, c):
    """Evaluate the Gaussian density of class ``c`` at ``x``, feature by feature.

    Looks up the class parameters in the module-level ``means`` and
    ``variances`` dictionaries and returns one density value per feature; the
    values are not multiplied together here.
    """
    mu = means[c]
    var = variances[c]
    # Normal pdf: exp(-(x - mu)^2 / (2 * var)) / sqrt(2 * pi * var)
    scale = 1 / np.sqrt(2 * np.pi * var)
    return scale * np.exp(-(x - mu) ** 2 / (2 * var))

def predict(X):
    """Predict the most probable class for every sample in ``X``.

    Scores are computed in log space: log prior plus the sum of per-feature
    Gaussian log-densities. Multiplying the raw densities underflows to 0.0
    once a sample is far from every class mean, which makes all classes tie
    and the first one win regardless of the data.

    Parameters
    ----------
    X : iterable of 1-D arrays
        Samples to classify, one feature vector per item.

    Returns
    -------
    numpy.ndarray
        Predicted class label for each sample, taken from the keys of the
        module-level ``priors`` dictionary.
    """
    # Per-class terms do not depend on the sample, so compute them once.
    class_terms = []
    for c in priors:
        # Guard against a zero variance, which would divide by zero below.
        var = np.maximum(variances[c], np.finfo(float).tiny)
        constant = np.log(priors[c]) - 0.5 * np.sum(np.log(2 * np.pi * var))
        class_terms.append((c, means[c], var, constant))

    predictions = []
    for sample in X:
        log_posteriors = {}
        for c, mean, var, constant in class_terms:
            # log N(sample | mean, var) summed over features, plus log prior
            log_posteriors[c] = constant - 0.5 * np.sum((sample - mean) ** 2 / var)
        # Predict the class with the highest log-posterior
        predictions.append(max(log_posteriors, key=log_posteriors.get))
    return np.array(predictions)

    



# Hold out 20% of the samples for evaluation; random_state pins the split.
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

fit(X_train, y_train)

y_pred = predict(X_test)

# accuracy_score takes (y_true, y_pred); use that order, as the other
# evaluation cells in this notebook do.
accuracy = accuracy_score(y_test, y_pred)
print(accuracy*100)










99.16666666666667


In [248]:
import tempfile


def record_audio(duration=0.9, sample_rate=22050):
    """Record a single-channel clip through sounddevice and return it.

    ``duration`` is in seconds and ``sample_rate`` in samples per second, so
    ``int(duration * sample_rate)`` frames are requested. The call does not
    return until ``sd.wait()`` has finished. The recording is passed through
    ``np.squeeze`` before being returned.
    """
    print("Recording...")
    n_frames = int(duration * sample_rate)
    recording = sd.rec(n_frames, samplerate=sample_rate, channels=1)
    sd.wait()  # wait for the recording to complete
    print("Done recording.")
    return np.squeeze(recording)

# One sample rate for recording, playback and the temporary WAV file.
RECORD_SAMPLE_RATE = 22050

audio = record_audio(sample_rate=RECORD_SAMPLE_RATE)
sd.play(audio, samplerate=RECORD_SAMPLE_RATE)
sd.wait()

# Reserve a temp file name and close the handle before writing to it, so the
# file is not written through a second handle while the first is still open.
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
    temp_path = tmp.name

try:
    sf.write(temp_path, audio, RECORD_SAMPLE_RATE)
    features = extract_features(temp_path)
finally:
    # delete=False means nothing else cleans this file up; remove it here so
    # every run does not leave another recording behind in the temp directory.
    os.remove(temp_path)

# predict() iterates over rows, so present the single sample as a 1 x n matrix
features = features.reshape(1, -1)
print("Features shape:", features.shape)
# Now, you can use the extracted features to make a prediction
prediction = predict(features)

print(f"Predicted class: {prediction[0]}")



      

Recording...
Done recording.
Features shape: (1, 21)
Predicted class: 1


In [131]:
def majority_voting(predictions):
    """Combine several models' predictions by majority vote.

    ``predictions`` holds one row of labels per model. For every column (one
    test sample) the labels are cast to int and the most frequent one is
    chosen; ``argmax`` resolves ties in favour of the smallest label.
    """
    votes = np.asarray(predictions)
    winners = []
    for column in votes.T:
        counts = np.bincount(column.astype(int))
        winners.append(counts.argmax())
    return np.array(winners)



def bagging_naive_bayes(X_train, y_train, X_test, n_estimators=50, random_state=None):
    """Bagging ensemble built on the notebook's Naive Bayes ``fit``/``predict``.

    Each estimator is trained on a bootstrap sample (drawn with replacement,
    same size as the training set) and predicts ``X_test``; the per-sample
    majority vote over all estimators is returned.

    Parameters
    ----------
    X_train, y_train : numpy.ndarray
        Training samples and their labels.
    X_test : numpy.ndarray
        Samples to classify.
    n_estimators : int, optional
        Number of bootstrap models to train.
    random_state : int or None, optional
        Seed for the bootstrap sampling. ``None`` keeps using NumPy's global
        random state, as before.

    Returns
    -------
    numpy.ndarray
        Majority-vote label for every row of ``X_test``.

    Raises
    ------
    ValueError
        If ``n_estimators`` is smaller than 1.
    """
    if n_estimators < 1:
        raise ValueError("n_estimators must be at least 1")

    rng = np.random if random_state is None else np.random.RandomState(random_state)
    n_samples = X_train.shape[0]

    # fit() writes into the shared priors/means/variances dictionaries, so
    # training the bootstrap models would otherwise silently replace whatever
    # model was fitted before this call. Snapshot it and put it back afterwards.
    saved = (dict(priors), dict(means), dict(variances))

    predictions = []
    try:
        for _ in range(n_estimators):
            # bootstrap sample
            indices = rng.choice(n_samples, size=n_samples, replace=True)

            # Train on the resampled data
            fit(X_train[indices], y_train[indices])

            # Store the predictions for the current model
            predictions.append(predict(X_test))
    finally:
        for store, snapshot in zip((priors, means, variances), saved):
            store.clear()
            store.update(snapshot)

    # Apply majority voting
    return majority_voting(predictions)


  
# Run the bagged Naive Bayes ensemble and score it on the held-out split.
y_pred_bagging = bagging_naive_bayes(
    X_train,
    y_train,
    X_test,
    n_estimators=50,
)
# Evaluate the model
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred_bagging)
print(f"Accuracy of Bagging with Naïve Bayes: {accuracy*100}")

Accuracy of Bagging with Naïve Bayes: 75.5


In [249]:
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

def bagging_logistic_regression(X_train, y_train, X_test, n_estimators=50, random_state=None):
    """Bagging ensemble of logistic-regression classifiers.

    Each estimator is a fresh ``LogisticRegression(max_iter=1000)`` trained on
    a bootstrap sample (drawn with replacement, same size as the training
    set); the per-sample majority vote of their predictions on ``X_test`` is
    returned.

    Parameters
    ----------
    X_train, y_train : numpy.ndarray
        Training samples and their labels.
    X_test : numpy.ndarray
        Samples to classify.
    n_estimators : int, optional
        Number of bootstrap models to train.
    random_state : int or None, optional
        Seed for the bootstrap sampling. ``None`` keeps using NumPy's global
        random state, as before.

    Returns
    -------
    numpy.ndarray
        Majority-vote label for every row of ``X_test``.

    Raises
    ------
    ValueError
        If ``n_estimators`` is smaller than 1.
    """
    if n_estimators < 1:
        raise ValueError("n_estimators must be at least 1")

    rng = np.random if random_state is None else np.random.RandomState(random_state)
    n_samples = X_train.shape[0]

    # Store predictions for each model
    predictions = []
    for _ in range(n_estimators):
        # bootstrap sample
        indices = rng.choice(n_samples, size=n_samples, replace=True)

        model = LogisticRegression(max_iter=1000)
        model.fit(X_train[indices], y_train[indices])
        predictions.append(model.predict(X_test))

    return majority_voting(predictions)
# Run the bagged logistic-regression ensemble and score it on the held-out split.
y_pred_bagging = bagging_logistic_regression(
    X_train,
    y_train,
    X_test,
    n_estimators=50,
)

accuracy = accuracy_score(y_true=y_test, y_pred=y_pred_bagging)
print(f"Accuracy of Bagging with Logistic Regression: {accuracy*100}")

Accuracy of Bagging with Logistic Regression: 100.0
