In [None]:
import numpy as np
import os
import librosa
import soundfile as sf

# Parameters
fs = 44100  # Target sampling rate
n_mfcc = 13  # Number of MFCC features
n_fft = 2048  # Interval to apply FFT
hop_length = 512  # Sliding window for FFT
num_segments = 2  # Number of segments to divide each audio file
max_length = 300  # Maximum length of MFCC features (number of frames)

# Directory containing audio files
input_dir = 'wav files'  # Replace with your directory containing .wav files

# Initialize lists to store features and labels
features = []
labels = []

# Function to extract MFCC features from an audio file segment
def extract_mfcc_segment(signal, sample_rate, start, finish):
    mfcc = librosa.feature.mfcc(y=signal[start:finish], sr=sample_rate, n_mfcc=n_mfcc, n_fft=n_fft, hop_length=hop_length)
    return mfcc.T

# Function to extract oximeter reading from file name
def extract_oximeter_reading(file_name):
    oximeter_reading = int(''.join(filter(str.isdigit, file_name)))
    return oximeter_reading

# Pad or truncate MFCC features to a consistent length
def pad_or_truncate(mfcc, max_length):
    if len(mfcc) < max_length:
        pad_width = max_length - len(mfcc)
        mfcc = np.pad(mfcc, ((0, pad_width), (0, 0)), mode='constant')
    else:
        mfcc = mfcc[:max_length]
    return mfcc

# Process each file in the directory
for filename in os.listdir(input_dir):
    if filename.endswith(".wav"):
        file_path = os.path.join(input_dir, filename)
        print(f"Processing file: {file_path}")
        
        # Load audio file
        audio, original_fs = sf.read(file_path)
        if len(audio.shape) > 1:
            audio = librosa.to_mono(audio.T)
        if original_fs != fs:
            audio = librosa.resample(audio, orig_sr=original_fs, target_sr=fs)
        
        # Calculate segment length and number of MFCC vectors per segment
        samples_per_segment = int(len(audio) / num_segments)
        num_mfcc_vectors_per_segment = int(np.ceil(samples_per_segment / hop_length))

        # Extract MFCC features from each segment
        for segment in range(num_segments):
            start = samples_per_segment * segment
            finish = start + samples_per_segment
            
            mfcc_features = extract_mfcc_segment(audio, fs, start, finish)
            mfcc_features = pad_or_truncate(mfcc_features, max_length)
            
            # Append the MFCC features of the current segment
            features.append(mfcc_features)
            # Append the label (oximeter reading) of the current segment
            oximeter_reading = extract_oximeter_reading(filename)
            labels.append(oximeter_reading)
            print(f"{file_path}, segment:{segment + 1}, label:{oximeter_reading}")
            print(features)

# Convert lists to numpy arrays
features = np.array(features)
labels = np.array(labels)

print(f"Extracted features shape: {features.shape}")
print(f"Labels shape: {labels.shape}")


In [5]:
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

def plot_mfcc_label_correlation(features, labels):
    # Aggregate MFCC features by taking the mean across the frames (axis=1)
    mean_mfcc_features = np.mean(features, axis=1)
    
    # Create a DataFrame from the mean MFCC features and labels
    df = pd.DataFrame(mean_mfcc_features, columns=[f'MFCC_{i+1}' for i in range(mean_mfcc_features.shape[1])])
    df['label'] = labels

    # Compute the correlation matrix
    correlation_matrix = df.corr()

    # Plot the heatmap
    plt.figure(figsize=(10, 8))
    sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', cbar=True, vmin=-1, vmax=1)
    plt.title('Correlation Heatmap of Mean MFCC Features and Labels')
    plt.xlabel('MFCC Features and Label')
    plt.ylabel('MFCC Features and Label')
    plt.show()

# Assuming the features and labels are already defined in your previous code
plot_mfcc_label_correlation(features, labels)



AxisError: axis 1 is out of bounds for array of dimension 1

In [10]:
import numpy as np
import os
import librosa
import soundfile as sf
import tensorflow as tf
from tensorflow.keras.models import Sequential, save_model, load_model
from tensorflow.keras.layers import Dense, Flatten
from sklearn.model_selection import train_test_split

# Parameters
fs = 44100  # Target sampling rate
n_mfcc = 13  # Number of MFCC features
n_fft = 2048  # Interval to apply FFT
hop_length = 512  # Sliding window for FFT
num_segments = 2  # Number of segments to divide each audio file
max_length = 300  # Maximum length of MFCC features (number of frames)

# Directory containing audio files
input_dir = 'wav files'  # Replace with your directory containing .wav files

# Initialize lists to store features and labels
features = []
labels = []

# Function to extract MFCC features from an audio file segment
def extract_mfcc_segment(signal, sample_rate, start, finish):
    mfcc = librosa.feature.mfcc(y=signal[start:finish], sr=sample_rate, n_mfcc=n_mfcc, n_fft=n_fft, hop_length=hop_length)
    return mfcc.T

# Function to extract oximeter reading from file name
def extract_oximeter_reading(file_name):
    oximeter_reading = int(''.join(filter(str.isdigit, file_name)))
    return oximeter_reading

# Pad or truncate MFCC features to a consistent length
def pad_or_truncate(mfcc, max_length):
    if len(mfcc) < max_length:
        pad_width = max_length - len(mfcc)
        mfcc = np.pad(mfcc, ((0, pad_width), (0, 0)), mode='constant')
    else:
        mfcc = mfcc[:max_length]
    return mfcc

# Process each file in the directory
for filename in os.listdir(input_dir):
    if filename.endswith(".wav"):
        file_path = os.path.join(input_dir, filename)
        print(f"Processing file: {file_path}")
        
        # Load audio file
        audio, original_fs = sf.read(file_path)
        if len(audio.shape) > 1:
            audio = librosa.to_mono(audio.T)
        if original_fs != fs:
            audio = librosa.resample(audio, orig_sr=original_fs, target_sr=fs)
        
        # Calculate segment length and number of MFCC vectors per segment
        samples_per_segment = int(len(audio) / num_segments)
        num_mfcc_vectors_per_segment = int(np.ceil(samples_per_segment / hop_length))

        # Extract MFCC features from each segment
        for segment in range(num_segments):
            start = samples_per_segment * segment
            finish = start + samples_per_segment
            
            mfcc_features = extract_mfcc_segment(audio, fs, start, finish)
            mfcc_features = pad_or_truncate(mfcc_features, max_length)
            
            # Append the MFCC features of the current segment
            features.append(mfcc_features)
            # Append the label (oximeter reading) of the current segment
            oximeter_reading = extract_oximeter_reading(filename)
            labels.append(oximeter_reading)
            print(f"{file_path}, segment:{segment + 1}, label:{oximeter_reading}")

# Convert lists to numpy arrays
features = np.array(features)
labels = np.array(labels)

print(f"Extracted features shape: {features.shape}")
print(f"Labels shape: {labels.shape}")

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)

# Build the neural network model
model = Sequential([
    Flatten(input_shape=(max_length, n_mfcc)),
    Dense(256, activation='relu'),
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(1, activation='linear')  # Regression output
])

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mean_absolute_error'])

# Train the model
model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2)

# Save the trained model
model_save_path = 'trained_model.h5'
model.save(model_save_path)
print(f"Model saved to {model_save_path}")

# Load the model (you can load it in a different script or session)
model = load_model(model_save_path)
print(f"Model loaded from {model_save_path}")

# Evaluate the model
loss, mae = model.evaluate(X_test, y_test)
print(f"Test Mean Absolute Error: {mae}")

# Predict the labels
y_pred = model.predict(X_test)

# Calculate the accuracy percentage
tolerance = 5  # Define a tolerance level for the predictions
accurate_predictions = np.sum(np.abs(y_pred.flatten() - y_test) <= tolerance)
accuracy_percentage = (accurate_predictions / len(y_test)) * 100
print(f"Accuracy Percentage: {accuracy_percentage:.2f}%")

# Print the predictions and actual labels
for i in range(len(y_test)):
    print(f"Actual: {y_test[i]}, Predicted: {y_pred[i][0]}")


Processing file: wav files\A,Gokul,p95,m,sitting.wav
wav files\A,Gokul,p95,m,sitting.wav, segment:1, label:95
wav files\A,Gokul,p95,m,sitting.wav, segment:2, label:95
Processing file: wav files\A,kundan,p126,m,cycling.wav
wav files\A,kundan,p126,m,cycling.wav, segment:1, label:126
wav files\A,kundan,p126,m,cycling.wav, segment:2, label:126
Processing file: wav files\A,lala,p108,m,basket ball.wav
wav files\A,lala,p108,m,basket ball.wav, segment:1, label:108
wav files\A,lala,p108,m,basket ball.wav, segment:2, label:108
Processing file: wav files\A,Vishwanath,p100,m,sitting.wav
wav files\A,Vishwanath,p100,m,sitting.wav, segment:1, label:100
wav files\A,Vishwanath,p100,m,sitting.wav, segment:2, label:100
Processing file: wav files\a103.wav
wav files\a103.wav, segment:1, label:103
wav files\a103.wav, segment:2, label:103
Processing file: wav files\a113.wav
wav files\a113.wav, segment:1, label:113
wav files\a113.wav, segment:2, label:113
Processing file: wav files\a88.wav
wav files\a88.wav, 



Model saved to trained_model.h5




Model loaded from trained_model.h5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 246ms/step - loss: 298.8050 - mean_absolute_error: 14.6758
Test Mean Absolute Error: 14.675779342651367




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 110ms/step
Accuracy Percentage: 14.29%
Actual: 85, Predicted: 56.4177360534668
Actual: 95, Predicted: 74.56463623046875
Actual: 86, Predicted: 94.73439025878906
Actual: 108, Predicted: 83.40609741210938
Actual: 68, Predicted: 96.97932434082031
Actual: 118, Predicted: 91.18647766113281
Actual: 113, Predicted: 95.68280029296875
Actual: 79, Predicted: 84.36100769042969
Actual: 74, Predicted: 62.00930404663086
Actual: 88, Predicted: 97.23451232910156
Actual: 77, Predicted: 80.73393249511719
Actual: 103, Predicted: 110.24952697753906
Actual: 94, Predicted: 102.12939453125
Actual: 108, Predicted: 112.30587005615234


In [11]:
import numpy as np
import librosa
import soundfile as sf
import tensorflow as tf
from tensorflow.keras.models import load_model
import sounddevice as sd

# Parameters
fs = 44100  # Target sampling rate
n_mfcc = 13  # Number of MFCC features
n_fft = 2048  # Interval to apply FFT
hop_length = 512  # Sliding window for FFT
max_length = 300  # Maximum length of MFCC features (number of frames)
record_duration = 6  # Duration of the recording in seconds

# Load the trained model
model_save_path = 'trained_model.h5'
model = load_model(model_save_path)
print(f"Model loaded from {model_save_path}")

# Record audio for the specified duration
print("Recording...")
audio = sd.rec(int(record_duration * fs), samplerate=fs, channels=1, dtype='float32')
sd.wait()  # Wait until recording is finished
print("Recording finished")

# Convert to mono if needed
if audio.ndim > 1:
    audio = librosa.to_mono(audio.T)

# Extract MFCC features from the recorded audio
def extract_mfcc_from_audio(audio, sample_rate, n_mfcc, n_fft, hop_length, max_length):
    mfcc = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=n_mfcc, n_fft=n_fft, hop_length=hop_length)
    mfcc = mfcc.T
    # Pad or truncate MFCC features to a consistent length
    if len(mfcc) < max_length:
        pad_width = max_length - len(mfcc)
        mfcc = np.pad(mfcc, ((0, pad_width), (0, 0)), mode='constant')
    else:
        mfcc = mfcc[:max_length]
    return mfcc

mfcc_features = extract_mfcc_from_audio(audio.flatten(), fs, n_mfcc, n_fft, hop_length, max_length)
mfcc_features = np.expand_dims(mfcc_features, axis=0)  # Add batch dimension

# Predict the output using the model
prediction = model.predict(mfcc_features)
print(f"Predicted Value: {prediction[0][0]}")





Model loaded from trained_model.h5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 403ms/step - loss: 298.8050 - mean_absolute_error: 14.6758
Test Mean Absolute Error: 14.675779342651367




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 170ms/step
Accuracy Percentage: 14.29%
Actual: 85, Predicted: 56.4177360534668
Actual: 95, Predicted: 74.56463623046875
Actual: 86, Predicted: 94.73439025878906
Actual: 108, Predicted: 83.40609741210938
Actual: 68, Predicted: 96.97932434082031
Actual: 118, Predicted: 91.18647766113281
Actual: 113, Predicted: 95.68280029296875
Actual: 79, Predicted: 84.36100769042969
Actual: 74, Predicted: 62.00930404663086
Actual: 88, Predicted: 97.23451232910156
Actual: 77, Predicted: 80.73393249511719
Actual: 103, Predicted: 110.24952697753906
Actual: 94, Predicted: 102.12939453125
Actual: 108, Predicted: 112.30587005615234
