In [4]:
import numpy as np
import os
import librosa
import soundfile as sf
import tensorflow as tf
from tensorflow.keras.models import Sequential, save_model, load_model
from tensorflow.keras.layers import Dense, Flatten
from sklearn.model_selection import train_test_split

# Parameters
fs = 44100  # Target sampling rate
n_mfcc = 13  # Number of MFCC features
n_fft = 2048  # Interval to apply FFT
hop_length = 512  # Sliding window for FFT
num_segments = 2  # Number of segments to divide each audio file
max_length = 300  # Maximum length of MFCC features (number of frames)

# Directory containing audio files
input_dir = 'vowel-heart_dataset'  # Replace with your directory containing .wav files

# Initialize lists to store features and labels
features = []
labels = []

# Function to extract MFCC features from an audio file segment
def extract_mfcc_segment(signal, sample_rate, start, finish):
    mfcc = librosa.feature.mfcc(y=signal[start:finish], sr=sample_rate, n_mfcc=n_mfcc, n_fft=n_fft, hop_length=hop_length)
    return mfcc.T

# Function to extract oximeter reading from file name
def extract_oximeter_reading(file_name):
    oximeter_reading = int(''.join(filter(str.isdigit, file_name)))
    return oximeter_reading

# Pad or truncate MFCC features to a consistent length
def pad_or_truncate(mfcc, max_length):
    if len(mfcc) < max_length:
        pad_width = max_length - len(mfcc)
        mfcc = np.pad(mfcc, ((0, pad_width), (0, 0)), mode='constant')
    else:
        mfcc = mfcc[:max_length]
    return mfcc

# Process each file in the directory
for filename in os.listdir(input_dir):
    if filename.endswith(".wav") or filename.endswith(".Wav"):
        file_path = os.path.join(input_dir, filename)
        print(f"Processing file: {file_path}")
        
        # Load audio file
        audio, original_fs = sf.read(file_path)
        if len(audio.shape) > 1:
            audio = librosa.to_mono(audio.T)
        if original_fs != fs:
            audio = librosa.resample(audio, orig_sr=original_fs, target_sr=fs)
        
        # Calculate segment length and number of MFCC vectors per segment
        samples_per_segment = int(len(audio) / num_segments)
        num_mfcc_vectors_per_segment = int(np.ceil(samples_per_segment / hop_length))

        # Extract MFCC features from each segment
        for segment in range(num_segments):
            start = samples_per_segment * segment
            finish = start + samples_per_segment
            
            mfcc_features = extract_mfcc_segment(audio, fs, start, finish)
            mfcc_features = pad_or_truncate(mfcc_features, max_length)
            
            # Append the MFCC features of the current segment
            features.append(mfcc_features)
            # Append the label (oximeter reading) of the current segment
            oximeter_reading = extract_oximeter_reading(filename)
            labels.append(oximeter_reading)
            print(f"{file_path}, segment:{segment + 1}, label:{oximeter_reading}")

# Convert lists to numpy arrays
features = np.array(features)
labels = np.array(labels)

print(f"Extracted features shape: {features.shape}")
print(f"Labels shape: {labels.shape}")

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)

# Build the neural network model
model = Sequential([
    Flatten(input_shape=(max_length, n_mfcc)),
    Dense(256, activation='relu'),
   # Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(1, activation='linear')  # Regression output
])

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mean_absolute_error'])

# Train the model
model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2)

# Save the trained model
model_save_path = 'trained_model.h5'
model.save(model_save_path)
print(f"Model saved to {model_save_path}")

# Load the model (you can load it in a different script or session)
model = load_model(model_save_path)
print(f"Model loaded from {model_save_path}")

# Evaluate the model
loss, mae = model.evaluate(X_test, y_test)
print(f"Test Mean Absolute Error: {mae}")

# Predict the labels
y_pred = model.predict(X_test)

# Calculate the accuracy percentage
tolerance = 5  # Define a tolerance level for the predictions
accurate_predictions = np.sum(np.abs(y_pred.flatten() - y_test) <= tolerance)
accuracy_percentage = (accurate_predictions / len(y_test)) * 100
print(f"Accuracy Percentage: {accuracy_percentage:.2f}%")

# Print the predictions and actual labels
for i in range(len(y_test)):
    print(f"Actual: {y_test[i]}, Predicted: {y_pred[i][0]}")


Processing file: vowel-heart_dataset\a,abhay,75,sitting.Wav
vowel-heart_dataset\a,abhay,75,sitting.Wav, segment:1, label:75
vowel-heart_dataset\a,abhay,75,sitting.Wav, segment:2, label:75
Processing file: vowel-heart_dataset\A,Gokul,p95,m,sitting.wav
vowel-heart_dataset\A,Gokul,p95,m,sitting.wav, segment:1, label:95
vowel-heart_dataset\A,Gokul,p95,m,sitting.wav, segment:2, label:95
Processing file: vowel-heart_dataset\A,kundan,p126,m,cycling.wav
vowel-heart_dataset\A,kundan,p126,m,cycling.wav, segment:1, label:126
vowel-heart_dataset\A,kundan,p126,m,cycling.wav, segment:2, label:126
Processing file: vowel-heart_dataset\A,lala,p108,m,basket ball.wav
vowel-heart_dataset\A,lala,p108,m,basket ball.wav, segment:1, label:108
vowel-heart_dataset\A,lala,p108,m,basket ball.wav, segment:2, label:108
Processing file: vowel-heart_dataset\a,manoj,90,sitting.Wav
vowel-heart_dataset\a,manoj,90,sitting.Wav, segment:1, label:90
vowel-heart_dataset\a,manoj,90,sitting.Wav, segment:2, label:90
Processing 

  super().__init__(**kwargs)


Epoch 1/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 126ms/step - loss: 245880.7969 - mean_absolute_error: 313.0830 - val_loss: 3079.5269 - val_mean_absolute_error: 51.3258
Epoch 2/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step - loss: 9039.4648 - mean_absolute_error: 68.0624 - val_loss: 453.9644 - val_mean_absolute_error: 15.5649
Epoch 3/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step - loss: 6553.5166 - mean_absolute_error: 41.2250 - val_loss: 970.1295 - val_mean_absolute_error: 23.8755
Epoch 4/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step - loss: 10168.1514 - mean_absolute_error: 44.2518 - val_loss: 1013.0070 - val_mean_absolute_error: 23.9498
Epoch 5/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step - loss: 7774.6621 - mean_absolute_error: 33.1411 - val_loss: 759.3245 - val_mean_absolute_error: 20.7801
Epoch 6/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m



Model saved to trained_model.h5




Model loaded from trained_model.h5
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 350.9630 - mean_absolute_error: 15.6156 
Test Mean Absolute Error: 15.887894630432129
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step
Accuracy Percentage: 17.39%
Actual: 72, Predicted: 100.323974609375
Actual: 95, Predicted: 64.13053894042969
Actual: 90, Predicted: 77.06253051757812
Actual: 90, Predicted: 114.18009185791016
Actual: 75, Predicted: 56.47462463378906
Actual: 75, Predicted: 69.17396545410156
Actual: 75, Predicted: 90.55513000488281
Actual: 70, Predicted: 84.68574523925781
Actual: 83, Predicted: 66.09059143066406
Actual: 88, Predicted: 84.476806640625
Actual: 73, Predicted: 83.86048889160156
Actual: 95, Predicted: 86.70692443847656
Actual: 144, Predicted: 94.46601867675781
Actual: 110, Predicted: 95.44499206542969
Actual: 105, Predicted: 101.25260162353516
Actual: 120, Predicted: 100.94189453125
Actual: 75, Predicted: 52.97322082519531
A

In [1]:
import numpy as np
import librosa
import soundfile as sf
import tensorflow as tf
from tensorflow.keras.models import load_model
import sounddevice as sd

# Parameters
fs = 44100  # Target sampling rate
n_mfcc = 13  # Number of MFCC features
n_fft = 2048  # Interval to apply FFT
hop_length = 512  # Sliding window for FFT
max_length = 300  # Maximum length of MFCC features (number of frames)
record_duration = 6  # Duration of the recording in seconds

# Load the trained model
model_save_path = 'trained_model.h5'
model = load_model(model_save_path)
print(f"Model loaded from {model_save_path}")

# Record audio for the specified duration
print("Recording...")
audio = sd.rec(int(record_duration * fs), samplerate=fs, channels=1, dtype='float32')
sd.wait()  # Wait until recording is finished
print("Recording finished")

# Convert to mono if needed
if audio.ndim > 1:
    audio = librosa.to_mono(audio.T)

# Extract MFCC features from the recorded audio
def extract_mfcc_from_audio(audio, sample_rate, n_mfcc, n_fft, hop_length, max_length):
    mfcc = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=n_mfcc, n_fft=n_fft, hop_length=hop_length)
    mfcc = mfcc.T
    # Pad or truncate MFCC features to a consistent length
    if len(mfcc) < max_length:
        pad_width = max_length - len(mfcc)
        mfcc = np.pad(mfcc, ((0, pad_width), (0, 0)), mode='constant')
    else:
        mfcc = mfcc[:max_length]
    return  mfcc

mfcc_features = extract_mfcc_from_audio(audio.flatten(), fs, n_mfcc, n_fft, hop_length, max_length)
mfcc_features = np.expand_dims(mfcc_features, axis=0)  # Add batch dimension

# Predict the output using the model
prediction = model.predict(mfcc_features)
print(f"Predicted Value: {prediction[0][0]}")




Model loaded from trained_model.h5
Recording...
Recording finished
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 189ms/step
Predicted Value: 151.00010681152344


In [8]:
import numpy as np
import os
import librosa
import soundfile as sf
import tensorflow as tf
from tensorflow.keras.models import Sequential, save_model, load_model
from tensorflow.keras.layers import Dense, Flatten
from sklearn.model_selection import train_test_split

# Parameters
fs = 44100  # Target sampling rate
n_mfcc = 13  # Number of MFCC features
n_fft = 2048  # Interval to apply FFT
hop_length = 512  # Sliding window for FFT
num_segments = 2  # Number of segments to divide each audio file
max_length = 300  # Maximum length of MFCC features (number of frames)

# Directory containing audio files
input_dir = 'vowel-heart_dataset'  # Replace with your directory containing .wav files

# Initialize lists to store features and labels
features = []
labels = []

# Function to extract MFCC features from an audio file segment
def extract_mfcc_segment(signal, sample_rate, start, finish):
    mfcc = librosa.feature.mfcc(y=signal[start:finish], sr=sample_rate, n_mfcc=n_mfcc, n_fft=n_fft, hop_length=hop_length)
    return mfcc.T

# Function to extract oximeter reading from file name
def extract_oximeter_reading(file_name):
    oximeter_reading = int(''.join(filter(str.isdigit, file_name)))
    return oximeter_reading

# Pad or truncate MFCC features to a consistent length
def pad_or_truncate(mfcc, max_length):
    if len(mfcc) < max_length:
        pad_width = max_length - len(mfcc)
        mfcc = np.pad(mfcc, ((0, pad_width), (0, 0)), mode='constant')
    else:
        mfcc = mfcc[:max_length]
    return mfcc

# Process each file in the directory
for filename in os.listdir(input_dir):
    if filename.endswith(".wav") or filename.endswith(".Wav"):
        file_path = os.path.join(input_dir, filename)
        print(f"Processing file: {file_path}")
        
        # Load audio file
        audio, original_fs = sf.read(file_path)
        if len(audio.shape) > 1:
            audio = librosa.to_mono(audio.T)
        if original_fs != fs:
            audio = librosa.resample(audio, orig_sr=original_fs, target_sr=fs)
        
        # Calculate segment length and number of MFCC vectors per segment
        samples_per_segment = int(len(audio) / num_segments)
        num_mfcc_vectors_per_segment = int(np.ceil(samples_per_segment / hop_length))

        # Extract MFCC features from each segment
        for segment in range(num_segments):
            start = samples_per_segment * segment
            finish = start + samples_per_segment
            
            mfcc_features = extract_mfcc_segment(audio, fs, start, finish)
            mfcc_features = pad_or_truncate(mfcc_features, max_length)
            
            # Append the MFCC features of the current segment
            features.append(mfcc_features)
            # Append the label (oximeter reading) of the current segment
            oximeter_reading = extract_oximeter_reading(filename)
            labels.append(oximeter_reading)
            print(f"{file_path}, segment:{segment + 1}, label:{oximeter_reading}")

# Convert lists to numpy arrays
features = np.array(features)
labels = np.array(labels)

print(f"Extracted features shape: {features.shape}")
print(f"Labels shape: {labels.shape}")

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)

# Build the neural network model
model = Sequential([
    Flatten(input_shape=(max_length, n_mfcc)),
    Dense(256, activation='relu'),
    # Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(1, activation='linear')  # Regression output
])

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mean_absolute_error'])

# Train the model
model.fit(X_train, y_train, epochs=50, batch_size=32)

# Save the trained model
model_save_path = 'trained_model.h5'
model.save(model_save_path)
print(f"Model saved to {model_save_path}")

# Load the model (you can load it in a different script or session)
model = load_model(model_save_path)
print(f"Model loaded from {model_save_path}")

# Evaluate the model
loss, mae = model.evaluate(X_test, y_test)
print(f"Test Mean Absolute Error: {mae}")

# Predict the labels
#print(len(X_test))
y_pred = model.predict(X_test)

# Calculate the mean percentage error
percentage_errors = np.abs((y_test - y_pred.flatten()) / y_test) * 100
mean_percentage_error = np.mean(percentage_errors)

# Calculate the accuracy percentage
accuracy_percentage = 100 - mean_percentage_error
print(f"Accuracy Percentage: {accuracy_percentage:.2f}%")

# Print the predictions and actual labels
for i in range(len(y_test)):
    print(f"Actual: {y_test[i]}, Predicted: {y_pred[i][0]}")


Processing file: vowel-heart_dataset\a,abhay,75,sitting.Wav
vowel-heart_dataset\a,abhay,75,sitting.Wav, segment:1, label:75
vowel-heart_dataset\a,abhay,75,sitting.Wav, segment:2, label:75
Processing file: vowel-heart_dataset\A,Gokul,p95,m,sitting.wav
vowel-heart_dataset\A,Gokul,p95,m,sitting.wav, segment:1, label:95
vowel-heart_dataset\A,Gokul,p95,m,sitting.wav, segment:2, label:95
Processing file: vowel-heart_dataset\A,kundan,p126,m,cycling.wav
vowel-heart_dataset\A,kundan,p126,m,cycling.wav, segment:1, label:126
vowel-heart_dataset\A,kundan,p126,m,cycling.wav, segment:2, label:126
Processing file: vowel-heart_dataset\A,lala,p108,m,basket ball.wav
vowel-heart_dataset\A,lala,p108,m,basket ball.wav, segment:1, label:108
vowel-heart_dataset\A,lala,p108,m,basket ball.wav, segment:2, label:108
Processing file: vowel-heart_dataset\a,manoj,90,sitting.Wav
vowel-heart_dataset\a,manoj,90,sitting.Wav, segment:1, label:90
vowel-heart_dataset\a,manoj,90,sitting.Wav, segment:2, label:90
Processing 



Model saved to trained_model.h5




Model loaded from trained_model.h5
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 298.0335 - mean_absolute_error: 12.9088  
Test Mean Absolute Error: 13.204462051391602
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 366ms/step
Accuracy Percentage: 86.11%
Actual: 72, Predicted: 86.59745025634766
Actual: 95, Predicted: 57.952415466308594
Actual: 90, Predicted: 80.2306137084961
Actual: 90, Predicted: 108.23477935791016
Actual: 75, Predicted: 76.9393539428711
Actual: 75, Predicted: 81.4524154663086
Actual: 75, Predicted: 85.18372344970703
Actual: 70, Predicted: 74.2496566772461
Actual: 83, Predicted: 72.53765106201172
Actual: 88, Predicted: 101.07548522949219
Actual: 73, Predicted: 77.2585678100586
Actual: 95, Predicted: 88.9222412109375
Actual: 144, Predicted: 84.13593292236328
Actual: 110, Predicted: 106.82762908935547
Actual: 105, Predicted: 110.1688461303711
Actual: 120, Predicted: 104.68197631835938
Actual: 75, Predicted: 71.4834823608398

In [1]:
import numpy as np
import librosa
import soundfile as sf
import sounddevice as sd
import tensorflow as tf
from tensorflow.keras.models import load_model

# Parameters
fs = 44100  # Target sampling rate
n_mfcc = 13  # Number of MFCC features
n_fft = 2048  # Interval to apply FFT
hop_length = 512  # Sliding window for FFT
num_segments = 2  # Number of segments to divide each audio file
max_length = 300  # Maximum length of MFCC features (number of frames)
model_save_path = 'trained_model.h5'  # Path to the trained model

# Load the trained model
model = load_model(model_save_path)
print(f"Model loaded from {model_save_path}")

# Function to extract MFCC features from an audio file segment
def extract_mfcc_segment(signal, sample_rate, start, finish):
    mfcc = librosa.feature.mfcc(y=signal[start:finish], sr=sample_rate, n_mfcc=n_mfcc, n_fft=n_fft, hop_length=hop_length)
    return mfcc.T

# Pad or truncate MFCC features to a consistent length
def pad_or_truncate(mfcc, max_length):
    if len(mfcc) < max_length:
        pad_width = max_length - len(mfcc)
        mfcc = np.pad(mfcc, ((0, pad_width), (0, 0)), mode='constant')
    else:
        mfcc = mfcc[:max_length]
    return mfcc

# Function to record audio
def record_audio(duration=5, sample_rate=fs):
    print("Recording audio...")
    audio_data = sd.rec(int(duration * sample_rate), samplerate=sample_rate, channels=1, dtype='float32')
    sd.wait()
    print("Recording complete.")
    return audio_data.flatten(), sample_rate

# Function to process audio and predict heart rate
def predict_heart_rate(audio_data, sample_rate):
    # Calculate segment length and number of MFCC vectors per segment
    samples_per_segment = int(len(audio_data) / num_segments)
    num_mfcc_vectors_per_segment = int(np.ceil(samples_per_segment / hop_length))

    all_mfcc_features = []
    
    # Extract MFCC features from each segment
    for segment in range(num_segments):
        start = samples_per_segment * segment
        finish = start + samples_per_segment

        mfcc_features = extract_mfcc_segment(audio_data, sample_rate, start, finish)
        mfcc_features = pad_or_truncate(mfcc_features, max_length)
        
        all_mfcc_features.append(mfcc_features)
    
    all_mfcc_features = np.array(all_mfcc_features)

    # Predict heart rate
    predictions = model.predict(all_mfcc_features)
    avg_prediction = np.mean(predictions)
    
    return avg_prediction

# Record audio for 5 seconds and predict heart rate
recorded_audio_data, recorded_sample_rate = record_audio(duration=5)
estimated_heart_rate = predict_heart_rate(recorded_audio_data, recorded_sample_rate)

print(f"Estimated Heart Rate (bpm): {estimated_heart_rate:.2f}")




Model loaded from trained_model.h5
Recording audio...
Recording complete.
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
Estimated Heart Rate (bpm): 75.05
